Commit | Line | Data |
---|---|---|
dea4d055 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Protocol for parsing. | |
4 | ;;; | |
5 | ;;; (c) 2009 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
10 | ;;; This file is part of the Sensble Object Design, an object system for C. | |
11 | ;;; | |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | ;;;-------------------------------------------------------------------------- | |
27 | ;;; Parser protocol discussion. | |
28 | ;;; | |
29 | ;;; Other languages, notably Haskell and ML, have `parser combinator | |
30 | ;;; libraries', which allow one to construct recursive descent parsers using | |
31 | ;;; approximately pleasant syntax. While attempts have been made to | |
32 | ;;; introduce the benefits of these libraries to Lisp, they've not been | |
33 | ;;; altogether successful; this seems due to Lisp's lack of features such as | |
34 | ;;; pattern matching, currying and lazy evaluation. Rather than fight with | |
35 | ;;; Lisp's weaknesses, this library plays to its strength, making heavy use | |
36 | ;;; of macros. Effectively, the `combinators' we build here are /compile- | |
37 | ;;; time/ combinators, not run-time ones. | |
38 | ;;; | |
39 | ;;; A `parser' is simply an expression which returns three values. | |
40 | ;;; | |
41 | ;;; * If the second value is nil, then the parser is said to have /failed/, | |
42 | ;;; and the first value is a list describing the things that the parser | |
43 | ;;; expected to find but didn't. (The precise details of the list items | |
44 | ;;; are important to error-reporting functions, but not to the low-level | |
45 | ;;; machinery, and are left up to higher-level protocols to nail down | |
46 | ;;; harder.) | |
47 | ;;; | |
48 | ;;; * If the second value is not nil, then the parser is said to have | |
49 | ;;; /succeeded/, and the first value is its /result/. | |
50 | ;;; | |
51 | ;;; * The third value indicates whether the parser consumed any of its | |
52 | ;;; input. Parsers don't backtrack implicitly (to avoid space leaks and | |
53 | ;;; bad performance), so the `consumedp' return value is used to decide | |
54 | ;;; whether the parser has `committed' to a particular branch. If the | |
55 | ;;; parser context supports place-capture (many do) then `peek' can be | |
56 | ;;; used to suppress consumption of input in the case of parser failure. | |
57 | ;;; | |
58 | ;;; The functions and macros here are simply ways of gluing together | |
59 | ;;; expressions which obey this protocol. | |
60 | ;;; | |
bf090e02 | 61 | ;;; The main contribution of this file is a macro `with-parser-context' which |
dea4d055 | 62 | ;;; embeds a parsing-specific S-expressions language entered using the new |
bf090e02 MW |
63 | ;;; macro `parse'. The behaviour of this macro is controlled by a pair of |
64 | ;;; compile-time generic functions `expand-parser-spec' and | |
65 | ;;; `expand-parser-form'. As well as the parser expression they're meant to | |
66 | ;;; process, these functions dispatch on a `context' argument, which is | |
67 | ;;; intended to help `leaf' parsers find the terminal symbols which they're | |
68 | ;;; meant to process. | |
dea4d055 | 69 | ;;; |
bf090e02 MW |
70 | ;;; Note that the context is a compile-time object, constructed by the |
71 | ;;; `parse' macro expansion function, though the idea is that it will contain | |
72 | ;;; the name or names of variables holding the run-time parser state (which | |
73 | ;;; will typically be a lexical analyser or an input stream or suchlike). | |
dea4d055 MW |
74 | |
75 | (cl:in-package #:sod-parser) | |
76 | ||
77 | ;;;-------------------------------------------------------------------------- | |
78 | ;;; Utilities. | |
79 | ||
348f3bfe | 80 | (export 'combine-parser-failures) |
dea4d055 MW |
81 | (defun combine-parser-failures (failures) |
82 | "Combine the failure indicators listed in FAILURES. | |
83 | ||
84 | (Note that this means that FAILURES is a list of lists.)" | |
85 | ||
86 | (reduce (lambda (f ff) (union f ff :test #'equal)) | |
87 | failures | |
88 | :initial-value nil)) | |
89 | ||
90 | ;;;-------------------------------------------------------------------------- | |
91 | ;;; Basic protocol. | |
92 | ||
93 | (eval-when (:compile-toplevel :load-toplevel :execute) | |
94 | ||
95 | (export 'expand-parser-spec) | |
96 | (defgeneric expand-parser-spec (context spec) | |
97 | (:documentation | |
98 | "Expand a parser specifier SPEC in a particular parser CONTEXT.") | |
99 | (:method (context (spec list)) | |
100 | (expand-parser-form context (car spec) (cdr spec)))) | |
101 | ||
102 | (export 'expand-parser-form) | |
103 | (defgeneric expand-parser-form (context head tail) | |
104 | (:documentation | |
105 | "Expand a parser list-form given by HEAD and TAIL, in CONTEXT.") | |
106 | (:method (context head tail) | |
1d8cc67a | 107 | (declare (ignore context)) |
dea4d055 MW |
108 | (cons head tail))) |
109 | ||
110 | (export 'wrap-parser) | |
111 | (defgeneric wrap-parser (context form) | |
112 | (:documentation | |
113 | "Enclose FORM in whatever is necessary to make the parser work.") | |
1d8cc67a MW |
114 | (:method (context form) |
115 | (declare (ignore context)) | |
116 | form))) | |
dea4d055 MW |
117 | |
118 | (export 'defparse) | |
119 | (defmacro defparse (name bvl &body body) | |
120 | "Define a new parser form. | |
121 | ||
122 | The full syntax is hairier than it looks: | |
123 | ||
124 | defparse NAME ( [[ :context (CTX SPEC) ]] . BVL ) | |
125 | { FORM }* | |
126 | ||
127 | The macro defines a new parser form (NAME ...) which is expanded by the | |
128 | body FORMs. The BVL is a destructuring lambda-list to be applied to the | |
129 | tail of the form. The body forms are enclosed in a block called NAME. | |
130 | ||
dea4d055 MW |
131 | If the :context key is provided, then the parser form is specialized on a |
132 | particular class of parser contexts SPEC; specialized expanders take | |
133 | priority over less specialized or unspecialized expanders -- so you can | |
134 | use this to override the built-in forms safely if they don't seem to be | |
135 | doing the right thing for you. Also, the context -- which is probably | |
136 | interesting to you if you've bothered to specialize -- is bound to the | |
137 | variable CTX." | |
138 | ||
139 | ;; BUG! misplaces declarations: if you declare the CONTEXT argument | |
140 | ;; `special' it won't be bound properly. I'm really not at all sure I know | |
141 | ;; how to fix this. | |
142 | ||
143 | (with-gensyms (head tail context) | |
144 | (let ((ctxclass t)) | |
145 | (loop | |
146 | (unless (and bvl (keywordp (car bvl))) (return)) | |
147 | (ecase (pop bvl) | |
148 | (:context (destructuring-bind (name spec) (pop bvl) | |
149 | (setf ctxclass spec context name))))) | |
150 | (multiple-value-bind (doc decls body) (parse-body body) | |
151 | `(defmethod expand-parser-form | |
152 | ((,context ,ctxclass) (,head (eql ',name)) ,tail) | |
153 | ,@doc | |
1d8cc67a | 154 | (declare (ignorable ,context)) |
dea4d055 MW |
155 | (block ,name |
156 | (destructuring-bind ,bvl ,tail | |
157 | ,@decls | |
158 | ,@body))))))) | |
159 | ||
160 | (export '(with-parser-context parse)) | |
161 | (defmacro with-parser-context ((class &rest initargs) &body body) | |
162 | "Evaluate BODY with a macro `parse' which expands parser forms. | |
163 | ||
164 | Evaluate BODY as an implicit progn. At compile time, a parser context is | |
165 | constructed by (apply #'make-instance CLASS INITARGS). The BODY can make | |
166 | use of the macro `parse': | |
167 | ||
168 | parse SPEC | |
169 | ||
170 | which parses the input in the manner described by SPEC, in the context of | |
171 | the parser context." | |
172 | ||
173 | (let ((context (apply #'make-instance class initargs))) | |
174 | (wrap-parser context | |
175 | `(macrolet ((parse (form) | |
176 | (expand-parser-spec ',context form))) | |
177 | ,@body)))) | |
178 | ||
179 | ;;;-------------------------------------------------------------------------- | |
180 | ;;; Common parser context protocol. | |
181 | ||
182 | (export 'parser-at-eof-p) | |
183 | (defgeneric parser-at-eof-p (context) | |
184 | (:documentation | |
185 | "Return whether the parser has reached the end of its input. | |
186 | ||
187 | Be careful: all of this is happening at macro expansion time.")) | |
188 | ||
189 | (export 'parser-step) | |
190 | (defgeneric parser-step (context) | |
191 | (:documentation | |
192 | "Advance the parser to the next character. | |
193 | ||
194 | Be careful: all of this is happening at macro-expansion time.")) | |
195 | ||
196 | (defmethod expand-parser-spec (context (spec (eql :eof))) | |
197 | "Tests succeeds if the parser has reached the end of its input. | |
198 | ||
199 | The failure indicator is the keyword `:eof'." | |
200 | ||
201 | `(if ,(parser-at-eof-p context) | |
202 | (values :eof t nil) | |
203 | (values '(:eof) nil nil))) | |
204 | ||
205 | ;;;-------------------------------------------------------------------------- | |
206 | ;;; Useful macros for dealing with parsers. | |
207 | ||
208 | (export 'it) | |
209 | (export 'if-parse) | |
210 | (defmacro if-parse ((&key (result 'it) expected (consumedp (gensym "CP"))) | |
211 | parser consequent &optional (alternative nil altp)) | |
212 | "Conditional parsing construction. | |
213 | ||
214 | If PARSER succeeds, then evaluate CONSEQUENT with RESULT bound to the | |
215 | result; otherwise evaluate ALTERNATIVE with EXPECTED bound to the | |
216 | expected-item list. If ALTERNATIVE is omitted, then propagate the failure | |
217 | following the parser protocol." | |
218 | ||
219 | (with-gensyms (value win) | |
220 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
221 | (declare (ignorable ,consumedp)) | |
222 | (if ,win | |
223 | (let ((,result ,value)) | |
224 | (declare (ignorable ,result)) | |
225 | ,consequent) | |
226 | ,(cond ((not altp) | |
227 | `(values ,value nil ,consumedp)) | |
228 | (expected | |
229 | `(let ((,expected ,value)) ,alternative)) | |
230 | (t | |
231 | alternative)))))) | |
232 | ||
233 | (export 'when-parse) | |
234 | (defmacro when-parse ((&optional (result 'it)) parser &body body) | |
235 | "Convenience macro for conditional parsing. | |
236 | ||
237 | If PARSER succeeds then evaluate BODY with RESULT bound to the result; | |
238 | otherwise propagate the failure." | |
239 | `(if-parse (:result ,result) ,parser (progn ,@body))) | |
240 | ||
241 | (export 'cond-parse) | |
242 | (defmacro cond-parse ((&key (result 'it) expected | |
243 | (consumedp (gensym "CP"))) | |
244 | &body clauses) | |
245 | "Frightening conditional parsing construct. | |
246 | ||
247 | Each of the CLAUSES has the form (PARSER &rest FORMS); the special `fake' | |
248 | parser form `t' may be used to describe a default action. If the PARSER | |
249 | succeeds then evaluate FORMS in order with RESULT bound to the parser | |
250 | result (if there are no forms, then propagate the success); if the PARSER | |
251 | fails without consuming input, then move onto the next clause. | |
252 | ||
253 | If the default clause (if any) is reached, or a parser fails after | |
254 | consuming input, then EXPECTED is bound to a list of failure indicators | |
255 | and the default clause's FORMS are evaluated and with CONSUMEDP bound to a | |
256 | generalized boolean indicating whether any input was consumed. If there | |
257 | is no default clause, and either some parser fails after consuming input, | |
258 | or all of the parsers fail without consuming, then a failure is reported | |
259 | and the input-consumption indicator is propagated. | |
260 | ||
261 | If a parser fails after consuming input, then the failure indicators are | |
262 | whatever that parser reported; if all the parsers fail without consuming | |
263 | then the failure indicators are the union of the indicators reported by | |
264 | the various parsers." | |
265 | ||
266 | (with-gensyms (block fail failarg) | |
267 | (labels ((walk (clauses failures) | |
268 | (cond ((null clauses) | |
269 | (values `(,fail nil (list ,@(reverse failures))) | |
270 | `(values (combine-parser-failures ,failarg) | |
271 | nil | |
272 | ,consumedp))) | |
273 | ((eq (caar clauses) t) | |
274 | (values `(,fail nil (list ,@(reverse failures))) | |
275 | `(,@(if expected | |
276 | `(let ((,expected | |
277 | (combine-parser-failures | |
278 | ,failarg)))) | |
279 | `(progn)) | |
280 | ,@(cdar clauses)))) | |
281 | (t | |
282 | (with-gensyms (value win cp) | |
283 | (multiple-value-bind (inner failbody) | |
284 | (walk (cdr clauses) (cons value failures)) | |
285 | (values `(multiple-value-bind (,value ,win ,cp) | |
286 | (parse ,(caar clauses)) | |
287 | (when ,win | |
288 | (return-from ,block | |
289 | (let ((,result ,value) | |
290 | (,consumedp ,cp)) | |
291 | (declare (ignorable ,result | |
292 | ,consumedp)) | |
293 | ,@(cdar clauses)))) | |
294 | (when ,cp | |
295 | (,fail t (list ,value))) | |
296 | ,inner) | |
297 | failbody))))))) | |
298 | (multiple-value-bind (inner failbody) (walk clauses nil) | |
299 | `(block ,block | |
300 | (flet ((,fail (,consumedp ,failarg) | |
301 | (declare (ignorable ,consumedp ,failarg)) | |
302 | ,failbody)) | |
303 | ,inner)))))) | |
304 | ||
305 | (export 'parser) | |
306 | (defmacro parser (bvl &body parser) | |
307 | "Functional abstraction for parsers." | |
308 | (multiple-value-bind (doc decls body) (parse-body parser) | |
309 | `(lambda ,bvl ,@doc ,@decls (parse ,@body)))) | |
310 | ||
311 | ;;;-------------------------------------------------------------------------- | |
312 | ;;; Standard parser forms. | |
313 | ||
314 | (export 'label) | |
315 | (defparse label (label parser) | |
316 | "If PARSER fails, use LABEL as the expected outcome. | |
317 | ||
318 | The LABEL is only evaluated if necessary." | |
319 | (with-gensyms (value win consumedp) | |
320 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
321 | (if ,win | |
322 | (values ,value ,win ,consumedp) | |
323 | (values (list ,label) nil ,consumedp))))) | |
324 | ||
325 | (defparse t (value) | |
326 | "Succeed, without consuming input, with result VALUE." | |
327 | `(values ,value t nil)) | |
328 | ||
bf090e02 MW |
329 | (defparse nil (indicator) |
330 | "Fail, without consuming input, with indicator VALUE." | |
331 | `(values (list ,indicator) nil nil)) | |
332 | ||
dea4d055 MW |
333 | (defparse when (cond &body parser) |
334 | "If CONDITION is true, then match PARSER; otherwise fail." | |
335 | `(if ,cond (parse ,@parser) (values nil nil nil))) | |
336 | ||
337 | (defmethod expand-parser-spec (context (spec (eql t))) | |
338 | "Always matches without consuming input." | |
1d8cc67a | 339 | (declare (ignore context)) |
dea4d055 MW |
340 | '(values t t nil)) |
341 | ||
bf090e02 MW |
342 | (defmethod expand-parser-spec (context (spec (eql nil))) |
343 | "Always fails without consuming input. The failure indicator is `:fail'." | |
1d8cc67a | 344 | (declare (ignore context)) |
bf090e02 MW |
345 | '(values '(:fail) nil nil)) |
346 | ||
dea4d055 MW |
347 | (export 'seq) |
348 | (defparse seq (binds &body body) | |
349 | "Parse a sequence of heterogeneous items. | |
350 | ||
351 | Syntax: | |
352 | ||
353 | seq ( { ATOMIC-PARSER-FORM | ([VAR] PARSER-FORM) }* ) | |
354 | { FORM }* | |
355 | ||
356 | The behaviour is similar to `let*'. The PARSER-FORMs are processed in | |
357 | order, left to right. If a parser succeeds, then its value is bound to | |
358 | the corresponding VAR, and available within Lisp forms enclosed within | |
359 | subsequent PARSER-FORMs and/or the body FORMs. If any parser fails, then | |
360 | the entire sequence fails. If all of the parsers succeeds, then the FORMs | |
361 | are evaluated as an implicit progn, and the sequence will succeed with the | |
362 | result computed by the final FORM." | |
363 | ||
364 | (with-gensyms (block consumedp) | |
365 | (labels ((walk (binds lets ignores) | |
366 | (if (endp binds) | |
367 | `(let* ((,consumedp nil) | |
368 | ,@(nreverse lets)) | |
369 | ,@(and ignores | |
370 | `((declare (ignore ,@(nreverse ignores))))) | |
371 | (values (progn ,@body) t ,consumedp)) | |
372 | (destructuring-bind (x &optional (y nil yp)) | |
373 | (if (listp (car binds)) | |
374 | (car binds) | |
375 | (list (car binds))) | |
376 | (with-gensyms (var value win cp) | |
377 | (multiple-value-bind (var parser ignores) | |
378 | (if (and yp x) | |
379 | (values x y ignores) | |
380 | (values var (if yp y x) (cons var ignores))) | |
381 | (walk (cdr binds) | |
382 | (cons `(,var (multiple-value-bind | |
383 | (,value ,win ,cp) | |
384 | (parse ,parser) | |
385 | (when ,cp (setf ,consumedp t)) | |
386 | (unless ,win | |
387 | (return-from ,block | |
388 | (values ,value ,nil | |
389 | ,consumedp))) | |
390 | ,value)) | |
391 | lets) | |
392 | ignores))))))) | |
393 | `(block ,block ,(walk binds nil nil))))) | |
394 | ||
395 | (export 'and) | |
396 | (defparse and (:context (context t) &rest parsers) | |
397 | "Parse a sequence of heterogeneous items, but ignore their values. | |
398 | ||
399 | This is just like (and is implemented using) `seq' with all the bindings | |
400 | set to `nil'. The result is `nil'." | |
401 | (with-gensyms (last) | |
402 | (if (null parsers) | |
403 | '(seq () nil) | |
404 | (expand-parser-spec context | |
405 | `(seq (,@(mapcar (lambda (parser) | |
406 | `(nil ,parser)) | |
407 | (butlast parsers)) | |
408 | (,last ,(car (last parsers)))) | |
409 | ,last))))) | |
410 | ||
411 | (export 'lisp) | |
412 | (defparse lisp (&rest forms) | |
413 | "Evaluate FORMs, which should obey the parser protocol." | |
414 | `(progn ,@forms)) | |
415 | ||
416 | (export 'many) | |
417 | (defparse many ((acc init update | |
418 | &key (new 'it) (final acc) (min nil minp) max (commitp t)) | |
419 | parser &optional (sep nil sepp)) | |
420 | "Parse a sequence of homogeneous items. | |
421 | ||
422 | The behaviour is similar to `do'. Initially an accumulator ACC is | |
423 | established, and bound to the value of INIT. The PARSER is then evaluated | |
424 | repeatedly. Each time it succeeds, UPDATE is evaluated with NEW (defaults | |
425 | to `it') bound to the result of the parse, and the value returned by | |
426 | UPDATE is stored back into ACC. If the PARSER fails, then the parse | |
427 | ends. The scope of ACC includes the UPDATE and FINAL forms, and the | |
428 | PARSER and SEP parsers; it is updated by side effects, not rebound. | |
429 | ||
430 | If a SEP parser is provided, then the behaviour changes as follows. | |
431 | Before each attempt to parse a new item using PARSER, the parser SEP is | |
432 | invoked. If SEP fails then the parse ends; if SEP succeeds, and COMMITP | |
433 | is true, then the PARSER must also succeed or the overall parse will | |
434 | fail. If COMMITP is false then a trailing SEP is permitted and ignored. | |
435 | ||
436 | If MAX (which will be evaluated) is not nil, then it must be a number: the | |
437 | parse ends automatically after PARSER has succeeded MAX times. When the | |
438 | parse has ended, if the PARSER succeeded fewer than MIN (which will be | |
439 | evaluated) times then the parse fails. Otherwise, the FINAL form (which | |
440 | defaults to simply returning ACC) is evaluated and its value becomes the | |
441 | result of the parse. MAX defaults to nil -- i.e., no maximum; MIN | |
442 | defaults to 1 if a SEP parser is given, or 0 if not. | |
443 | ||
444 | Note that `many' cannot fail if MIN is zero." | |
445 | ||
446 | ;; Once upon a time, this was a macro of almost infinite hairiness which | |
447 | ;; tried to do everything itself, including inspecting its arguments for | |
448 | ;; constant-ness to decide whether it could elide bits of code. This | |
449 | ;; became unsustainable. Nowadays, it packages up its parser arguments | |
450 | ;; into functions and calls some primitive functions to do the heavy | |
451 | ;; lifting. | |
452 | ;; | |
453 | ;; The precise protocol between this macro and the backend functions is | |
454 | ;; subject to change: don't rely on it. | |
455 | ||
456 | (let* ((accvar (or acc (gensym "ACC-"))) | |
457 | (func (if sepp '%many-sep '%many))) | |
458 | `(let ((,accvar ,init)) | |
459 | (declare (ignorable ,accvar)) | |
460 | (,func (lambda (,new) | |
461 | (declare (ignorable ,new)) | |
462 | (setf ,accvar ,update)) | |
048d0b2d | 463 | (lambda () ,final) |
dea4d055 MW |
464 | (parser () ,parser) |
465 | ,@(and sepp (list `(parser () ,sep))) | |
466 | ,@(and minp `(:min ,min)) | |
467 | ,@(and max `(:max ,max)) | |
468 | ,@(and (not (eq commitp t)) `(:commitp ,commitp)))))) | |
469 | ||
470 | (export 'list) | |
471 | (defparse list ((&rest keys) parser &optional (sep nil sepp)) | |
3109662a | 472 | "Like `many', but simply returns a list of the parser results." |
dea4d055 MW |
473 | (with-gensyms (acc) |
474 | `(parse (many (,acc nil (cons it ,acc) :final (nreverse ,acc) ,@keys) | |
475 | ,parser ,@(and sepp (list sep)))))) | |
476 | ||
477 | (export 'skip-many) | |
478 | (defparse skip-many ((&rest keys) parser &optional (sep nil sepp)) | |
3109662a | 479 | "Like `many', but ignores the results." |
dea4d055 MW |
480 | `(parse (many (nil nil nil ,@keys) |
481 | ,parser ,@(and sepp (list sep))))) | |
482 | ||
483 | (export 'or) | |
484 | (defparse or (&rest parsers) | |
485 | "Try a number of alternative parsers. | |
486 | ||
487 | Each of the PARSERS in turn is tried. If any succeeds, then its result | |
488 | becomes the result of the parse. If any parser fails after consuming | |
489 | input, or if all of the parsers fail, then the overall parse fails, with | |
490 | the union of the expected items from the individual parses." | |
491 | ||
492 | (with-gensyms (fail cp failarg) | |
493 | (labels ((walk (parsers failures) | |
494 | (if (null parsers) | |
495 | `(,fail nil (list ,@(reverse failures))) | |
496 | (with-gensyms (value win consumedp) | |
497 | `(multiple-value-bind (,value ,win ,consumedp) | |
498 | (parse ,(car parsers)) | |
499 | (cond (,win | |
500 | (values ,value ,win ,consumedp)) | |
501 | (,consumedp | |
502 | (,fail t (list ,value))) | |
503 | (t | |
504 | ,(walk (cdr parsers) | |
505 | (cons value failures))))))))) | |
506 | `(flet ((,fail (,cp ,failarg) | |
507 | (values (combine-parser-failures ,failarg) nil ,cp))) | |
508 | ,(walk parsers nil))))) | |
509 | ||
510 | (export '?) | |
511 | (defparse ? (parser &optional (value nil)) | |
512 | "Matches PARSER or nothing; fails if PARSER fails after consuming input." | |
513 | `(parse (or ,parser (t ,value)))) | |
514 | ||
515 | ;;;-------------------------------------------------------------------------- | |
516 | ;;; Pluggable parsers. | |
517 | ||
518 | (export 'call-pluggable-parser) | |
519 | (defun call-pluggable-parser (symbol &rest args) | |
520 | "Call the pluggable parser denoted by SYMBOL. | |
521 | ||
522 | A `pluggable parser' is an indirection point at which a number of | |
523 | alternative parsers can be attached dynamically. The parsers are tried in | |
524 | some arbitrary order, so one should be careful to avoid ambiguities; each | |
525 | is paseed the given ARGS. | |
526 | ||
527 | If any parser succeeds then it determines the result; if any parser fails | |
528 | having consumed input then the pluggable parser fails immediately. If all | |
529 | of the parsers fail without consuming input then the pluggable parser | |
530 | fails with the union of the individual failure indicators." | |
531 | ||
532 | (let ((expected nil)) | |
533 | (dolist (item (get symbol 'parser)) | |
534 | (multiple-value-bind (value winp consumedp) (apply (cdr item) args) | |
535 | (when (or winp consumedp) | |
536 | (return-from call-pluggable-parser (values value winp consumedp))) | |
537 | (push value expected))) | |
538 | (values (combine-parser-failures expected) nil nil))) | |
539 | ||
540 | (export 'plug) | |
541 | (defparse plug (symbol &rest args) | |
542 | "Call the pluggable parser denoted by SYMBOL. | |
543 | ||
544 | This is just like the function `call-pluggable-parser', but the SYMBOL is | |
545 | not evaluated." | |
546 | `(call-pluggable-parser ',symbol ,@args)) | |
547 | ||
548 | (export 'pluggable-parser-add) | |
549 | (defun pluggable-parser-add (symbol tag parser) | |
550 | "Adds an element to a pluggable parser. | |
551 | ||
552 | The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- | |
553 | comparable object which identifies the element. The PARSER is a parser | |
554 | function; it will be passed arguments via `pluggable-parser'. | |
555 | ||
556 | If a parser with the given TAG is already attached to SYMBOL then the new | |
557 | parser replaces the old one; otherwise it is added to the collection." | |
558 | ||
559 | (let ((alist (get symbol 'parser))) | |
560 | (aif (assoc tag alist) | |
561 | (setf (cdr it) parser) | |
562 | (setf (get symbol 'parser) (acons tag parser alist))))) | |
563 | ||
564 | (export 'define-pluggable-parser) | |
565 | (defmacro define-pluggable-parser (symbol tag (&rest bvl) &body body) | |
566 | "Adds an element to a pluggable parser. | |
567 | ||
568 | The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- | |
569 | comparable object which identifies the element. Neither SYMBOL nor TAG is | |
570 | evaluated. The BODY is a parser expression; the BVL is a lambda list | |
bf090e02 | 571 | describing how to bind the arguments supplied via `pluggable-parser'. |
dea4d055 MW |
572 | |
573 | If a parser with the given TAG is already attached to SYMBOL then the new | |
574 | parser replaces the old one; otherwise it is added to the collection." | |
575 | ||
576 | `(pluggable-parser-add ',symbol ',tag (lambda ,bvl ,@body))) | |
577 | ||
578 | ;;;-------------------------------------------------------------------------- | |
579 | ;;; Rewindable parser context protocol. | |
580 | ||
581 | (eval-when (:compile-toplevel :load-toplevel :execute) | |
582 | ||
583 | (export 'parser-capture-place) | |
584 | (defgeneric parser-capture-place (context) | |
585 | (:documentation | |
586 | "Capture the current position of a parser CONTEXT. | |
587 | ||
588 | The return value may later be used with `parser-restore-place'. Be | |
589 | careful: all of this is happening at macro-expansion time.") | |
590 | (:method (context) | |
591 | (error "Parser context ~S doesn't support rewinding." context))) | |
592 | ||
593 | (export 'parser-restore-place) | |
594 | (defgeneric parser-restore-place (context place) | |
595 | (:documentation | |
596 | "`Rewind' the parser CONTEXT back to the captured PLACE. | |
597 | ||
598 | The place was previously captured by `parser-capture-place'. Be careful: | |
599 | all of this is happening at macro-expansion time.")) | |
600 | ||
601 | (export 'parser-release-place) | |
602 | (defgeneric parser-release-place (context place) | |
603 | (:documentation | |
604 | "Release a PLACE captured from the parser CONTEXT. | |
605 | ||
606 | The place was previously captured by `parser-capture-place'. The | |
607 | underlying scanner can use this call to determine whether there are | |
608 | outstanding captured places, and thereby optimize its behaviour. Be | |
609 | careful: all of this is happening at macro-expansion time.") | |
1d8cc67a MW |
610 | (:method (context place) |
611 | (declare (ignore context place)) | |
612 | nil)) | |
dea4d055 MW |
613 | |
614 | (export 'parser-places-must-be-released-p) | |
615 | (defgeneric parser-places-must-be-released-p (context) | |
616 | (:documentation | |
617 | "Answer whether places captured from the parser CONTEXT need releasing. | |
618 | ||
619 | Some contexts -- well, actually, their run-time counterparts -- work | |
620 | better if they can keep track of which places are captured, or at least if | |
621 | there are captured places outstanding. If this function returns true | |
622 | (which is the default) then `with-parser-place' (and hence parser macros | |
623 | such as `peek') will expand to `unwind-protect' forms in order to perform | |
624 | the correct cleanup. If it returns false, then the `unwind-protect' is | |
625 | omitted so that the runtime code does't have to register cleanup | |
626 | handlers.") | |
1d8cc67a MW |
627 | (:method (context) |
628 | (declare (ignore context)) | |
629 | t))) | |
dea4d055 MW |
630 | |
631 | (export 'with-parser-place) | |
632 | (defmacro with-parser-place ((place context) &body body) | |
633 | "Evaluate BODY surrounded with a binding of PLACE to a captured place. | |
634 | ||
635 | The surrounding code will release the PLACE properly on exit from the body | |
636 | forms. This is all happening at macro-expansion time." | |
637 | ;; ... which means that it's a bit hairy. Fortunately, the nested | |
638 | ;; backquotes aren't that bad. | |
639 | (once-only (context) | |
640 | (with-gensyms (bodyfunc) | |
641 | `(with-gensyms (,place) | |
642 | (flet ((,bodyfunc () ,@body)) | |
643 | `(let ((,,place ,(parser-capture-place ,context))) | |
644 | ,(if (parser-places-must-be-released-p ,context) | |
645 | `(unwind-protect ,(,bodyfunc) | |
646 | ,(parser-release-place ,context ,place)) | |
647 | (,bodyfunc)))))))) | |
648 | ||
649 | (export 'peek) | |
650 | (defparse peek (:context (context t) parser) | |
651 | "Attempt to run PARSER, but rewind the underlying source if it fails." | |
652 | (with-gensyms (value win consumedp) | |
653 | (with-parser-place (place context) | |
654 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
655 | (cond (,win | |
656 | (values ,value ,win ,consumedp)) | |
657 | (t | |
658 | ,(parser-restore-place context place) | |
659 | (values ,value ,win nil))))))) | |
660 | ||
661 | ;;;-------------------------------------------------------------------------- | |
662 | ;;; Character parser context protocol. | |
663 | ||
664 | (export 'character-parser-context) | |
665 | (defclass character-parser-context () | |
666 | () | |
667 | (:documentation | |
668 | "Base class for parsers which read one character at a time.")) | |
669 | ||
670 | (export 'parser-current-char) | |
671 | (defgeneric parser-current-char (context) | |
672 | (:documentation | |
673 | "Return the parser's current character. | |
674 | ||
675 | It is an error to invoke this operation if the parser is at end-of-file; | |
676 | you must check this first. Be careful: all of this is happening at | |
677 | macro-expansion time.")) | |
678 | ||
bf090e02 | 679 | (export 'if-char) |
dea4d055 MW |
680 | (defparse if-char (:context (context character-parser-context) |
681 | (&optional (char 'it)) condition consequent alternative) | |
682 | "Basic character-testing parser. | |
683 | ||
684 | If there is a current character, bind it to CHAR and evaluate the | |
bf090e02 MW |
685 | CONDITION; if that is true, then evaluate CONSEQUENT and step the parser |
686 | (in that order); otherwise, if either we're at EOF or the CONDITION | |
687 | returns false, evaluate ALTERNATIVE. The result of `if-char' are the | |
688 | values returned by CONSEQUENT or ALTERNATIVE." | |
dea4d055 MW |
689 | |
690 | (with-gensyms (block) | |
691 | `(block ,block | |
692 | (unless ,(parser-at-eof-p context) | |
693 | (let ((,char ,(parser-current-char context))) | |
694 | (when ,condition | |
bf090e02 MW |
695 | (return-from ,block |
696 | (multiple-value-prog1 ,consequent | |
697 | ,(parser-step context)))))) | |
dea4d055 MW |
698 | ,alternative))) |
699 | ||
700 | (defmethod expand-parser-spec | |
701 | ((context character-parser-context) (spec (eql :any))) | |
702 | "Matches any character; result is the character. | |
703 | ||
704 | The failure indicator is the keyword `:any'." | |
705 | (expand-parser-spec context | |
706 | '(if-char () t | |
707 | (values it t t) | |
708 | (values '(:any) nil nil)))) | |
709 | ||
710 | (export 'char) | |
711 | (defparse char (:context (context character-parser-context) char) | |
712 | "Matches the character CHAR (evaluated); result is the character. | |
713 | ||
714 | The failure indicator is CHAR." | |
715 | ||
716 | (once-only (char) | |
717 | (with-gensyms (it) | |
718 | (expand-parser-spec context | |
719 | `(if-char (,it) (char= ,it ,char) | |
720 | (values ,it t t) | |
721 | (values (list ,char) nil nil)))))) | |
722 | ||
723 | (defmethod expand-parser-spec | |
724 | ((context character-parser-context) (char character)) | |
725 | (expand-parser-spec context `(char ,char))) | |
726 | ||
727 | (export 'satisfies) | |
728 | (defparse satisfies (:context (context character-parser-context) predicate) | |
729 | "Matches a character that satisfies the PREDICATE | |
730 | ||
731 | The PREDICATE is a function designator. On success, the result is the | |
732 | character. The failure indicator is PREDICATE; you probably want to apply | |
733 | a `label'." | |
734 | ||
735 | (with-gensyms (it) | |
736 | (expand-parser-spec context | |
737 | `(if-char (,it) (,predicate ,it) | |
738 | (values ,it t t) | |
739 | (values '(,predicate) nil nil))))) | |
740 | ||
741 | (export 'not) | |
742 | (defparse not (:context (context character-parser-context) char) | |
743 | "Matches any character other than CHAR; result is the character. | |
744 | ||
745 | The failure indicator is (not CHAR)." | |
746 | ||
747 | (once-only (char) | |
748 | (with-gensyms (it) | |
749 | (expand-parser-spec context | |
750 | `(if-char (,it) (char/= ,it ,char) | |
751 | (values ,it t t) | |
752 | (values `((not ,,char)) nil nil)))))) | |
753 | ||
754 | (export 'filter) | |
755 | (defparse filter (:context (context character-parser-context) predicate) | |
756 | "Matches a character that satisfies the PREDICATE; result is the output of | |
757 | PREDICATE. | |
758 | ||
759 | The failure indicator is PREDICATE; you probably want to apply a `label'." | |
760 | ||
761 | ;; Can't do this one with `if-char'. | |
762 | (with-gensyms (block value) | |
763 | `(block ,block | |
764 | (unless ,(parser-at-eof-p context) | |
765 | (let ((,value (,predicate ,(parser-current-char context)))) | |
766 | (when ,value | |
767 | ,(parser-step context) | |
768 | (return-from ,block (values ,value t t))))) | |
769 | (values '(,predicate) nil nil)))) | |
770 | ||
771 | (defmethod expand-parser-spec | |
772 | ((context character-parser-context) (spec (eql :whitespace))) | |
773 | "Matches any sequence of whitespace; result is nil. | |
774 | ||
775 | Cannot fail." | |
776 | ||
777 | `(progn | |
778 | (cond ((and (not ,(parser-at-eof-p context)) | |
779 | (whitespace-char-p ,(parser-current-char context))) | |
780 | (loop | |
781 | ,(parser-step context) | |
782 | (when (or ,(parser-at-eof-p context) | |
783 | (not (whitespace-char-p | |
784 | ,(parser-current-char context)))) | |
785 | (return (values nil t t))))) | |
786 | (t | |
787 | (values nil t nil))))) | |
788 | ||
789 | (defmethod expand-parser-spec | |
790 | ((context character-parser-context) (string string)) | |
791 | "Matches the constituent characters of STRING; result is the string. | |
792 | ||
793 | The failure indicator is STRING; on failure, the input is rewound, so this | |
794 | only works on rewindable contexts." | |
795 | ||
796 | (with-gensyms (i) | |
797 | (unless (typep string 'simple-string) | |
798 | (setf string (make-array (length string) :initial-contents string))) | |
799 | (with-parser-place (place context) | |
800 | `(dotimes (,i ,(length string) (values ,string t | |
801 | ,(plusp (length string)))) | |
802 | (when (or ,(parser-at-eof-p context) | |
803 | (char/= ,(parser-current-char context) | |
804 | (schar ,string ,i))) | |
805 | ,(parser-restore-place context place) | |
806 | (return (values '(,string) nil nil))) | |
807 | ,(parser-step context))))) | |
808 | ||
809 | ;;;-------------------------------------------------------------------------- | |
810 | ;;; Token parser context protocol. | |
811 | ||
812 | (export 'token-parser-context) | |
813 | (defclass token-parser-context () | |
814 | () | |
815 | (:documentation | |
816 | "Base class for parsers which read tokens with associated semantic values. | |
817 | ||
818 | A token, according to the model suggested by this class, has a /type/, | |
819 | which classifies the token and is the main contributer to guiding the | |
820 | parse, and a /value/, which carries additional semantic information. | |
821 | ||
822 | This may seem redundant given Lisp's dynamic type system; but it's not | |
823 | actually capable of drawing sufficiently fine distinctions easily. For | |
824 | example, we can represent a symbol either as a string or a symbol; but | |
825 | using strings conflicts with being able to represent string literals, and | |
826 | using symbols looks ugly and they don't get GCed. Similarly, it'd be | |
827 | convenient to represent punctuation as characters, but that conflicts with | |
828 | using them for character literals. So, we introduce our own notion of | |
829 | token type. | |
830 | ||
831 | Token scanners are expected to signal end-of-file with a token of type | |
832 | `:eof'.")) | |
833 | ||
834 | (export 'parser-token-type) | |
835 | (defgeneric parser-token-type (context) | |
836 | (:documentation | |
837 | "Return the parser's current token type.")) | |
838 | ||
839 | (export 'parser-token-value) | |
840 | (defgeneric parser-token-value (context) | |
841 | (:documentation | |
842 | "Return the parser's current token's semantic value.")) | |
843 | ||
844 | (export 'token) | |
845 | (defparse token (:context (context token-parser-context) | |
846 | type &optional (value nil valuep) &key peekp) | |
847 | "Match tokens of a particular type. | |
848 | ||
849 | A token matches under the following conditions: | |
850 | ||
851 | * If the value of TYPE is `t' then the match succeeds if and only if the | |
c91b90c3 | 852 | parser is not at end-of-file. |
dea4d055 MW |
853 | |
854 | * If the value of TYPE is not `eql' to the token type then the match | |
855 | fails. | |
856 | ||
857 | * If VALUE is specified, and the value of VALUE is not `equal' to the | |
858 | token semantic value then the match fails. | |
859 | ||
860 | * Otherwise the match succeeds. | |
861 | ||
862 | If the match is successful and the parser is not at end-of-file, and the | |
863 | value of PEEKP is nil then the parser advances to the next token; the | |
864 | result of the match is the token's value. | |
865 | ||
866 | If the match fails then the failure indicator is either TYPE or (TYPE | |
867 | VALUE), depending on whether a VALUE was specified." | |
868 | ||
869 | (once-only (type value peekp) | |
870 | (with-gensyms (tokty tokval) | |
871 | `(let ((,tokty ,(parser-token-type context)) | |
872 | (,tokval ,(parser-token-value context))) | |
873 | (if ,(if (eq type t) | |
874 | `(not (eq ,tokty :eof)) | |
875 | (flet ((check-value (cond) | |
876 | (if valuep | |
877 | `(and ,cond (equal ,tokval ,value)) | |
878 | cond))) | |
879 | (if (constantp type) | |
880 | (check-value `(eql ,tokty ,type)) | |
881 | `(if (eq ,type t) | |
882 | (not (eq ,tokty :eof)) | |
883 | ,(check-value `(eql ,tokty ,type)))))) | |
884 | ,(let* ((result `(values ,tokval t ,(if (constantp peekp) | |
885 | (not peekp) | |
886 | `(not ,peekp)))) | |
887 | (step (parser-step context))) | |
888 | (cond ((not (constantp peekp)) | |
889 | `(multiple-value-prog1 ,result | |
890 | (unless ,peekp ,step))) | |
891 | (peekp | |
892 | result) | |
893 | (t | |
894 | `(multiple-value-prog1 ,result | |
895 | ,step)))) | |
896 | (values (list ,(if valuep `(list ,type ,value) type)) | |
897 | nil nil)))))) | |
898 | ||
899 | (defmethod expand-parser-spec ((context token-parser-context) spec) | |
900 | (if (atom spec) | |
901 | (expand-parser-spec context `(token ,spec)) | |
902 | (call-next-method))) | |
903 | ||
904 | (defmethod expand-parser-spec ((context token-parser-context) (spec string)) | |
905 | (expand-parser-spec context `(token :id ,spec))) | |
906 | ||
907 | ;;;----- That's all, folks -------------------------------------------------- |