Commit | Line | Data |
---|---|---|
dea4d055 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Protocol for parsing. | |
4 | ;;; | |
5 | ;;; (c) 2009 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
10 | ;;; This file is part of the Sensble Object Design, an object system for C. | |
11 | ;;; | |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | ;;;-------------------------------------------------------------------------- | |
27 | ;;; Parser protocol discussion. | |
28 | ;;; | |
29 | ;;; Other languages, notably Haskell and ML, have `parser combinator | |
30 | ;;; libraries', which allow one to construct recursive descent parsers using | |
31 | ;;; approximately pleasant syntax. While attempts have been made to | |
32 | ;;; introduce the benefits of these libraries to Lisp, they've not been | |
33 | ;;; altogether successful; this seems due to Lisp's lack of features such as | |
34 | ;;; pattern matching, currying and lazy evaluation. Rather than fight with | |
35 | ;;; Lisp's weaknesses, this library plays to its strength, making heavy use | |
36 | ;;; of macros. Effectively, the `combinators' we build here are /compile- | |
37 | ;;; time/ combinators, not run-time ones. | |
38 | ;;; | |
39 | ;;; A `parser' is simply an expression which returns three values. | |
40 | ;;; | |
41 | ;;; * If the second value is nil, then the parser is said to have /failed/, | |
42 | ;;; and the first value is a list describing the things that the parser | |
43 | ;;; expected to find but didn't. (The precise details of the list items | |
44 | ;;; are important to error-reporting functions, but not to the low-level | |
45 | ;;; machinery, and are left up to higher-level protocols to nail down | |
46 | ;;; harder.) | |
47 | ;;; | |
48 | ;;; * If the second value is not nil, then the parser is said to have | |
49 | ;;; /succeeded/, and the first value is its /result/. | |
50 | ;;; | |
51 | ;;; * The third value indicates whether the parser consumed any of its | |
52 | ;;; input. Parsers don't backtrack implicitly (to avoid space leaks and | |
53 | ;;; bad performance), so the `consumedp' return value is used to decide | |
54 | ;;; whether the parser has `committed' to a particular branch. If the | |
55 | ;;; parser context supports place-capture (many do) then `peek' can be | |
56 | ;;; used to suppress consumption of input in the case of parser failure. | |
57 | ;;; | |
58 | ;;; The functions and macros here are simply ways of gluing together | |
59 | ;;; expressions which obey this protocol. | |
60 | ;;; | |
bf090e02 | 61 | ;;; The main contribution of this file is a macro `with-parser-context' which |
dea4d055 | 62 | ;;; embeds a parsing-specific S-expressions language entered using the new |
bf090e02 MW |
63 | ;;; macro `parse'. The behaviour of this macro is controlled by a pair of |
64 | ;;; compile-time generic functions `expand-parser-spec' and | |
65 | ;;; `expand-parser-form'. As well as the parser expression they're meant to | |
66 | ;;; process, these functions dispatch on a `context' argument, which is | |
67 | ;;; intended to help `leaf' parsers find the terminal symbols which they're | |
68 | ;;; meant to process. | |
dea4d055 | 69 | ;;; |
bf090e02 MW |
70 | ;;; Note that the context is a compile-time object, constructed by the |
71 | ;;; `parse' macro expansion function, though the idea is that it will contain | |
72 | ;;; the name or names of variables holding the run-time parser state (which | |
73 | ;;; will typically be a lexical analyser or an input stream or suchlike). | |
dea4d055 MW |
74 | |
75 | (cl:in-package #:sod-parser) | |
76 | ||
77 | ;;;-------------------------------------------------------------------------- | |
78 | ;;; Utilities. | |
79 | ||
80 | (defun combine-parser-failures (failures) | |
81 | "Combine the failure indicators listed in FAILURES. | |
82 | ||
83 | (Note that this means that FAILURES is a list of lists.)" | |
84 | ||
85 | (reduce (lambda (f ff) (union f ff :test #'equal)) | |
86 | failures | |
87 | :initial-value nil)) | |
88 | ||
89 | ;;;-------------------------------------------------------------------------- | |
90 | ;;; Basic protocol. | |
91 | ||
92 | (eval-when (:compile-toplevel :load-toplevel :execute) | |
93 | ||
94 | (export 'expand-parser-spec) | |
95 | (defgeneric expand-parser-spec (context spec) | |
96 | (:documentation | |
97 | "Expand a parser specifier SPEC in a particular parser CONTEXT.") | |
98 | (:method (context (spec list)) | |
99 | (expand-parser-form context (car spec) (cdr spec)))) | |
100 | ||
101 | (export 'expand-parser-form) | |
102 | (defgeneric expand-parser-form (context head tail) | |
103 | (:documentation | |
104 | "Expand a parser list-form given by HEAD and TAIL, in CONTEXT.") | |
105 | (:method (context head tail) | |
1d8cc67a | 106 | (declare (ignore context)) |
dea4d055 MW |
107 | (cons head tail))) |
108 | ||
109 | (export 'wrap-parser) | |
110 | (defgeneric wrap-parser (context form) | |
111 | (:documentation | |
112 | "Enclose FORM in whatever is necessary to make the parser work.") | |
1d8cc67a MW |
113 | (:method (context form) |
114 | (declare (ignore context)) | |
115 | form))) | |
dea4d055 MW |
116 | |
117 | (export 'defparse) | |
118 | (defmacro defparse (name bvl &body body) | |
119 | "Define a new parser form. | |
120 | ||
121 | The full syntax is hairier than it looks: | |
122 | ||
123 | defparse NAME ( [[ :context (CTX SPEC) ]] . BVL ) | |
124 | { FORM }* | |
125 | ||
126 | The macro defines a new parser form (NAME ...) which is expanded by the | |
127 | body FORMs. The BVL is a destructuring lambda-list to be applied to the | |
128 | tail of the form. The body forms are enclosed in a block called NAME. | |
129 | ||
dea4d055 MW |
130 | If the :context key is provided, then the parser form is specialized on a |
131 | particular class of parser contexts SPEC; specialized expanders take | |
132 | priority over less specialized or unspecialized expanders -- so you can | |
133 | use this to override the built-in forms safely if they don't seem to be | |
134 | doing the right thing for you. Also, the context -- which is probably | |
135 | interesting to you if you've bothered to specialize -- is bound to the | |
136 | variable CTX." | |
137 | ||
138 | ;; BUG! misplaces declarations: if you declare the CONTEXT argument | |
139 | ;; `special' it won't be bound properly. I'm really not at all sure I know | |
140 | ;; how to fix this. | |
141 | ||
142 | (with-gensyms (head tail context) | |
143 | (let ((ctxclass t)) | |
144 | (loop | |
145 | (unless (and bvl (keywordp (car bvl))) (return)) | |
146 | (ecase (pop bvl) | |
147 | (:context (destructuring-bind (name spec) (pop bvl) | |
148 | (setf ctxclass spec context name))))) | |
149 | (multiple-value-bind (doc decls body) (parse-body body) | |
150 | `(defmethod expand-parser-form | |
151 | ((,context ,ctxclass) (,head (eql ',name)) ,tail) | |
152 | ,@doc | |
1d8cc67a | 153 | (declare (ignorable ,context)) |
dea4d055 MW |
154 | (block ,name |
155 | (destructuring-bind ,bvl ,tail | |
156 | ,@decls | |
157 | ,@body))))))) | |
158 | ||
159 | (export '(with-parser-context parse)) | |
160 | (defmacro with-parser-context ((class &rest initargs) &body body) | |
161 | "Evaluate BODY with a macro `parse' which expands parser forms. | |
162 | ||
163 | Evaluate BODY as an implicit progn. At compile time, a parser context is | |
164 | constructed by (apply #'make-instance CLASS INITARGS). The BODY can make | |
165 | use of the macro `parse': | |
166 | ||
167 | parse SPEC | |
168 | ||
169 | which parses the input in the manner described by SPEC, in the context of | |
170 | the parser context." | |
171 | ||
172 | (let ((context (apply #'make-instance class initargs))) | |
173 | (wrap-parser context | |
174 | `(macrolet ((parse (form) | |
175 | (expand-parser-spec ',context form))) | |
176 | ,@body)))) | |
177 | ||
178 | ;;;-------------------------------------------------------------------------- | |
179 | ;;; Common parser context protocol. | |
180 | ||
181 | (export 'parser-at-eof-p) | |
182 | (defgeneric parser-at-eof-p (context) | |
183 | (:documentation | |
184 | "Return whether the parser has reached the end of its input. | |
185 | ||
186 | Be careful: all of this is happening at macro expansion time.")) | |
187 | ||
188 | (export 'parser-step) | |
189 | (defgeneric parser-step (context) | |
190 | (:documentation | |
191 | "Advance the parser to the next character. | |
192 | ||
193 | Be careful: all of this is happening at macro-expansion time.")) | |
194 | ||
195 | (defmethod expand-parser-spec (context (spec (eql :eof))) | |
196 | "Tests succeeds if the parser has reached the end of its input. | |
197 | ||
198 | The failure indicator is the keyword `:eof'." | |
199 | ||
200 | `(if ,(parser-at-eof-p context) | |
201 | (values :eof t nil) | |
202 | (values '(:eof) nil nil))) | |
203 | ||
204 | ;;;-------------------------------------------------------------------------- | |
205 | ;;; Useful macros for dealing with parsers. | |
206 | ||
207 | (export 'it) | |
208 | (export 'if-parse) | |
209 | (defmacro if-parse ((&key (result 'it) expected (consumedp (gensym "CP"))) | |
210 | parser consequent &optional (alternative nil altp)) | |
211 | "Conditional parsing construction. | |
212 | ||
213 | If PARSER succeeds, then evaluate CONSEQUENT with RESULT bound to the | |
214 | result; otherwise evaluate ALTERNATIVE with EXPECTED bound to the | |
215 | expected-item list. If ALTERNATIVE is omitted, then propagate the failure | |
216 | following the parser protocol." | |
217 | ||
218 | (with-gensyms (value win) | |
219 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
220 | (declare (ignorable ,consumedp)) | |
221 | (if ,win | |
222 | (let ((,result ,value)) | |
223 | (declare (ignorable ,result)) | |
224 | ,consequent) | |
225 | ,(cond ((not altp) | |
226 | `(values ,value nil ,consumedp)) | |
227 | (expected | |
228 | `(let ((,expected ,value)) ,alternative)) | |
229 | (t | |
230 | alternative)))))) | |
231 | ||
232 | (export 'when-parse) | |
233 | (defmacro when-parse ((&optional (result 'it)) parser &body body) | |
234 | "Convenience macro for conditional parsing. | |
235 | ||
236 | If PARSER succeeds then evaluate BODY with RESULT bound to the result; | |
237 | otherwise propagate the failure." | |
238 | `(if-parse (:result ,result) ,parser (progn ,@body))) | |
239 | ||
240 | (export 'cond-parse) | |
241 | (defmacro cond-parse ((&key (result 'it) expected | |
242 | (consumedp (gensym "CP"))) | |
243 | &body clauses) | |
244 | "Frightening conditional parsing construct. | |
245 | ||
246 | Each of the CLAUSES has the form (PARSER &rest FORMS); the special `fake' | |
247 | parser form `t' may be used to describe a default action. If the PARSER | |
248 | succeeds then evaluate FORMS in order with RESULT bound to the parser | |
249 | result (if there are no forms, then propagate the success); if the PARSER | |
250 | fails without consuming input, then move onto the next clause. | |
251 | ||
252 | If the default clause (if any) is reached, or a parser fails after | |
253 | consuming input, then EXPECTED is bound to a list of failure indicators | |
254 | and the default clause's FORMS are evaluated and with CONSUMEDP bound to a | |
255 | generalized boolean indicating whether any input was consumed. If there | |
256 | is no default clause, and either some parser fails after consuming input, | |
257 | or all of the parsers fail without consuming, then a failure is reported | |
258 | and the input-consumption indicator is propagated. | |
259 | ||
260 | If a parser fails after consuming input, then the failure indicators are | |
261 | whatever that parser reported; if all the parsers fail without consuming | |
262 | then the failure indicators are the union of the indicators reported by | |
263 | the various parsers." | |
264 | ||
265 | (with-gensyms (block fail failarg) | |
266 | (labels ((walk (clauses failures) | |
267 | (cond ((null clauses) | |
268 | (values `(,fail nil (list ,@(reverse failures))) | |
269 | `(values (combine-parser-failures ,failarg) | |
270 | nil | |
271 | ,consumedp))) | |
272 | ((eq (caar clauses) t) | |
273 | (values `(,fail nil (list ,@(reverse failures))) | |
274 | `(,@(if expected | |
275 | `(let ((,expected | |
276 | (combine-parser-failures | |
277 | ,failarg)))) | |
278 | `(progn)) | |
279 | ,@(cdar clauses)))) | |
280 | (t | |
281 | (with-gensyms (value win cp) | |
282 | (multiple-value-bind (inner failbody) | |
283 | (walk (cdr clauses) (cons value failures)) | |
284 | (values `(multiple-value-bind (,value ,win ,cp) | |
285 | (parse ,(caar clauses)) | |
286 | (when ,win | |
287 | (return-from ,block | |
288 | (let ((,result ,value) | |
289 | (,consumedp ,cp)) | |
290 | (declare (ignorable ,result | |
291 | ,consumedp)) | |
292 | ,@(cdar clauses)))) | |
293 | (when ,cp | |
294 | (,fail t (list ,value))) | |
295 | ,inner) | |
296 | failbody))))))) | |
297 | (multiple-value-bind (inner failbody) (walk clauses nil) | |
298 | `(block ,block | |
299 | (flet ((,fail (,consumedp ,failarg) | |
300 | (declare (ignorable ,consumedp ,failarg)) | |
301 | ,failbody)) | |
302 | ,inner)))))) | |
303 | ||
304 | (export 'parser) | |
305 | (defmacro parser (bvl &body parser) | |
306 | "Functional abstraction for parsers." | |
307 | (multiple-value-bind (doc decls body) (parse-body parser) | |
308 | `(lambda ,bvl ,@doc ,@decls (parse ,@body)))) | |
309 | ||
310 | ;;;-------------------------------------------------------------------------- | |
311 | ;;; Standard parser forms. | |
312 | ||
313 | (export 'label) | |
314 | (defparse label (label parser) | |
315 | "If PARSER fails, use LABEL as the expected outcome. | |
316 | ||
317 | The LABEL is only evaluated if necessary." | |
318 | (with-gensyms (value win consumedp) | |
319 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
320 | (if ,win | |
321 | (values ,value ,win ,consumedp) | |
322 | (values (list ,label) nil ,consumedp))))) | |
323 | ||
324 | (defparse t (value) | |
325 | "Succeed, without consuming input, with result VALUE." | |
326 | `(values ,value t nil)) | |
327 | ||
bf090e02 MW |
328 | (defparse nil (indicator) |
329 | "Fail, without consuming input, with indicator VALUE." | |
330 | `(values (list ,indicator) nil nil)) | |
331 | ||
dea4d055 MW |
332 | (defparse when (cond &body parser) |
333 | "If CONDITION is true, then match PARSER; otherwise fail." | |
334 | `(if ,cond (parse ,@parser) (values nil nil nil))) | |
335 | ||
336 | (defmethod expand-parser-spec (context (spec (eql t))) | |
337 | "Always matches without consuming input." | |
1d8cc67a | 338 | (declare (ignore context)) |
dea4d055 MW |
339 | '(values t t nil)) |
340 | ||
bf090e02 MW |
341 | (defmethod expand-parser-spec (context (spec (eql nil))) |
342 | "Always fails without consuming input. The failure indicator is `:fail'." | |
1d8cc67a | 343 | (declare (ignore context)) |
bf090e02 MW |
344 | '(values '(:fail) nil nil)) |
345 | ||
dea4d055 MW |
346 | (export 'seq) |
347 | (defparse seq (binds &body body) | |
348 | "Parse a sequence of heterogeneous items. | |
349 | ||
350 | Syntax: | |
351 | ||
352 | seq ( { ATOMIC-PARSER-FORM | ([VAR] PARSER-FORM) }* ) | |
353 | { FORM }* | |
354 | ||
355 | The behaviour is similar to `let*'. The PARSER-FORMs are processed in | |
356 | order, left to right. If a parser succeeds, then its value is bound to | |
357 | the corresponding VAR, and available within Lisp forms enclosed within | |
358 | subsequent PARSER-FORMs and/or the body FORMs. If any parser fails, then | |
359 | the entire sequence fails. If all of the parsers succeeds, then the FORMs | |
360 | are evaluated as an implicit progn, and the sequence will succeed with the | |
361 | result computed by the final FORM." | |
362 | ||
363 | (with-gensyms (block consumedp) | |
364 | (labels ((walk (binds lets ignores) | |
365 | (if (endp binds) | |
366 | `(let* ((,consumedp nil) | |
367 | ,@(nreverse lets)) | |
368 | ,@(and ignores | |
369 | `((declare (ignore ,@(nreverse ignores))))) | |
370 | (values (progn ,@body) t ,consumedp)) | |
371 | (destructuring-bind (x &optional (y nil yp)) | |
372 | (if (listp (car binds)) | |
373 | (car binds) | |
374 | (list (car binds))) | |
375 | (with-gensyms (var value win cp) | |
376 | (multiple-value-bind (var parser ignores) | |
377 | (if (and yp x) | |
378 | (values x y ignores) | |
379 | (values var (if yp y x) (cons var ignores))) | |
380 | (walk (cdr binds) | |
381 | (cons `(,var (multiple-value-bind | |
382 | (,value ,win ,cp) | |
383 | (parse ,parser) | |
384 | (when ,cp (setf ,consumedp t)) | |
385 | (unless ,win | |
386 | (return-from ,block | |
387 | (values ,value ,nil | |
388 | ,consumedp))) | |
389 | ,value)) | |
390 | lets) | |
391 | ignores))))))) | |
392 | `(block ,block ,(walk binds nil nil))))) | |
393 | ||
394 | (export 'and) | |
395 | (defparse and (:context (context t) &rest parsers) | |
396 | "Parse a sequence of heterogeneous items, but ignore their values. | |
397 | ||
398 | This is just like (and is implemented using) `seq' with all the bindings | |
399 | set to `nil'. The result is `nil'." | |
400 | (with-gensyms (last) | |
401 | (if (null parsers) | |
402 | '(seq () nil) | |
403 | (expand-parser-spec context | |
404 | `(seq (,@(mapcar (lambda (parser) | |
405 | `(nil ,parser)) | |
406 | (butlast parsers)) | |
407 | (,last ,(car (last parsers)))) | |
408 | ,last))))) | |
409 | ||
410 | (export 'lisp) | |
411 | (defparse lisp (&rest forms) | |
412 | "Evaluate FORMs, which should obey the parser protocol." | |
413 | `(progn ,@forms)) | |
414 | ||
415 | (export 'many) | |
416 | (defparse many ((acc init update | |
417 | &key (new 'it) (final acc) (min nil minp) max (commitp t)) | |
418 | parser &optional (sep nil sepp)) | |
419 | "Parse a sequence of homogeneous items. | |
420 | ||
421 | The behaviour is similar to `do'. Initially an accumulator ACC is | |
422 | established, and bound to the value of INIT. The PARSER is then evaluated | |
423 | repeatedly. Each time it succeeds, UPDATE is evaluated with NEW (defaults | |
424 | to `it') bound to the result of the parse, and the value returned by | |
425 | UPDATE is stored back into ACC. If the PARSER fails, then the parse | |
426 | ends. The scope of ACC includes the UPDATE and FINAL forms, and the | |
427 | PARSER and SEP parsers; it is updated by side effects, not rebound. | |
428 | ||
429 | If a SEP parser is provided, then the behaviour changes as follows. | |
430 | Before each attempt to parse a new item using PARSER, the parser SEP is | |
431 | invoked. If SEP fails then the parse ends; if SEP succeeds, and COMMITP | |
432 | is true, then the PARSER must also succeed or the overall parse will | |
433 | fail. If COMMITP is false then a trailing SEP is permitted and ignored. | |
434 | ||
435 | If MAX (which will be evaluated) is not nil, then it must be a number: the | |
436 | parse ends automatically after PARSER has succeeded MAX times. When the | |
437 | parse has ended, if the PARSER succeeded fewer than MIN (which will be | |
438 | evaluated) times then the parse fails. Otherwise, the FINAL form (which | |
439 | defaults to simply returning ACC) is evaluated and its value becomes the | |
440 | result of the parse. MAX defaults to nil -- i.e., no maximum; MIN | |
441 | defaults to 1 if a SEP parser is given, or 0 if not. | |
442 | ||
443 | Note that `many' cannot fail if MIN is zero." | |
444 | ||
445 | ;; Once upon a time, this was a macro of almost infinite hairiness which | |
446 | ;; tried to do everything itself, including inspecting its arguments for | |
447 | ;; constant-ness to decide whether it could elide bits of code. This | |
448 | ;; became unsustainable. Nowadays, it packages up its parser arguments | |
449 | ;; into functions and calls some primitive functions to do the heavy | |
450 | ;; lifting. | |
451 | ;; | |
452 | ;; The precise protocol between this macro and the backend functions is | |
453 | ;; subject to change: don't rely on it. | |
454 | ||
455 | (let* ((accvar (or acc (gensym "ACC-"))) | |
456 | (func (if sepp '%many-sep '%many))) | |
457 | `(let ((,accvar ,init)) | |
458 | (declare (ignorable ,accvar)) | |
459 | (,func (lambda (,new) | |
460 | (declare (ignorable ,new)) | |
461 | (setf ,accvar ,update)) | |
048d0b2d | 462 | (lambda () ,final) |
dea4d055 MW |
463 | (parser () ,parser) |
464 | ,@(and sepp (list `(parser () ,sep))) | |
465 | ,@(and minp `(:min ,min)) | |
466 | ,@(and max `(:max ,max)) | |
467 | ,@(and (not (eq commitp t)) `(:commitp ,commitp)))))) | |
468 | ||
469 | (export 'list) | |
470 | (defparse list ((&rest keys) parser &optional (sep nil sepp)) | |
3109662a | 471 | "Like `many', but simply returns a list of the parser results." |
dea4d055 MW |
472 | (with-gensyms (acc) |
473 | `(parse (many (,acc nil (cons it ,acc) :final (nreverse ,acc) ,@keys) | |
474 | ,parser ,@(and sepp (list sep)))))) | |
475 | ||
476 | (export 'skip-many) | |
477 | (defparse skip-many ((&rest keys) parser &optional (sep nil sepp)) | |
3109662a | 478 | "Like `many', but ignores the results." |
dea4d055 MW |
479 | `(parse (many (nil nil nil ,@keys) |
480 | ,parser ,@(and sepp (list sep))))) | |
481 | ||
482 | (export 'or) | |
483 | (defparse or (&rest parsers) | |
484 | "Try a number of alternative parsers. | |
485 | ||
486 | Each of the PARSERS in turn is tried. If any succeeds, then its result | |
487 | becomes the result of the parse. If any parser fails after consuming | |
488 | input, or if all of the parsers fail, then the overall parse fails, with | |
489 | the union of the expected items from the individual parses." | |
490 | ||
491 | (with-gensyms (fail cp failarg) | |
492 | (labels ((walk (parsers failures) | |
493 | (if (null parsers) | |
494 | `(,fail nil (list ,@(reverse failures))) | |
495 | (with-gensyms (value win consumedp) | |
496 | `(multiple-value-bind (,value ,win ,consumedp) | |
497 | (parse ,(car parsers)) | |
498 | (cond (,win | |
499 | (values ,value ,win ,consumedp)) | |
500 | (,consumedp | |
501 | (,fail t (list ,value))) | |
502 | (t | |
503 | ,(walk (cdr parsers) | |
504 | (cons value failures))))))))) | |
505 | `(flet ((,fail (,cp ,failarg) | |
506 | (values (combine-parser-failures ,failarg) nil ,cp))) | |
507 | ,(walk parsers nil))))) | |
508 | ||
509 | (export '?) | |
510 | (defparse ? (parser &optional (value nil)) | |
511 | "Matches PARSER or nothing; fails if PARSER fails after consuming input." | |
512 | `(parse (or ,parser (t ,value)))) | |
513 | ||
514 | ;;;-------------------------------------------------------------------------- | |
515 | ;;; Pluggable parsers. | |
516 | ||
517 | (export 'call-pluggable-parser) | |
518 | (defun call-pluggable-parser (symbol &rest args) | |
519 | "Call the pluggable parser denoted by SYMBOL. | |
520 | ||
521 | A `pluggable parser' is an indirection point at which a number of | |
522 | alternative parsers can be attached dynamically. The parsers are tried in | |
523 | some arbitrary order, so one should be careful to avoid ambiguities; each | |
524 | is paseed the given ARGS. | |
525 | ||
526 | If any parser succeeds then it determines the result; if any parser fails | |
527 | having consumed input then the pluggable parser fails immediately. If all | |
528 | of the parsers fail without consuming input then the pluggable parser | |
529 | fails with the union of the individual failure indicators." | |
530 | ||
531 | (let ((expected nil)) | |
532 | (dolist (item (get symbol 'parser)) | |
533 | (multiple-value-bind (value winp consumedp) (apply (cdr item) args) | |
534 | (when (or winp consumedp) | |
535 | (return-from call-pluggable-parser (values value winp consumedp))) | |
536 | (push value expected))) | |
537 | (values (combine-parser-failures expected) nil nil))) | |
538 | ||
539 | (export 'plug) | |
540 | (defparse plug (symbol &rest args) | |
541 | "Call the pluggable parser denoted by SYMBOL. | |
542 | ||
543 | This is just like the function `call-pluggable-parser', but the SYMBOL is | |
544 | not evaluated." | |
545 | `(call-pluggable-parser ',symbol ,@args)) | |
546 | ||
547 | (export 'pluggable-parser-add) | |
548 | (defun pluggable-parser-add (symbol tag parser) | |
549 | "Adds an element to a pluggable parser. | |
550 | ||
551 | The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- | |
552 | comparable object which identifies the element. The PARSER is a parser | |
553 | function; it will be passed arguments via `pluggable-parser'. | |
554 | ||
555 | If a parser with the given TAG is already attached to SYMBOL then the new | |
556 | parser replaces the old one; otherwise it is added to the collection." | |
557 | ||
558 | (let ((alist (get symbol 'parser))) | |
559 | (aif (assoc tag alist) | |
560 | (setf (cdr it) parser) | |
561 | (setf (get symbol 'parser) (acons tag parser alist))))) | |
562 | ||
563 | (export 'define-pluggable-parser) | |
564 | (defmacro define-pluggable-parser (symbol tag (&rest bvl) &body body) | |
565 | "Adds an element to a pluggable parser. | |
566 | ||
567 | The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- | |
568 | comparable object which identifies the element. Neither SYMBOL nor TAG is | |
569 | evaluated. The BODY is a parser expression; the BVL is a lambda list | |
bf090e02 | 570 | describing how to bind the arguments supplied via `pluggable-parser'. |
dea4d055 MW |
571 | |
572 | If a parser with the given TAG is already attached to SYMBOL then the new | |
573 | parser replaces the old one; otherwise it is added to the collection." | |
574 | ||
575 | `(pluggable-parser-add ',symbol ',tag (lambda ,bvl ,@body))) | |
576 | ||
577 | ;;;-------------------------------------------------------------------------- | |
578 | ;;; Rewindable parser context protocol. | |
579 | ||
580 | (eval-when (:compile-toplevel :load-toplevel :execute) | |
581 | ||
582 | (export 'parser-capture-place) | |
583 | (defgeneric parser-capture-place (context) | |
584 | (:documentation | |
585 | "Capture the current position of a parser CONTEXT. | |
586 | ||
587 | The return value may later be used with `parser-restore-place'. Be | |
588 | careful: all of this is happening at macro-expansion time.") | |
589 | (:method (context) | |
590 | (error "Parser context ~S doesn't support rewinding." context))) | |
591 | ||
592 | (export 'parser-restore-place) | |
593 | (defgeneric parser-restore-place (context place) | |
594 | (:documentation | |
595 | "`Rewind' the parser CONTEXT back to the captured PLACE. | |
596 | ||
597 | The place was previously captured by `parser-capture-place'. Be careful: | |
598 | all of this is happening at macro-expansion time.")) | |
599 | ||
600 | (export 'parser-release-place) | |
601 | (defgeneric parser-release-place (context place) | |
602 | (:documentation | |
603 | "Release a PLACE captured from the parser CONTEXT. | |
604 | ||
605 | The place was previously captured by `parser-capture-place'. The | |
606 | underlying scanner can use this call to determine whether there are | |
607 | outstanding captured places, and thereby optimize its behaviour. Be | |
608 | careful: all of this is happening at macro-expansion time.") | |
1d8cc67a MW |
609 | (:method (context place) |
610 | (declare (ignore context place)) | |
611 | nil)) | |
dea4d055 MW |
612 | |
613 | (export 'parser-places-must-be-released-p) | |
614 | (defgeneric parser-places-must-be-released-p (context) | |
615 | (:documentation | |
616 | "Answer whether places captured from the parser CONTEXT need releasing. | |
617 | ||
618 | Some contexts -- well, actually, their run-time counterparts -- work | |
619 | better if they can keep track of which places are captured, or at least if | |
620 | there are captured places outstanding. If this function returns true | |
621 | (which is the default) then `with-parser-place' (and hence parser macros | |
622 | such as `peek') will expand to `unwind-protect' forms in order to perform | |
623 | the correct cleanup. If it returns false, then the `unwind-protect' is | |
624 | omitted so that the runtime code does't have to register cleanup | |
625 | handlers.") | |
1d8cc67a MW |
626 | (:method (context) |
627 | (declare (ignore context)) | |
628 | t))) | |
dea4d055 MW |
629 | |
630 | (export 'with-parser-place) | |
631 | (defmacro with-parser-place ((place context) &body body) | |
632 | "Evaluate BODY surrounded with a binding of PLACE to a captured place. | |
633 | ||
634 | The surrounding code will release the PLACE properly on exit from the body | |
635 | forms. This is all happening at macro-expansion time." | |
636 | ;; ... which means that it's a bit hairy. Fortunately, the nested | |
637 | ;; backquotes aren't that bad. | |
638 | (once-only (context) | |
639 | (with-gensyms (bodyfunc) | |
640 | `(with-gensyms (,place) | |
641 | (flet ((,bodyfunc () ,@body)) | |
642 | `(let ((,,place ,(parser-capture-place ,context))) | |
643 | ,(if (parser-places-must-be-released-p ,context) | |
644 | `(unwind-protect ,(,bodyfunc) | |
645 | ,(parser-release-place ,context ,place)) | |
646 | (,bodyfunc)))))))) | |
647 | ||
648 | (export 'peek) | |
649 | (defparse peek (:context (context t) parser) | |
650 | "Attempt to run PARSER, but rewind the underlying source if it fails." | |
651 | (with-gensyms (value win consumedp) | |
652 | (with-parser-place (place context) | |
653 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
654 | (cond (,win | |
655 | (values ,value ,win ,consumedp)) | |
656 | (t | |
657 | ,(parser-restore-place context place) | |
658 | (values ,value ,win nil))))))) | |
659 | ||
660 | ;;;-------------------------------------------------------------------------- | |
661 | ;;; Character parser context protocol. | |
662 | ||
663 | (export 'character-parser-context) | |
664 | (defclass character-parser-context () | |
665 | () | |
666 | (:documentation | |
667 | "Base class for parsers which read one character at a time.")) | |
668 | ||
669 | (export 'parser-current-char) | |
670 | (defgeneric parser-current-char (context) | |
671 | (:documentation | |
672 | "Return the parser's current character. | |
673 | ||
674 | It is an error to invoke this operation if the parser is at end-of-file; | |
675 | you must check this first. Be careful: all of this is happening at | |
676 | macro-expansion time.")) | |
677 | ||
bf090e02 | 678 | (export 'if-char) |
dea4d055 MW |
679 | (defparse if-char (:context (context character-parser-context) |
680 | (&optional (char 'it)) condition consequent alternative) | |
681 | "Basic character-testing parser. | |
682 | ||
683 | If there is a current character, bind it to CHAR and evaluate the | |
bf090e02 MW |
684 | CONDITION; if that is true, then evaluate CONSEQUENT and step the parser |
685 | (in that order); otherwise, if either we're at EOF or the CONDITION | |
686 | returns false, evaluate ALTERNATIVE. The result of `if-char' are the | |
687 | values returned by CONSEQUENT or ALTERNATIVE." | |
dea4d055 MW |
688 | |
689 | (with-gensyms (block) | |
690 | `(block ,block | |
691 | (unless ,(parser-at-eof-p context) | |
692 | (let ((,char ,(parser-current-char context))) | |
693 | (when ,condition | |
bf090e02 MW |
694 | (return-from ,block |
695 | (multiple-value-prog1 ,consequent | |
696 | ,(parser-step context)))))) | |
dea4d055 MW |
697 | ,alternative))) |
698 | ||
699 | (defmethod expand-parser-spec | |
700 | ((context character-parser-context) (spec (eql :any))) | |
701 | "Matches any character; result is the character. | |
702 | ||
703 | The failure indicator is the keyword `:any'." | |
704 | (expand-parser-spec context | |
705 | '(if-char () t | |
706 | (values it t t) | |
707 | (values '(:any) nil nil)))) | |
708 | ||
709 | (export 'char) | |
710 | (defparse char (:context (context character-parser-context) char) | |
711 | "Matches the character CHAR (evaluated); result is the character. | |
712 | ||
713 | The failure indicator is CHAR." | |
714 | ||
715 | (once-only (char) | |
716 | (with-gensyms (it) | |
717 | (expand-parser-spec context | |
718 | `(if-char (,it) (char= ,it ,char) | |
719 | (values ,it t t) | |
720 | (values (list ,char) nil nil)))))) | |
721 | ||
722 | (defmethod expand-parser-spec | |
723 | ((context character-parser-context) (char character)) | |
724 | (expand-parser-spec context `(char ,char))) | |
725 | ||
726 | (export 'satisfies) | |
727 | (defparse satisfies (:context (context character-parser-context) predicate) | |
728 | "Matches a character that satisfies the PREDICATE | |
729 | ||
730 | The PREDICATE is a function designator. On success, the result is the | |
731 | character. The failure indicator is PREDICATE; you probably want to apply | |
732 | a `label'." | |
733 | ||
734 | (with-gensyms (it) | |
735 | (expand-parser-spec context | |
736 | `(if-char (,it) (,predicate ,it) | |
737 | (values ,it t t) | |
738 | (values '(,predicate) nil nil))))) | |
739 | ||
740 | (export 'not) | |
741 | (defparse not (:context (context character-parser-context) char) | |
742 | "Matches any character other than CHAR; result is the character. | |
743 | ||
744 | The failure indicator is (not CHAR)." | |
745 | ||
746 | (once-only (char) | |
747 | (with-gensyms (it) | |
748 | (expand-parser-spec context | |
749 | `(if-char (,it) (char/= ,it ,char) | |
750 | (values ,it t t) | |
751 | (values `((not ,,char)) nil nil)))))) | |
752 | ||
753 | (export 'filter) | |
754 | (defparse filter (:context (context character-parser-context) predicate) | |
755 | "Matches a character that satisfies the PREDICATE; result is the output of | |
756 | PREDICATE. | |
757 | ||
758 | The failure indicator is PREDICATE; you probably want to apply a `label'." | |
759 | ||
760 | ;; Can't do this one with `if-char'. | |
761 | (with-gensyms (block value) | |
762 | `(block ,block | |
763 | (unless ,(parser-at-eof-p context) | |
764 | (let ((,value (,predicate ,(parser-current-char context)))) | |
765 | (when ,value | |
766 | ,(parser-step context) | |
767 | (return-from ,block (values ,value t t))))) | |
768 | (values '(,predicate) nil nil)))) | |
769 | ||
770 | (defmethod expand-parser-spec | |
771 | ((context character-parser-context) (spec (eql :whitespace))) | |
772 | "Matches any sequence of whitespace; result is nil. | |
773 | ||
774 | Cannot fail." | |
775 | ||
776 | `(progn | |
777 | (cond ((and (not ,(parser-at-eof-p context)) | |
778 | (whitespace-char-p ,(parser-current-char context))) | |
779 | (loop | |
780 | ,(parser-step context) | |
781 | (when (or ,(parser-at-eof-p context) | |
782 | (not (whitespace-char-p | |
783 | ,(parser-current-char context)))) | |
784 | (return (values nil t t))))) | |
785 | (t | |
786 | (values nil t nil))))) | |
787 | ||
788 | (defmethod expand-parser-spec | |
789 | ((context character-parser-context) (string string)) | |
790 | "Matches the constituent characters of STRING; result is the string. | |
791 | ||
792 | The failure indicator is STRING; on failure, the input is rewound, so this | |
793 | only works on rewindable contexts." | |
794 | ||
795 | (with-gensyms (i) | |
796 | (unless (typep string 'simple-string) | |
797 | (setf string (make-array (length string) :initial-contents string))) | |
798 | (with-parser-place (place context) | |
799 | `(dotimes (,i ,(length string) (values ,string t | |
800 | ,(plusp (length string)))) | |
801 | (when (or ,(parser-at-eof-p context) | |
802 | (char/= ,(parser-current-char context) | |
803 | (schar ,string ,i))) | |
804 | ,(parser-restore-place context place) | |
805 | (return (values '(,string) nil nil))) | |
806 | ,(parser-step context))))) | |
807 | ||
808 | ;;;-------------------------------------------------------------------------- | |
809 | ;;; Token parser context protocol. | |
810 | ||
811 | (export 'token-parser-context) | |
812 | (defclass token-parser-context () | |
813 | () | |
814 | (:documentation | |
815 | "Base class for parsers which read tokens with associated semantic values. | |
816 | ||
817 | A token, according to the model suggested by this class, has a /type/, | |
818 | which classifies the token and is the main contributer to guiding the | |
819 | parse, and a /value/, which carries additional semantic information. | |
820 | ||
821 | This may seem redundant given Lisp's dynamic type system; but it's not | |
822 | actually capable of drawing sufficiently fine distinctions easily. For | |
823 | example, we can represent a symbol either as a string or a symbol; but | |
824 | using strings conflicts with being able to represent string literals, and | |
825 | using symbols looks ugly and they don't get GCed. Similarly, it'd be | |
826 | convenient to represent punctuation as characters, but that conflicts with | |
827 | using them for character literals. So, we introduce our own notion of | |
828 | token type. | |
829 | ||
830 | Token scanners are expected to signal end-of-file with a token of type | |
831 | `:eof'.")) | |
832 | ||
833 | (export 'parser-token-type) | |
834 | (defgeneric parser-token-type (context) | |
835 | (:documentation | |
836 | "Return the parser's current token type.")) | |
837 | ||
838 | (export 'parser-token-value) | |
839 | (defgeneric parser-token-value (context) | |
840 | (:documentation | |
841 | "Return the parser's current token's semantic value.")) | |
842 | ||
843 | (export 'token) | |
844 | (defparse token (:context (context token-parser-context) | |
845 | type &optional (value nil valuep) &key peekp) | |
846 | "Match tokens of a particular type. | |
847 | ||
848 | A token matches under the following conditions: | |
849 | ||
850 | * If the value of TYPE is `t' then the match succeeds if and only if the | |
c91b90c3 | 851 | parser is not at end-of-file. |
dea4d055 MW |
852 | |
853 | * If the value of TYPE is not `eql' to the token type then the match | |
854 | fails. | |
855 | ||
856 | * If VALUE is specified, and the value of VALUE is not `equal' to the | |
857 | token semantic value then the match fails. | |
858 | ||
859 | * Otherwise the match succeeds. | |
860 | ||
861 | If the match is successful and the parser is not at end-of-file, and the | |
862 | value of PEEKP is nil then the parser advances to the next token; the | |
863 | result of the match is the token's value. | |
864 | ||
865 | If the match fails then the failure indicator is either TYPE or (TYPE | |
866 | VALUE), depending on whether a VALUE was specified." | |
867 | ||
868 | (once-only (type value peekp) | |
869 | (with-gensyms (tokty tokval) | |
870 | `(let ((,tokty ,(parser-token-type context)) | |
871 | (,tokval ,(parser-token-value context))) | |
872 | (if ,(if (eq type t) | |
873 | `(not (eq ,tokty :eof)) | |
874 | (flet ((check-value (cond) | |
875 | (if valuep | |
876 | `(and ,cond (equal ,tokval ,value)) | |
877 | cond))) | |
878 | (if (constantp type) | |
879 | (check-value `(eql ,tokty ,type)) | |
880 | `(if (eq ,type t) | |
881 | (not (eq ,tokty :eof)) | |
882 | ,(check-value `(eql ,tokty ,type)))))) | |
883 | ,(let* ((result `(values ,tokval t ,(if (constantp peekp) | |
884 | (not peekp) | |
885 | `(not ,peekp)))) | |
886 | (step (parser-step context))) | |
887 | (cond ((not (constantp peekp)) | |
888 | `(multiple-value-prog1 ,result | |
889 | (unless ,peekp ,step))) | |
890 | (peekp | |
891 | result) | |
892 | (t | |
893 | `(multiple-value-prog1 ,result | |
894 | ,step)))) | |
895 | (values (list ,(if valuep `(list ,type ,value) type)) | |
896 | nil nil)))))) | |
897 | ||
898 | (defmethod expand-parser-spec ((context token-parser-context) spec) | |
899 | (if (atom spec) | |
900 | (expand-parser-spec context `(token ,spec)) | |
901 | (call-next-method))) | |
902 | ||
903 | (defmethod expand-parser-spec ((context token-parser-context) (spec string)) | |
904 | (expand-parser-spec context `(token :id ,spec))) | |
905 | ||
906 | ;;;----- That's all, folks -------------------------------------------------- |