Commit | Line | Data |
---|---|---|
dea4d055 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Protocol for parsing. | |
4 | ;;; | |
5 | ;;; (c) 2009 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
10 | ;;; This file is part of the Sensble Object Design, an object system for C. | |
11 | ;;; | |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | ;;;-------------------------------------------------------------------------- | |
27 | ;;; Parser protocol discussion. | |
28 | ;;; | |
29 | ;;; Other languages, notably Haskell and ML, have `parser combinator | |
30 | ;;; libraries', which allow one to construct recursive descent parsers using | |
31 | ;;; approximately pleasant syntax. While attempts have been made to | |
32 | ;;; introduce the benefits of these libraries to Lisp, they've not been | |
33 | ;;; altogether successful; this seems due to Lisp's lack of features such as | |
34 | ;;; pattern matching, currying and lazy evaluation. Rather than fight with | |
35 | ;;; Lisp's weaknesses, this library plays to its strength, making heavy use | |
36 | ;;; of macros. Effectively, the `combinators' we build here are /compile- | |
37 | ;;; time/ combinators, not run-time ones. | |
38 | ;;; | |
39 | ;;; A `parser' is simply an expression which returns three values. | |
40 | ;;; | |
41 | ;;; * If the second value is nil, then the parser is said to have /failed/, | |
42 | ;;; and the first value is a list describing the things that the parser | |
43 | ;;; expected to find but didn't. (The precise details of the list items | |
44 | ;;; are important to error-reporting functions, but not to the low-level | |
45 | ;;; machinery, and are left up to higher-level protocols to nail down | |
46 | ;;; harder.) | |
47 | ;;; | |
48 | ;;; * If the second value is not nil, then the parser is said to have | |
49 | ;;; /succeeded/, and the first value is its /result/. | |
50 | ;;; | |
51 | ;;; * The third value indicates whether the parser consumed any of its | |
52 | ;;; input. Parsers don't backtrack implicitly (to avoid space leaks and | |
53 | ;;; bad performance), so the `consumedp' return value is used to decide | |
54 | ;;; whether the parser has `committed' to a particular branch. If the | |
55 | ;;; parser context supports place-capture (many do) then `peek' can be | |
56 | ;;; used to suppress consumption of input in the case of parser failure. | |
57 | ;;; | |
58 | ;;; The functions and macros here are simply ways of gluing together | |
59 | ;;; expressions which obey this protocol. | |
60 | ;;; | |
bf090e02 | 61 | ;;; The main contribution of this file is a macro `with-parser-context' which |
dea4d055 | 62 | ;;; embeds a parsing-specific S-expressions language entered using the new |
bf090e02 MW |
63 | ;;; macro `parse'. The behaviour of this macro is controlled by a pair of |
64 | ;;; compile-time generic functions `expand-parser-spec' and | |
65 | ;;; `expand-parser-form'. As well as the parser expression they're meant to | |
66 | ;;; process, these functions dispatch on a `context' argument, which is | |
67 | ;;; intended to help `leaf' parsers find the terminal symbols which they're | |
68 | ;;; meant to process. | |
dea4d055 | 69 | ;;; |
bf090e02 MW |
70 | ;;; Note that the context is a compile-time object, constructed by the |
71 | ;;; `parse' macro expansion function, though the idea is that it will contain | |
72 | ;;; the name or names of variables holding the run-time parser state (which | |
73 | ;;; will typically be a lexical analyser or an input stream or suchlike). | |
dea4d055 MW |
74 | |
75 | (cl:in-package #:sod-parser) | |
76 | ||
77 | ;;;-------------------------------------------------------------------------- | |
78 | ;;; Utilities. | |
79 | ||
80 | (defun combine-parser-failures (failures) | |
81 | "Combine the failure indicators listed in FAILURES. | |
82 | ||
83 | (Note that this means that FAILURES is a list of lists.)" | |
84 | ||
85 | (reduce (lambda (f ff) (union f ff :test #'equal)) | |
86 | failures | |
87 | :initial-value nil)) | |
88 | ||
89 | ;;;-------------------------------------------------------------------------- | |
90 | ;;; Basic protocol. | |
91 | ||
92 | (eval-when (:compile-toplevel :load-toplevel :execute) | |
93 | ||
94 | (export 'expand-parser-spec) | |
95 | (defgeneric expand-parser-spec (context spec) | |
96 | (:documentation | |
97 | "Expand a parser specifier SPEC in a particular parser CONTEXT.") | |
98 | (:method (context (spec list)) | |
99 | (expand-parser-form context (car spec) (cdr spec)))) | |
100 | ||
101 | (export 'expand-parser-form) | |
102 | (defgeneric expand-parser-form (context head tail) | |
103 | (:documentation | |
104 | "Expand a parser list-form given by HEAD and TAIL, in CONTEXT.") | |
105 | (:method (context head tail) | |
106 | (cons head tail))) | |
107 | ||
108 | (export 'wrap-parser) | |
109 | (defgeneric wrap-parser (context form) | |
110 | (:documentation | |
111 | "Enclose FORM in whatever is necessary to make the parser work.") | |
112 | (:method (context form) form))) | |
113 | ||
114 | (export 'defparse) | |
115 | (defmacro defparse (name bvl &body body) | |
116 | "Define a new parser form. | |
117 | ||
118 | The full syntax is hairier than it looks: | |
119 | ||
120 | defparse NAME ( [[ :context (CTX SPEC) ]] . BVL ) | |
121 | { FORM }* | |
122 | ||
123 | The macro defines a new parser form (NAME ...) which is expanded by the | |
124 | body FORMs. The BVL is a destructuring lambda-list to be applied to the | |
125 | tail of the form. The body forms are enclosed in a block called NAME. | |
126 | ||
dea4d055 MW |
127 | If the :context key is provided, then the parser form is specialized on a |
128 | particular class of parser contexts SPEC; specialized expanders take | |
129 | priority over less specialized or unspecialized expanders -- so you can | |
130 | use this to override the built-in forms safely if they don't seem to be | |
131 | doing the right thing for you. Also, the context -- which is probably | |
132 | interesting to you if you've bothered to specialize -- is bound to the | |
133 | variable CTX." | |
134 | ||
135 | ;; BUG! misplaces declarations: if you declare the CONTEXT argument | |
136 | ;; `special' it won't be bound properly. I'm really not at all sure I know | |
137 | ;; how to fix this. | |
138 | ||
139 | (with-gensyms (head tail context) | |
140 | (let ((ctxclass t)) | |
141 | (loop | |
142 | (unless (and bvl (keywordp (car bvl))) (return)) | |
143 | (ecase (pop bvl) | |
144 | (:context (destructuring-bind (name spec) (pop bvl) | |
145 | (setf ctxclass spec context name))))) | |
146 | (multiple-value-bind (doc decls body) (parse-body body) | |
147 | `(defmethod expand-parser-form | |
148 | ((,context ,ctxclass) (,head (eql ',name)) ,tail) | |
149 | ,@doc | |
150 | (block ,name | |
151 | (destructuring-bind ,bvl ,tail | |
152 | ,@decls | |
153 | ,@body))))))) | |
154 | ||
155 | (export '(with-parser-context parse)) | |
156 | (defmacro with-parser-context ((class &rest initargs) &body body) | |
157 | "Evaluate BODY with a macro `parse' which expands parser forms. | |
158 | ||
159 | Evaluate BODY as an implicit progn. At compile time, a parser context is | |
160 | constructed by (apply #'make-instance CLASS INITARGS). The BODY can make | |
161 | use of the macro `parse': | |
162 | ||
163 | parse SPEC | |
164 | ||
165 | which parses the input in the manner described by SPEC, in the context of | |
166 | the parser context." | |
167 | ||
168 | (let ((context (apply #'make-instance class initargs))) | |
169 | (wrap-parser context | |
170 | `(macrolet ((parse (form) | |
171 | (expand-parser-spec ',context form))) | |
172 | ,@body)))) | |
173 | ||
174 | ;;;-------------------------------------------------------------------------- | |
175 | ;;; Common parser context protocol. | |
176 | ||
177 | (export 'parser-at-eof-p) | |
178 | (defgeneric parser-at-eof-p (context) | |
179 | (:documentation | |
180 | "Return whether the parser has reached the end of its input. | |
181 | ||
182 | Be careful: all of this is happening at macro expansion time.")) | |
183 | ||
184 | (export 'parser-step) | |
185 | (defgeneric parser-step (context) | |
186 | (:documentation | |
187 | "Advance the parser to the next character. | |
188 | ||
189 | Be careful: all of this is happening at macro-expansion time.")) | |
190 | ||
191 | (defmethod expand-parser-spec (context (spec (eql :eof))) | |
192 | "Tests succeeds if the parser has reached the end of its input. | |
193 | ||
194 | The failure indicator is the keyword `:eof'." | |
195 | ||
196 | `(if ,(parser-at-eof-p context) | |
197 | (values :eof t nil) | |
198 | (values '(:eof) nil nil))) | |
199 | ||
200 | ;;;-------------------------------------------------------------------------- | |
201 | ;;; Useful macros for dealing with parsers. | |
202 | ||
203 | (export 'it) | |
204 | (export 'if-parse) | |
205 | (defmacro if-parse ((&key (result 'it) expected (consumedp (gensym "CP"))) | |
206 | parser consequent &optional (alternative nil altp)) | |
207 | "Conditional parsing construction. | |
208 | ||
209 | If PARSER succeeds, then evaluate CONSEQUENT with RESULT bound to the | |
210 | result; otherwise evaluate ALTERNATIVE with EXPECTED bound to the | |
211 | expected-item list. If ALTERNATIVE is omitted, then propagate the failure | |
212 | following the parser protocol." | |
213 | ||
214 | (with-gensyms (value win) | |
215 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
216 | (declare (ignorable ,consumedp)) | |
217 | (if ,win | |
218 | (let ((,result ,value)) | |
219 | (declare (ignorable ,result)) | |
220 | ,consequent) | |
221 | ,(cond ((not altp) | |
222 | `(values ,value nil ,consumedp)) | |
223 | (expected | |
224 | `(let ((,expected ,value)) ,alternative)) | |
225 | (t | |
226 | alternative)))))) | |
227 | ||
228 | (export 'when-parse) | |
229 | (defmacro when-parse ((&optional (result 'it)) parser &body body) | |
230 | "Convenience macro for conditional parsing. | |
231 | ||
232 | If PARSER succeeds then evaluate BODY with RESULT bound to the result; | |
233 | otherwise propagate the failure." | |
234 | `(if-parse (:result ,result) ,parser (progn ,@body))) | |
235 | ||
236 | (export 'cond-parse) | |
237 | (defmacro cond-parse ((&key (result 'it) expected | |
238 | (consumedp (gensym "CP"))) | |
239 | &body clauses) | |
240 | "Frightening conditional parsing construct. | |
241 | ||
242 | Each of the CLAUSES has the form (PARSER &rest FORMS); the special `fake' | |
243 | parser form `t' may be used to describe a default action. If the PARSER | |
244 | succeeds then evaluate FORMS in order with RESULT bound to the parser | |
245 | result (if there are no forms, then propagate the success); if the PARSER | |
246 | fails without consuming input, then move onto the next clause. | |
247 | ||
248 | If the default clause (if any) is reached, or a parser fails after | |
249 | consuming input, then EXPECTED is bound to a list of failure indicators | |
250 | and the default clause's FORMS are evaluated and with CONSUMEDP bound to a | |
251 | generalized boolean indicating whether any input was consumed. If there | |
252 | is no default clause, and either some parser fails after consuming input, | |
253 | or all of the parsers fail without consuming, then a failure is reported | |
254 | and the input-consumption indicator is propagated. | |
255 | ||
256 | If a parser fails after consuming input, then the failure indicators are | |
257 | whatever that parser reported; if all the parsers fail without consuming | |
258 | then the failure indicators are the union of the indicators reported by | |
259 | the various parsers." | |
260 | ||
261 | (with-gensyms (block fail failarg) | |
262 | (labels ((walk (clauses failures) | |
263 | (cond ((null clauses) | |
264 | (values `(,fail nil (list ,@(reverse failures))) | |
265 | `(values (combine-parser-failures ,failarg) | |
266 | nil | |
267 | ,consumedp))) | |
268 | ((eq (caar clauses) t) | |
269 | (values `(,fail nil (list ,@(reverse failures))) | |
270 | `(,@(if expected | |
271 | `(let ((,expected | |
272 | (combine-parser-failures | |
273 | ,failarg)))) | |
274 | `(progn)) | |
275 | ,@(cdar clauses)))) | |
276 | (t | |
277 | (with-gensyms (value win cp) | |
278 | (multiple-value-bind (inner failbody) | |
279 | (walk (cdr clauses) (cons value failures)) | |
280 | (values `(multiple-value-bind (,value ,win ,cp) | |
281 | (parse ,(caar clauses)) | |
282 | (when ,win | |
283 | (return-from ,block | |
284 | (let ((,result ,value) | |
285 | (,consumedp ,cp)) | |
286 | (declare (ignorable ,result | |
287 | ,consumedp)) | |
288 | ,@(cdar clauses)))) | |
289 | (when ,cp | |
290 | (,fail t (list ,value))) | |
291 | ,inner) | |
292 | failbody))))))) | |
293 | (multiple-value-bind (inner failbody) (walk clauses nil) | |
294 | `(block ,block | |
295 | (flet ((,fail (,consumedp ,failarg) | |
296 | (declare (ignorable ,consumedp ,failarg)) | |
297 | ,failbody)) | |
298 | ,inner)))))) | |
299 | ||
300 | (export 'parser) | |
301 | (defmacro parser (bvl &body parser) | |
302 | "Functional abstraction for parsers." | |
303 | (multiple-value-bind (doc decls body) (parse-body parser) | |
304 | `(lambda ,bvl ,@doc ,@decls (parse ,@body)))) | |
305 | ||
306 | ;;;-------------------------------------------------------------------------- | |
307 | ;;; Standard parser forms. | |
308 | ||
309 | (export 'label) | |
310 | (defparse label (label parser) | |
311 | "If PARSER fails, use LABEL as the expected outcome. | |
312 | ||
313 | The LABEL is only evaluated if necessary." | |
314 | (with-gensyms (value win consumedp) | |
315 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
316 | (if ,win | |
317 | (values ,value ,win ,consumedp) | |
318 | (values (list ,label) nil ,consumedp))))) | |
319 | ||
320 | (defparse t (value) | |
321 | "Succeed, without consuming input, with result VALUE." | |
322 | `(values ,value t nil)) | |
323 | ||
bf090e02 MW |
324 | (defparse nil (indicator) |
325 | "Fail, without consuming input, with indicator VALUE." | |
326 | `(values (list ,indicator) nil nil)) | |
327 | ||
dea4d055 MW |
328 | (defparse when (cond &body parser) |
329 | "If CONDITION is true, then match PARSER; otherwise fail." | |
330 | `(if ,cond (parse ,@parser) (values nil nil nil))) | |
331 | ||
332 | (defmethod expand-parser-spec (context (spec (eql t))) | |
333 | "Always matches without consuming input." | |
334 | '(values t t nil)) | |
335 | ||
bf090e02 MW |
336 | (defmethod expand-parser-spec (context (spec (eql nil))) |
337 | "Always fails without consuming input. The failure indicator is `:fail'." | |
338 | '(values '(:fail) nil nil)) | |
339 | ||
dea4d055 MW |
340 | (export 'seq) |
341 | (defparse seq (binds &body body) | |
342 | "Parse a sequence of heterogeneous items. | |
343 | ||
344 | Syntax: | |
345 | ||
346 | seq ( { ATOMIC-PARSER-FORM | ([VAR] PARSER-FORM) }* ) | |
347 | { FORM }* | |
348 | ||
349 | The behaviour is similar to `let*'. The PARSER-FORMs are processed in | |
350 | order, left to right. If a parser succeeds, then its value is bound to | |
351 | the corresponding VAR, and available within Lisp forms enclosed within | |
352 | subsequent PARSER-FORMs and/or the body FORMs. If any parser fails, then | |
353 | the entire sequence fails. If all of the parsers succeeds, then the FORMs | |
354 | are evaluated as an implicit progn, and the sequence will succeed with the | |
355 | result computed by the final FORM." | |
356 | ||
357 | (with-gensyms (block consumedp) | |
358 | (labels ((walk (binds lets ignores) | |
359 | (if (endp binds) | |
360 | `(let* ((,consumedp nil) | |
361 | ,@(nreverse lets)) | |
362 | ,@(and ignores | |
363 | `((declare (ignore ,@(nreverse ignores))))) | |
364 | (values (progn ,@body) t ,consumedp)) | |
365 | (destructuring-bind (x &optional (y nil yp)) | |
366 | (if (listp (car binds)) | |
367 | (car binds) | |
368 | (list (car binds))) | |
369 | (with-gensyms (var value win cp) | |
370 | (multiple-value-bind (var parser ignores) | |
371 | (if (and yp x) | |
372 | (values x y ignores) | |
373 | (values var (if yp y x) (cons var ignores))) | |
374 | (walk (cdr binds) | |
375 | (cons `(,var (multiple-value-bind | |
376 | (,value ,win ,cp) | |
377 | (parse ,parser) | |
378 | (when ,cp (setf ,consumedp t)) | |
379 | (unless ,win | |
380 | (return-from ,block | |
381 | (values ,value ,nil | |
382 | ,consumedp))) | |
383 | ,value)) | |
384 | lets) | |
385 | ignores))))))) | |
386 | `(block ,block ,(walk binds nil nil))))) | |
387 | ||
388 | (export 'and) | |
389 | (defparse and (:context (context t) &rest parsers) | |
390 | "Parse a sequence of heterogeneous items, but ignore their values. | |
391 | ||
392 | This is just like (and is implemented using) `seq' with all the bindings | |
393 | set to `nil'. The result is `nil'." | |
394 | (with-gensyms (last) | |
395 | (if (null parsers) | |
396 | '(seq () nil) | |
397 | (expand-parser-spec context | |
398 | `(seq (,@(mapcar (lambda (parser) | |
399 | `(nil ,parser)) | |
400 | (butlast parsers)) | |
401 | (,last ,(car (last parsers)))) | |
402 | ,last))))) | |
403 | ||
404 | (export 'lisp) | |
405 | (defparse lisp (&rest forms) | |
406 | "Evaluate FORMs, which should obey the parser protocol." | |
407 | `(progn ,@forms)) | |
408 | ||
409 | (export 'many) | |
410 | (defparse many ((acc init update | |
411 | &key (new 'it) (final acc) (min nil minp) max (commitp t)) | |
412 | parser &optional (sep nil sepp)) | |
413 | "Parse a sequence of homogeneous items. | |
414 | ||
415 | The behaviour is similar to `do'. Initially an accumulator ACC is | |
416 | established, and bound to the value of INIT. The PARSER is then evaluated | |
417 | repeatedly. Each time it succeeds, UPDATE is evaluated with NEW (defaults | |
418 | to `it') bound to the result of the parse, and the value returned by | |
419 | UPDATE is stored back into ACC. If the PARSER fails, then the parse | |
420 | ends. The scope of ACC includes the UPDATE and FINAL forms, and the | |
421 | PARSER and SEP parsers; it is updated by side effects, not rebound. | |
422 | ||
423 | If a SEP parser is provided, then the behaviour changes as follows. | |
424 | Before each attempt to parse a new item using PARSER, the parser SEP is | |
425 | invoked. If SEP fails then the parse ends; if SEP succeeds, and COMMITP | |
426 | is true, then the PARSER must also succeed or the overall parse will | |
427 | fail. If COMMITP is false then a trailing SEP is permitted and ignored. | |
428 | ||
429 | If MAX (which will be evaluated) is not nil, then it must be a number: the | |
430 | parse ends automatically after PARSER has succeeded MAX times. When the | |
431 | parse has ended, if the PARSER succeeded fewer than MIN (which will be | |
432 | evaluated) times then the parse fails. Otherwise, the FINAL form (which | |
433 | defaults to simply returning ACC) is evaluated and its value becomes the | |
434 | result of the parse. MAX defaults to nil -- i.e., no maximum; MIN | |
435 | defaults to 1 if a SEP parser is given, or 0 if not. | |
436 | ||
437 | Note that `many' cannot fail if MIN is zero." | |
438 | ||
439 | ;; Once upon a time, this was a macro of almost infinite hairiness which | |
440 | ;; tried to do everything itself, including inspecting its arguments for | |
441 | ;; constant-ness to decide whether it could elide bits of code. This | |
442 | ;; became unsustainable. Nowadays, it packages up its parser arguments | |
443 | ;; into functions and calls some primitive functions to do the heavy | |
444 | ;; lifting. | |
445 | ;; | |
446 | ;; The precise protocol between this macro and the backend functions is | |
447 | ;; subject to change: don't rely on it. | |
448 | ||
449 | (let* ((accvar (or acc (gensym "ACC-"))) | |
450 | (func (if sepp '%many-sep '%many))) | |
451 | `(let ((,accvar ,init)) | |
452 | (declare (ignorable ,accvar)) | |
453 | (,func (lambda (,new) | |
454 | (declare (ignorable ,new)) | |
455 | (setf ,accvar ,update)) | |
048d0b2d | 456 | (lambda () ,final) |
dea4d055 MW |
457 | (parser () ,parser) |
458 | ,@(and sepp (list `(parser () ,sep))) | |
459 | ,@(and minp `(:min ,min)) | |
460 | ,@(and max `(:max ,max)) | |
461 | ,@(and (not (eq commitp t)) `(:commitp ,commitp)))))) | |
462 | ||
463 | (export 'list) | |
464 | (defparse list ((&rest keys) parser &optional (sep nil sepp)) | |
3109662a | 465 | "Like `many', but simply returns a list of the parser results." |
dea4d055 MW |
466 | (with-gensyms (acc) |
467 | `(parse (many (,acc nil (cons it ,acc) :final (nreverse ,acc) ,@keys) | |
468 | ,parser ,@(and sepp (list sep)))))) | |
469 | ||
470 | (export 'skip-many) | |
471 | (defparse skip-many ((&rest keys) parser &optional (sep nil sepp)) | |
3109662a | 472 | "Like `many', but ignores the results." |
dea4d055 MW |
473 | `(parse (many (nil nil nil ,@keys) |
474 | ,parser ,@(and sepp (list sep))))) | |
475 | ||
476 | (export 'or) | |
477 | (defparse or (&rest parsers) | |
478 | "Try a number of alternative parsers. | |
479 | ||
480 | Each of the PARSERS in turn is tried. If any succeeds, then its result | |
481 | becomes the result of the parse. If any parser fails after consuming | |
482 | input, or if all of the parsers fail, then the overall parse fails, with | |
483 | the union of the expected items from the individual parses." | |
484 | ||
485 | (with-gensyms (fail cp failarg) | |
486 | (labels ((walk (parsers failures) | |
487 | (if (null parsers) | |
488 | `(,fail nil (list ,@(reverse failures))) | |
489 | (with-gensyms (value win consumedp) | |
490 | `(multiple-value-bind (,value ,win ,consumedp) | |
491 | (parse ,(car parsers)) | |
492 | (cond (,win | |
493 | (values ,value ,win ,consumedp)) | |
494 | (,consumedp | |
495 | (,fail t (list ,value))) | |
496 | (t | |
497 | ,(walk (cdr parsers) | |
498 | (cons value failures))))))))) | |
499 | `(flet ((,fail (,cp ,failarg) | |
500 | (values (combine-parser-failures ,failarg) nil ,cp))) | |
501 | ,(walk parsers nil))))) | |
502 | ||
503 | (export '?) | |
504 | (defparse ? (parser &optional (value nil)) | |
505 | "Matches PARSER or nothing; fails if PARSER fails after consuming input." | |
506 | `(parse (or ,parser (t ,value)))) | |
507 | ||
508 | ;;;-------------------------------------------------------------------------- | |
509 | ;;; Pluggable parsers. | |
510 | ||
511 | (export 'call-pluggable-parser) | |
512 | (defun call-pluggable-parser (symbol &rest args) | |
513 | "Call the pluggable parser denoted by SYMBOL. | |
514 | ||
515 | A `pluggable parser' is an indirection point at which a number of | |
516 | alternative parsers can be attached dynamically. The parsers are tried in | |
517 | some arbitrary order, so one should be careful to avoid ambiguities; each | |
518 | is paseed the given ARGS. | |
519 | ||
520 | If any parser succeeds then it determines the result; if any parser fails | |
521 | having consumed input then the pluggable parser fails immediately. If all | |
522 | of the parsers fail without consuming input then the pluggable parser | |
523 | fails with the union of the individual failure indicators." | |
524 | ||
525 | (let ((expected nil)) | |
526 | (dolist (item (get symbol 'parser)) | |
527 | (multiple-value-bind (value winp consumedp) (apply (cdr item) args) | |
528 | (when (or winp consumedp) | |
529 | (return-from call-pluggable-parser (values value winp consumedp))) | |
530 | (push value expected))) | |
531 | (values (combine-parser-failures expected) nil nil))) | |
532 | ||
533 | (export 'plug) | |
534 | (defparse plug (symbol &rest args) | |
535 | "Call the pluggable parser denoted by SYMBOL. | |
536 | ||
537 | This is just like the function `call-pluggable-parser', but the SYMBOL is | |
538 | not evaluated." | |
539 | `(call-pluggable-parser ',symbol ,@args)) | |
540 | ||
541 | (export 'pluggable-parser-add) | |
542 | (defun pluggable-parser-add (symbol tag parser) | |
543 | "Adds an element to a pluggable parser. | |
544 | ||
545 | The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- | |
546 | comparable object which identifies the element. The PARSER is a parser | |
547 | function; it will be passed arguments via `pluggable-parser'. | |
548 | ||
549 | If a parser with the given TAG is already attached to SYMBOL then the new | |
550 | parser replaces the old one; otherwise it is added to the collection." | |
551 | ||
552 | (let ((alist (get symbol 'parser))) | |
553 | (aif (assoc tag alist) | |
554 | (setf (cdr it) parser) | |
555 | (setf (get symbol 'parser) (acons tag parser alist))))) | |
556 | ||
557 | (export 'define-pluggable-parser) | |
558 | (defmacro define-pluggable-parser (symbol tag (&rest bvl) &body body) | |
559 | "Adds an element to a pluggable parser. | |
560 | ||
561 | The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- | |
562 | comparable object which identifies the element. Neither SYMBOL nor TAG is | |
563 | evaluated. The BODY is a parser expression; the BVL is a lambda list | |
bf090e02 | 564 | describing how to bind the arguments supplied via `pluggable-parser'. |
dea4d055 MW |
565 | |
566 | If a parser with the given TAG is already attached to SYMBOL then the new | |
567 | parser replaces the old one; otherwise it is added to the collection." | |
568 | ||
569 | `(pluggable-parser-add ',symbol ',tag (lambda ,bvl ,@body))) | |
570 | ||
571 | ;;;-------------------------------------------------------------------------- | |
572 | ;;; Rewindable parser context protocol. | |
573 | ||
574 | (eval-when (:compile-toplevel :load-toplevel :execute) | |
575 | ||
576 | (export 'parser-capture-place) | |
577 | (defgeneric parser-capture-place (context) | |
578 | (:documentation | |
579 | "Capture the current position of a parser CONTEXT. | |
580 | ||
581 | The return value may later be used with `parser-restore-place'. Be | |
582 | careful: all of this is happening at macro-expansion time.") | |
583 | (:method (context) | |
584 | (error "Parser context ~S doesn't support rewinding." context))) | |
585 | ||
586 | (export 'parser-restore-place) | |
587 | (defgeneric parser-restore-place (context place) | |
588 | (:documentation | |
589 | "`Rewind' the parser CONTEXT back to the captured PLACE. | |
590 | ||
591 | The place was previously captured by `parser-capture-place'. Be careful: | |
592 | all of this is happening at macro-expansion time.")) | |
593 | ||
594 | (export 'parser-release-place) | |
595 | (defgeneric parser-release-place (context place) | |
596 | (:documentation | |
597 | "Release a PLACE captured from the parser CONTEXT. | |
598 | ||
599 | The place was previously captured by `parser-capture-place'. The | |
600 | underlying scanner can use this call to determine whether there are | |
601 | outstanding captured places, and thereby optimize its behaviour. Be | |
602 | careful: all of this is happening at macro-expansion time.") | |
603 | (:method (context place) nil)) | |
604 | ||
605 | (export 'parser-places-must-be-released-p) | |
606 | (defgeneric parser-places-must-be-released-p (context) | |
607 | (:documentation | |
608 | "Answer whether places captured from the parser CONTEXT need releasing. | |
609 | ||
610 | Some contexts -- well, actually, their run-time counterparts -- work | |
611 | better if they can keep track of which places are captured, or at least if | |
612 | there are captured places outstanding. If this function returns true | |
613 | (which is the default) then `with-parser-place' (and hence parser macros | |
614 | such as `peek') will expand to `unwind-protect' forms in order to perform | |
615 | the correct cleanup. If it returns false, then the `unwind-protect' is | |
616 | omitted so that the runtime code does't have to register cleanup | |
617 | handlers.") | |
618 | (:method (context) t))) | |
619 | ||
620 | (export 'with-parser-place) | |
621 | (defmacro with-parser-place ((place context) &body body) | |
622 | "Evaluate BODY surrounded with a binding of PLACE to a captured place. | |
623 | ||
624 | The surrounding code will release the PLACE properly on exit from the body | |
625 | forms. This is all happening at macro-expansion time." | |
626 | ;; ... which means that it's a bit hairy. Fortunately, the nested | |
627 | ;; backquotes aren't that bad. | |
628 | (once-only (context) | |
629 | (with-gensyms (bodyfunc) | |
630 | `(with-gensyms (,place) | |
631 | (flet ((,bodyfunc () ,@body)) | |
632 | `(let ((,,place ,(parser-capture-place ,context))) | |
633 | ,(if (parser-places-must-be-released-p ,context) | |
634 | `(unwind-protect ,(,bodyfunc) | |
635 | ,(parser-release-place ,context ,place)) | |
636 | (,bodyfunc)))))))) | |
637 | ||
638 | (export 'peek) | |
639 | (defparse peek (:context (context t) parser) | |
640 | "Attempt to run PARSER, but rewind the underlying source if it fails." | |
641 | (with-gensyms (value win consumedp) | |
642 | (with-parser-place (place context) | |
643 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
644 | (cond (,win | |
645 | (values ,value ,win ,consumedp)) | |
646 | (t | |
647 | ,(parser-restore-place context place) | |
648 | (values ,value ,win nil))))))) | |
649 | ||
650 | ;;;-------------------------------------------------------------------------- | |
651 | ;;; Character parser context protocol. | |
652 | ||
653 | (export 'character-parser-context) | |
654 | (defclass character-parser-context () | |
655 | () | |
656 | (:documentation | |
657 | "Base class for parsers which read one character at a time.")) | |
658 | ||
659 | (export 'parser-current-char) | |
660 | (defgeneric parser-current-char (context) | |
661 | (:documentation | |
662 | "Return the parser's current character. | |
663 | ||
664 | It is an error to invoke this operation if the parser is at end-of-file; | |
665 | you must check this first. Be careful: all of this is happening at | |
666 | macro-expansion time.")) | |
667 | ||
bf090e02 | 668 | (export 'if-char) |
dea4d055 MW |
669 | (defparse if-char (:context (context character-parser-context) |
670 | (&optional (char 'it)) condition consequent alternative) | |
671 | "Basic character-testing parser. | |
672 | ||
673 | If there is a current character, bind it to CHAR and evaluate the | |
bf090e02 MW |
674 | CONDITION; if that is true, then evaluate CONSEQUENT and step the parser |
675 | (in that order); otherwise, if either we're at EOF or the CONDITION | |
676 | returns false, evaluate ALTERNATIVE. The result of `if-char' are the | |
677 | values returned by CONSEQUENT or ALTERNATIVE." | |
dea4d055 MW |
678 | |
679 | (with-gensyms (block) | |
680 | `(block ,block | |
681 | (unless ,(parser-at-eof-p context) | |
682 | (let ((,char ,(parser-current-char context))) | |
683 | (when ,condition | |
bf090e02 MW |
684 | (return-from ,block |
685 | (multiple-value-prog1 ,consequent | |
686 | ,(parser-step context)))))) | |
dea4d055 MW |
687 | ,alternative))) |
688 | ||
689 | (defmethod expand-parser-spec | |
690 | ((context character-parser-context) (spec (eql :any))) | |
691 | "Matches any character; result is the character. | |
692 | ||
693 | The failure indicator is the keyword `:any'." | |
694 | (expand-parser-spec context | |
695 | '(if-char () t | |
696 | (values it t t) | |
697 | (values '(:any) nil nil)))) | |
698 | ||
699 | (export 'char) | |
700 | (defparse char (:context (context character-parser-context) char) | |
701 | "Matches the character CHAR (evaluated); result is the character. | |
702 | ||
703 | The failure indicator is CHAR." | |
704 | ||
705 | (once-only (char) | |
706 | (with-gensyms (it) | |
707 | (expand-parser-spec context | |
708 | `(if-char (,it) (char= ,it ,char) | |
709 | (values ,it t t) | |
710 | (values (list ,char) nil nil)))))) | |
711 | ||
712 | (defmethod expand-parser-spec | |
713 | ((context character-parser-context) (char character)) | |
714 | (expand-parser-spec context `(char ,char))) | |
715 | ||
716 | (export 'satisfies) | |
717 | (defparse satisfies (:context (context character-parser-context) predicate) | |
718 | "Matches a character that satisfies the PREDICATE | |
719 | ||
720 | The PREDICATE is a function designator. On success, the result is the | |
721 | character. The failure indicator is PREDICATE; you probably want to apply | |
722 | a `label'." | |
723 | ||
724 | (with-gensyms (it) | |
725 | (expand-parser-spec context | |
726 | `(if-char (,it) (,predicate ,it) | |
727 | (values ,it t t) | |
728 | (values '(,predicate) nil nil))))) | |
729 | ||
730 | (export 'not) | |
731 | (defparse not (:context (context character-parser-context) char) | |
732 | "Matches any character other than CHAR; result is the character. | |
733 | ||
734 | The failure indicator is (not CHAR)." | |
735 | ||
736 | (once-only (char) | |
737 | (with-gensyms (it) | |
738 | (expand-parser-spec context | |
739 | `(if-char (,it) (char/= ,it ,char) | |
740 | (values ,it t t) | |
741 | (values `((not ,,char)) nil nil)))))) | |
742 | ||
743 | (export 'filter) | |
744 | (defparse filter (:context (context character-parser-context) predicate) | |
745 | "Matches a character that satisfies the PREDICATE; result is the output of | |
746 | PREDICATE. | |
747 | ||
748 | The failure indicator is PREDICATE; you probably want to apply a `label'." | |
749 | ||
750 | ;; Can't do this one with `if-char'. | |
751 | (with-gensyms (block value) | |
752 | `(block ,block | |
753 | (unless ,(parser-at-eof-p context) | |
754 | (let ((,value (,predicate ,(parser-current-char context)))) | |
755 | (when ,value | |
756 | ,(parser-step context) | |
757 | (return-from ,block (values ,value t t))))) | |
758 | (values '(,predicate) nil nil)))) | |
759 | ||
760 | (defmethod expand-parser-spec | |
761 | ((context character-parser-context) (spec (eql :whitespace))) | |
762 | "Matches any sequence of whitespace; result is nil. | |
763 | ||
764 | Cannot fail." | |
765 | ||
766 | `(progn | |
767 | (cond ((and (not ,(parser-at-eof-p context)) | |
768 | (whitespace-char-p ,(parser-current-char context))) | |
769 | (loop | |
770 | ,(parser-step context) | |
771 | (when (or ,(parser-at-eof-p context) | |
772 | (not (whitespace-char-p | |
773 | ,(parser-current-char context)))) | |
774 | (return (values nil t t))))) | |
775 | (t | |
776 | (values nil t nil))))) | |
777 | ||
778 | (defmethod expand-parser-spec | |
779 | ((context character-parser-context) (string string)) | |
780 | "Matches the constituent characters of STRING; result is the string. | |
781 | ||
782 | The failure indicator is STRING; on failure, the input is rewound, so this | |
783 | only works on rewindable contexts." | |
784 | ||
785 | (with-gensyms (i) | |
786 | (unless (typep string 'simple-string) | |
787 | (setf string (make-array (length string) :initial-contents string))) | |
788 | (with-parser-place (place context) | |
789 | `(dotimes (,i ,(length string) (values ,string t | |
790 | ,(plusp (length string)))) | |
791 | (when (or ,(parser-at-eof-p context) | |
792 | (char/= ,(parser-current-char context) | |
793 | (schar ,string ,i))) | |
794 | ,(parser-restore-place context place) | |
795 | (return (values '(,string) nil nil))) | |
796 | ,(parser-step context))))) | |
797 | ||
798 | ;;;-------------------------------------------------------------------------- | |
799 | ;;; Token parser context protocol. | |
800 | ||
801 | (export 'token-parser-context) | |
802 | (defclass token-parser-context () | |
803 | () | |
804 | (:documentation | |
805 | "Base class for parsers which read tokens with associated semantic values. | |
806 | ||
807 | A token, according to the model suggested by this class, has a /type/, | |
808 | which classifies the token and is the main contributer to guiding the | |
809 | parse, and a /value/, which carries additional semantic information. | |
810 | ||
811 | This may seem redundant given Lisp's dynamic type system; but it's not | |
812 | actually capable of drawing sufficiently fine distinctions easily. For | |
813 | example, we can represent a symbol either as a string or a symbol; but | |
814 | using strings conflicts with being able to represent string literals, and | |
815 | using symbols looks ugly and they don't get GCed. Similarly, it'd be | |
816 | convenient to represent punctuation as characters, but that conflicts with | |
817 | using them for character literals. So, we introduce our own notion of | |
818 | token type. | |
819 | ||
820 | Token scanners are expected to signal end-of-file with a token of type | |
821 | `:eof'.")) | |
822 | ||
823 | (export 'parser-token-type) | |
824 | (defgeneric parser-token-type (context) | |
825 | (:documentation | |
826 | "Return the parser's current token type.")) | |
827 | ||
828 | (export 'parser-token-value) | |
829 | (defgeneric parser-token-value (context) | |
830 | (:documentation | |
831 | "Return the parser's current token's semantic value.")) | |
832 | ||
833 | (export 'token) | |
834 | (defparse token (:context (context token-parser-context) | |
835 | type &optional (value nil valuep) &key peekp) | |
836 | "Match tokens of a particular type. | |
837 | ||
838 | A token matches under the following conditions: | |
839 | ||
840 | * If the value of TYPE is `t' then the match succeeds if and only if the | |
c91b90c3 | 841 | parser is not at end-of-file. |
dea4d055 MW |
842 | |
843 | * If the value of TYPE is not `eql' to the token type then the match | |
844 | fails. | |
845 | ||
846 | * If VALUE is specified, and the value of VALUE is not `equal' to the | |
847 | token semantic value then the match fails. | |
848 | ||
849 | * Otherwise the match succeeds. | |
850 | ||
851 | If the match is successful and the parser is not at end-of-file, and the | |
852 | value of PEEKP is nil then the parser advances to the next token; the | |
853 | result of the match is the token's value. | |
854 | ||
855 | If the match fails then the failure indicator is either TYPE or (TYPE | |
856 | VALUE), depending on whether a VALUE was specified." | |
857 | ||
858 | (once-only (type value peekp) | |
859 | (with-gensyms (tokty tokval) | |
860 | `(let ((,tokty ,(parser-token-type context)) | |
861 | (,tokval ,(parser-token-value context))) | |
862 | (if ,(if (eq type t) | |
863 | `(not (eq ,tokty :eof)) | |
864 | (flet ((check-value (cond) | |
865 | (if valuep | |
866 | `(and ,cond (equal ,tokval ,value)) | |
867 | cond))) | |
868 | (if (constantp type) | |
869 | (check-value `(eql ,tokty ,type)) | |
870 | `(if (eq ,type t) | |
871 | (not (eq ,tokty :eof)) | |
872 | ,(check-value `(eql ,tokty ,type)))))) | |
873 | ,(let* ((result `(values ,tokval t ,(if (constantp peekp) | |
874 | (not peekp) | |
875 | `(not ,peekp)))) | |
876 | (step (parser-step context))) | |
877 | (cond ((not (constantp peekp)) | |
878 | `(multiple-value-prog1 ,result | |
879 | (unless ,peekp ,step))) | |
880 | (peekp | |
881 | result) | |
882 | (t | |
883 | `(multiple-value-prog1 ,result | |
884 | ,step)))) | |
885 | (values (list ,(if valuep `(list ,type ,value) type)) | |
886 | nil nil)))))) | |
887 | ||
888 | (defmethod expand-parser-spec ((context token-parser-context) spec) | |
889 | (if (atom spec) | |
890 | (expand-parser-spec context `(token ,spec)) | |
891 | (call-next-method))) | |
892 | ||
893 | (defmethod expand-parser-spec ((context token-parser-context) (spec string)) | |
894 | (expand-parser-spec context `(token :id ,spec))) | |
895 | ||
896 | ;;;----- That's all, folks -------------------------------------------------- |