chiark / gitweb /
Refactoring more or less complete. Maybe I should test it.
[sod] / src / parser / parser-proto.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Protocol for parsing.
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Sensble Object Design, an object system for C.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26;;;--------------------------------------------------------------------------
27;;; Parser protocol discussion.
28;;;
29;;; Other languages, notably Haskell and ML, have `parser combinator
30;;; libraries', which allow one to construct recursive descent parsers using
31;;; approximately pleasant syntax. While attempts have been made to
32;;; introduce the benefits of these libraries to Lisp, they've not been
33;;; altogether successful; this seems due to Lisp's lack of features such as
34;;; pattern matching, currying and lazy evaluation. Rather than fight with
35;;; Lisp's weaknesses, this library plays to its strength, making heavy use
36;;; of macros. Effectively, the `combinators' we build here are /compile-
37;;; time/ combinators, not run-time ones.
38;;;
39;;; A `parser' is simply an expression which returns three values.
40;;;
41;;; * If the second value is nil, then the parser is said to have /failed/,
42;;; and the first value is a list describing the things that the parser
43;;; expected to find but didn't. (The precise details of the list items
44;;; are important to error-reporting functions, but not to the low-level
45;;; machinery, and are left up to higher-level protocols to nail down
46;;; harder.)
47;;;
48;;; * If the second value is not nil, then the parser is said to have
49;;; /succeeded/, and the first value is its /result/.
50;;;
51;;; * The third value indicates whether the parser consumed any of its
52;;; input. Parsers don't backtrack implicitly (to avoid space leaks and
53;;; bad performance), so the `consumedp' return value is used to decide
54;;; whether the parser has `committed' to a particular branch. If the
55;;; parser context supports place-capture (many do) then `peek' can be
56;;; used to suppress consumption of input in the case of parser failure.
57;;;
58;;; The functions and macros here are simply ways of gluing together
59;;; expressions which obey this protocol.
60;;;
bf090e02 61;;; The main contribution of this file is a macro `with-parser-context' which
dea4d055 62;;; embeds a parsing-specific S-expressions language entered using the new
bf090e02
MW
63;;; macro `parse'. The behaviour of this macro is controlled by a pair of
64;;; compile-time generic functions `expand-parser-spec' and
65;;; `expand-parser-form'. As well as the parser expression they're meant to
66;;; process, these functions dispatch on a `context' argument, which is
67;;; intended to help `leaf' parsers find the terminal symbols which they're
68;;; meant to process.
dea4d055 69;;;
bf090e02
MW
70;;; Note that the context is a compile-time object, constructed by the
71;;; `parse' macro expansion function, though the idea is that it will contain
72;;; the name or names of variables holding the run-time parser state (which
73;;; will typically be a lexical analyser or an input stream or suchlike).
dea4d055
MW
74
75(cl:in-package #:sod-parser)
76
77;;;--------------------------------------------------------------------------
78;;; Utilities.
79
80(defun combine-parser-failures (failures)
81 "Combine the failure indicators listed in FAILURES.
82
83 (Note that this means that FAILURES is a list of lists.)"
84
85 (reduce (lambda (f ff) (union f ff :test #'equal))
86 failures
87 :initial-value nil))
88
89;;;--------------------------------------------------------------------------
90;;; Basic protocol.
91
92(eval-when (:compile-toplevel :load-toplevel :execute)
93
94 (export 'expand-parser-spec)
95 (defgeneric expand-parser-spec (context spec)
96 (:documentation
97 "Expand a parser specifier SPEC in a particular parser CONTEXT.")
98 (:method (context (spec list))
99 (expand-parser-form context (car spec) (cdr spec))))
100
101 (export 'expand-parser-form)
102 (defgeneric expand-parser-form (context head tail)
103 (:documentation
104 "Expand a parser list-form given by HEAD and TAIL, in CONTEXT.")
105 (:method (context head tail)
106 (cons head tail)))
107
108 (export 'wrap-parser)
109 (defgeneric wrap-parser (context form)
110 (:documentation
111 "Enclose FORM in whatever is necessary to make the parser work.")
112 (:method (context form) form)))
113
114(export 'defparse)
115(defmacro defparse (name bvl &body body)
116 "Define a new parser form.
117
118 The full syntax is hairier than it looks:
119
120 defparse NAME ( [[ :context (CTX SPEC) ]] . BVL )
121 { FORM }*
122
123 The macro defines a new parser form (NAME ...) which is expanded by the
124 body FORMs. The BVL is a destructuring lambda-list to be applied to the
125 tail of the form. The body forms are enclosed in a block called NAME.
126
dea4d055
MW
127 If the :context key is provided, then the parser form is specialized on a
128 particular class of parser contexts SPEC; specialized expanders take
129 priority over less specialized or unspecialized expanders -- so you can
130 use this to override the built-in forms safely if they don't seem to be
131 doing the right thing for you. Also, the context -- which is probably
132 interesting to you if you've bothered to specialize -- is bound to the
133 variable CTX."
134
135 ;; BUG! misplaces declarations: if you declare the CONTEXT argument
136 ;; `special' it won't be bound properly. I'm really not at all sure I know
137 ;; how to fix this.
138
139 (with-gensyms (head tail context)
140 (let ((ctxclass t))
141 (loop
142 (unless (and bvl (keywordp (car bvl))) (return))
143 (ecase (pop bvl)
144 (:context (destructuring-bind (name spec) (pop bvl)
145 (setf ctxclass spec context name)))))
146 (multiple-value-bind (doc decls body) (parse-body body)
147 `(defmethod expand-parser-form
148 ((,context ,ctxclass) (,head (eql ',name)) ,tail)
149 ,@doc
150 (block ,name
151 (destructuring-bind ,bvl ,tail
152 ,@decls
153 ,@body)))))))
154
155(export '(with-parser-context parse))
156(defmacro with-parser-context ((class &rest initargs) &body body)
157 "Evaluate BODY with a macro `parse' which expands parser forms.
158
159 Evaluate BODY as an implicit progn. At compile time, a parser context is
160 constructed by (apply #'make-instance CLASS INITARGS). The BODY can make
161 use of the macro `parse':
162
163 parse SPEC
164
165 which parses the input in the manner described by SPEC, in the context of
166 the parser context."
167
168 (let ((context (apply #'make-instance class initargs)))
169 (wrap-parser context
170 `(macrolet ((parse (form)
171 (expand-parser-spec ',context form)))
172 ,@body))))
173
174;;;--------------------------------------------------------------------------
175;;; Common parser context protocol.
176
177(export 'parser-at-eof-p)
178(defgeneric parser-at-eof-p (context)
179 (:documentation
180 "Return whether the parser has reached the end of its input.
181
182 Be careful: all of this is happening at macro expansion time."))
183
184(export 'parser-step)
185(defgeneric parser-step (context)
186 (:documentation
187 "Advance the parser to the next character.
188
189 Be careful: all of this is happening at macro-expansion time."))
190
191(defmethod expand-parser-spec (context (spec (eql :eof)))
192 "Tests succeeds if the parser has reached the end of its input.
193
194 The failure indicator is the keyword `:eof'."
195
196 `(if ,(parser-at-eof-p context)
197 (values :eof t nil)
198 (values '(:eof) nil nil)))
199
200;;;--------------------------------------------------------------------------
201;;; Useful macros for dealing with parsers.
202
203(export 'it)
204(export 'if-parse)
205(defmacro if-parse ((&key (result 'it) expected (consumedp (gensym "CP")))
206 parser consequent &optional (alternative nil altp))
207 "Conditional parsing construction.
208
209 If PARSER succeeds, then evaluate CONSEQUENT with RESULT bound to the
210 result; otherwise evaluate ALTERNATIVE with EXPECTED bound to the
211 expected-item list. If ALTERNATIVE is omitted, then propagate the failure
212 following the parser protocol."
213
214 (with-gensyms (value win)
215 `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
216 (declare (ignorable ,consumedp))
217 (if ,win
218 (let ((,result ,value))
219 (declare (ignorable ,result))
220 ,consequent)
221 ,(cond ((not altp)
222 `(values ,value nil ,consumedp))
223 (expected
224 `(let ((,expected ,value)) ,alternative))
225 (t
226 alternative))))))
227
228(export 'when-parse)
229(defmacro when-parse ((&optional (result 'it)) parser &body body)
230 "Convenience macro for conditional parsing.
231
232 If PARSER succeeds then evaluate BODY with RESULT bound to the result;
233 otherwise propagate the failure."
234 `(if-parse (:result ,result) ,parser (progn ,@body)))
235
236(export 'cond-parse)
237(defmacro cond-parse ((&key (result 'it) expected
238 (consumedp (gensym "CP")))
239 &body clauses)
240 "Frightening conditional parsing construct.
241
242 Each of the CLAUSES has the form (PARSER &rest FORMS); the special `fake'
243 parser form `t' may be used to describe a default action. If the PARSER
244 succeeds then evaluate FORMS in order with RESULT bound to the parser
245 result (if there are no forms, then propagate the success); if the PARSER
246 fails without consuming input, then move onto the next clause.
247
248 If the default clause (if any) is reached, or a parser fails after
249 consuming input, then EXPECTED is bound to a list of failure indicators
250 and the default clause's FORMS are evaluated and with CONSUMEDP bound to a
251 generalized boolean indicating whether any input was consumed. If there
252 is no default clause, and either some parser fails after consuming input,
253 or all of the parsers fail without consuming, then a failure is reported
254 and the input-consumption indicator is propagated.
255
256 If a parser fails after consuming input, then the failure indicators are
257 whatever that parser reported; if all the parsers fail without consuming
258 then the failure indicators are the union of the indicators reported by
259 the various parsers."
260
261 (with-gensyms (block fail failarg)
262 (labels ((walk (clauses failures)
263 (cond ((null clauses)
264 (values `(,fail nil (list ,@(reverse failures)))
265 `(values (combine-parser-failures ,failarg)
266 nil
267 ,consumedp)))
268 ((eq (caar clauses) t)
269 (values `(,fail nil (list ,@(reverse failures)))
270 `(,@(if expected
271 `(let ((,expected
272 (combine-parser-failures
273 ,failarg))))
274 `(progn))
275 ,@(cdar clauses))))
276 (t
277 (with-gensyms (value win cp)
278 (multiple-value-bind (inner failbody)
279 (walk (cdr clauses) (cons value failures))
280 (values `(multiple-value-bind (,value ,win ,cp)
281 (parse ,(caar clauses))
282 (when ,win
283 (return-from ,block
284 (let ((,result ,value)
285 (,consumedp ,cp))
286 (declare (ignorable ,result
287 ,consumedp))
288 ,@(cdar clauses))))
289 (when ,cp
290 (,fail t (list ,value)))
291 ,inner)
292 failbody)))))))
293 (multiple-value-bind (inner failbody) (walk clauses nil)
294 `(block ,block
295 (flet ((,fail (,consumedp ,failarg)
296 (declare (ignorable ,consumedp ,failarg))
297 ,failbody))
298 ,inner))))))
299
300(export 'parser)
301(defmacro parser (bvl &body parser)
302 "Functional abstraction for parsers."
303 (multiple-value-bind (doc decls body) (parse-body parser)
304 `(lambda ,bvl ,@doc ,@decls (parse ,@body))))
305
306;;;--------------------------------------------------------------------------
307;;; Standard parser forms.
308
309(export 'label)
310(defparse label (label parser)
311 "If PARSER fails, use LABEL as the expected outcome.
312
313 The LABEL is only evaluated if necessary."
314 (with-gensyms (value win consumedp)
315 `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
316 (if ,win
317 (values ,value ,win ,consumedp)
318 (values (list ,label) nil ,consumedp)))))
319
320(defparse t (value)
321 "Succeed, without consuming input, with result VALUE."
322 `(values ,value t nil))
323
bf090e02
MW
324(defparse nil (indicator)
325 "Fail, without consuming input, with indicator VALUE."
326 `(values (list ,indicator) nil nil))
327
dea4d055
MW
328(defparse when (cond &body parser)
329 "If CONDITION is true, then match PARSER; otherwise fail."
330 `(if ,cond (parse ,@parser) (values nil nil nil)))
331
332(defmethod expand-parser-spec (context (spec (eql t)))
333 "Always matches without consuming input."
334 '(values t t nil))
335
bf090e02
MW
336(defmethod expand-parser-spec (context (spec (eql nil)))
337 "Always fails without consuming input. The failure indicator is `:fail'."
338 '(values '(:fail) nil nil))
339
dea4d055
MW
340(export 'seq)
341(defparse seq (binds &body body)
342 "Parse a sequence of heterogeneous items.
343
344 Syntax:
345
346 seq ( { ATOMIC-PARSER-FORM | ([VAR] PARSER-FORM) }* )
347 { FORM }*
348
349 The behaviour is similar to `let*'. The PARSER-FORMs are processed in
350 order, left to right. If a parser succeeds, then its value is bound to
351 the corresponding VAR, and available within Lisp forms enclosed within
352 subsequent PARSER-FORMs and/or the body FORMs. If any parser fails, then
353 the entire sequence fails. If all of the parsers succeeds, then the FORMs
354 are evaluated as an implicit progn, and the sequence will succeed with the
355 result computed by the final FORM."
356
357 (with-gensyms (block consumedp)
358 (labels ((walk (binds lets ignores)
359 (if (endp binds)
360 `(let* ((,consumedp nil)
361 ,@(nreverse lets))
362 ,@(and ignores
363 `((declare (ignore ,@(nreverse ignores)))))
364 (values (progn ,@body) t ,consumedp))
365 (destructuring-bind (x &optional (y nil yp))
366 (if (listp (car binds))
367 (car binds)
368 (list (car binds)))
369 (with-gensyms (var value win cp)
370 (multiple-value-bind (var parser ignores)
371 (if (and yp x)
372 (values x y ignores)
373 (values var (if yp y x) (cons var ignores)))
374 (walk (cdr binds)
375 (cons `(,var (multiple-value-bind
376 (,value ,win ,cp)
377 (parse ,parser)
378 (when ,cp (setf ,consumedp t))
379 (unless ,win
380 (return-from ,block
381 (values ,value ,nil
382 ,consumedp)))
383 ,value))
384 lets)
385 ignores)))))))
386 `(block ,block ,(walk binds nil nil)))))
387
388(export 'and)
389(defparse and (:context (context t) &rest parsers)
390 "Parse a sequence of heterogeneous items, but ignore their values.
391
392 This is just like (and is implemented using) `seq' with all the bindings
393 set to `nil'. The result is `nil'."
394 (with-gensyms (last)
395 (if (null parsers)
396 '(seq () nil)
397 (expand-parser-spec context
398 `(seq (,@(mapcar (lambda (parser)
399 `(nil ,parser))
400 (butlast parsers))
401 (,last ,(car (last parsers))))
402 ,last)))))
403
404(export 'lisp)
405(defparse lisp (&rest forms)
406 "Evaluate FORMs, which should obey the parser protocol."
407 `(progn ,@forms))
408
409(export 'many)
410(defparse many ((acc init update
411 &key (new 'it) (final acc) (min nil minp) max (commitp t))
412 parser &optional (sep nil sepp))
413 "Parse a sequence of homogeneous items.
414
415 The behaviour is similar to `do'. Initially an accumulator ACC is
416 established, and bound to the value of INIT. The PARSER is then evaluated
417 repeatedly. Each time it succeeds, UPDATE is evaluated with NEW (defaults
418 to `it') bound to the result of the parse, and the value returned by
419 UPDATE is stored back into ACC. If the PARSER fails, then the parse
420 ends. The scope of ACC includes the UPDATE and FINAL forms, and the
421 PARSER and SEP parsers; it is updated by side effects, not rebound.
422
423 If a SEP parser is provided, then the behaviour changes as follows.
424 Before each attempt to parse a new item using PARSER, the parser SEP is
425 invoked. If SEP fails then the parse ends; if SEP succeeds, and COMMITP
426 is true, then the PARSER must also succeed or the overall parse will
427 fail. If COMMITP is false then a trailing SEP is permitted and ignored.
428
429 If MAX (which will be evaluated) is not nil, then it must be a number: the
430 parse ends automatically after PARSER has succeeded MAX times. When the
431 parse has ended, if the PARSER succeeded fewer than MIN (which will be
432 evaluated) times then the parse fails. Otherwise, the FINAL form (which
433 defaults to simply returning ACC) is evaluated and its value becomes the
434 result of the parse. MAX defaults to nil -- i.e., no maximum; MIN
435 defaults to 1 if a SEP parser is given, or 0 if not.
436
437 Note that `many' cannot fail if MIN is zero."
438
439 ;; Once upon a time, this was a macro of almost infinite hairiness which
440 ;; tried to do everything itself, including inspecting its arguments for
441 ;; constant-ness to decide whether it could elide bits of code. This
442 ;; became unsustainable. Nowadays, it packages up its parser arguments
443 ;; into functions and calls some primitive functions to do the heavy
444 ;; lifting.
445 ;;
446 ;; The precise protocol between this macro and the backend functions is
447 ;; subject to change: don't rely on it.
448
449 (let* ((accvar (or acc (gensym "ACC-")))
450 (func (if sepp '%many-sep '%many)))
451 `(let ((,accvar ,init))
452 (declare (ignorable ,accvar))
453 (,func (lambda (,new)
454 (declare (ignorable ,new))
455 (setf ,accvar ,update))
048d0b2d 456 (lambda () ,final)
dea4d055
MW
457 (parser () ,parser)
458 ,@(and sepp (list `(parser () ,sep)))
459 ,@(and minp `(:min ,min))
460 ,@(and max `(:max ,max))
461 ,@(and (not (eq commitp t)) `(:commitp ,commitp))))))
462
463(export 'list)
464(defparse list ((&rest keys) parser &optional (sep nil sepp))
3109662a 465 "Like `many', but simply returns a list of the parser results."
dea4d055
MW
466 (with-gensyms (acc)
467 `(parse (many (,acc nil (cons it ,acc) :final (nreverse ,acc) ,@keys)
468 ,parser ,@(and sepp (list sep))))))
469
470(export 'skip-many)
471(defparse skip-many ((&rest keys) parser &optional (sep nil sepp))
3109662a 472 "Like `many', but ignores the results."
dea4d055
MW
473 `(parse (many (nil nil nil ,@keys)
474 ,parser ,@(and sepp (list sep)))))
475
476(export 'or)
477(defparse or (&rest parsers)
478 "Try a number of alternative parsers.
479
480 Each of the PARSERS in turn is tried. If any succeeds, then its result
481 becomes the result of the parse. If any parser fails after consuming
482 input, or if all of the parsers fail, then the overall parse fails, with
483 the union of the expected items from the individual parses."
484
485 (with-gensyms (fail cp failarg)
486 (labels ((walk (parsers failures)
487 (if (null parsers)
488 `(,fail nil (list ,@(reverse failures)))
489 (with-gensyms (value win consumedp)
490 `(multiple-value-bind (,value ,win ,consumedp)
491 (parse ,(car parsers))
492 (cond (,win
493 (values ,value ,win ,consumedp))
494 (,consumedp
495 (,fail t (list ,value)))
496 (t
497 ,(walk (cdr parsers)
498 (cons value failures)))))))))
499 `(flet ((,fail (,cp ,failarg)
500 (values (combine-parser-failures ,failarg) nil ,cp)))
501 ,(walk parsers nil)))))
502
503(export '?)
504(defparse ? (parser &optional (value nil))
505 "Matches PARSER or nothing; fails if PARSER fails after consuming input."
506 `(parse (or ,parser (t ,value))))
507
508;;;--------------------------------------------------------------------------
509;;; Pluggable parsers.
510
511(export 'call-pluggable-parser)
512(defun call-pluggable-parser (symbol &rest args)
513 "Call the pluggable parser denoted by SYMBOL.
514
515 A `pluggable parser' is an indirection point at which a number of
516 alternative parsers can be attached dynamically. The parsers are tried in
517 some arbitrary order, so one should be careful to avoid ambiguities; each
518 is paseed the given ARGS.
519
520 If any parser succeeds then it determines the result; if any parser fails
521 having consumed input then the pluggable parser fails immediately. If all
522 of the parsers fail without consuming input then the pluggable parser
523 fails with the union of the individual failure indicators."
524
525 (let ((expected nil))
526 (dolist (item (get symbol 'parser))
527 (multiple-value-bind (value winp consumedp) (apply (cdr item) args)
528 (when (or winp consumedp)
529 (return-from call-pluggable-parser (values value winp consumedp)))
530 (push value expected)))
531 (values (combine-parser-failures expected) nil nil)))
532
533(export 'plug)
534(defparse plug (symbol &rest args)
535 "Call the pluggable parser denoted by SYMBOL.
536
537 This is just like the function `call-pluggable-parser', but the SYMBOL is
538 not evaluated."
539 `(call-pluggable-parser ',symbol ,@args))
540
541(export 'pluggable-parser-add)
542(defun pluggable-parser-add (symbol tag parser)
543 "Adds an element to a pluggable parser.
544
545 The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'-
546 comparable object which identifies the element. The PARSER is a parser
547 function; it will be passed arguments via `pluggable-parser'.
548
549 If a parser with the given TAG is already attached to SYMBOL then the new
550 parser replaces the old one; otherwise it is added to the collection."
551
552 (let ((alist (get symbol 'parser)))
553 (aif (assoc tag alist)
554 (setf (cdr it) parser)
555 (setf (get symbol 'parser) (acons tag parser alist)))))
556
557(export 'define-pluggable-parser)
558(defmacro define-pluggable-parser (symbol tag (&rest bvl) &body body)
559 "Adds an element to a pluggable parser.
560
561 The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'-
562 comparable object which identifies the element. Neither SYMBOL nor TAG is
563 evaluated. The BODY is a parser expression; the BVL is a lambda list
bf090e02 564 describing how to bind the arguments supplied via `pluggable-parser'.
dea4d055
MW
565
566 If a parser with the given TAG is already attached to SYMBOL then the new
567 parser replaces the old one; otherwise it is added to the collection."
568
569 `(pluggable-parser-add ',symbol ',tag (lambda ,bvl ,@body)))
570
571;;;--------------------------------------------------------------------------
572;;; Rewindable parser context protocol.
573
574(eval-when (:compile-toplevel :load-toplevel :execute)
575
576 (export 'parser-capture-place)
577 (defgeneric parser-capture-place (context)
578 (:documentation
579 "Capture the current position of a parser CONTEXT.
580
581 The return value may later be used with `parser-restore-place'. Be
582 careful: all of this is happening at macro-expansion time.")
583 (:method (context)
584 (error "Parser context ~S doesn't support rewinding." context)))
585
586 (export 'parser-restore-place)
587 (defgeneric parser-restore-place (context place)
588 (:documentation
589 "`Rewind' the parser CONTEXT back to the captured PLACE.
590
591 The place was previously captured by `parser-capture-place'. Be careful:
592 all of this is happening at macro-expansion time."))
593
594 (export 'parser-release-place)
595 (defgeneric parser-release-place (context place)
596 (:documentation
597 "Release a PLACE captured from the parser CONTEXT.
598
599 The place was previously captured by `parser-capture-place'. The
600 underlying scanner can use this call to determine whether there are
601 outstanding captured places, and thereby optimize its behaviour. Be
602 careful: all of this is happening at macro-expansion time.")
603 (:method (context place) nil))
604
605 (export 'parser-places-must-be-released-p)
606 (defgeneric parser-places-must-be-released-p (context)
607 (:documentation
608 "Answer whether places captured from the parser CONTEXT need releasing.
609
610 Some contexts -- well, actually, their run-time counterparts -- work
611 better if they can keep track of which places are captured, or at least if
612 there are captured places outstanding. If this function returns true
613 (which is the default) then `with-parser-place' (and hence parser macros
614 such as `peek') will expand to `unwind-protect' forms in order to perform
615 the correct cleanup. If it returns false, then the `unwind-protect' is
616 omitted so that the runtime code does't have to register cleanup
617 handlers.")
618 (:method (context) t)))
619
620(export 'with-parser-place)
621(defmacro with-parser-place ((place context) &body body)
622 "Evaluate BODY surrounded with a binding of PLACE to a captured place.
623
624 The surrounding code will release the PLACE properly on exit from the body
625 forms. This is all happening at macro-expansion time."
626 ;; ... which means that it's a bit hairy. Fortunately, the nested
627 ;; backquotes aren't that bad.
628 (once-only (context)
629 (with-gensyms (bodyfunc)
630 `(with-gensyms (,place)
631 (flet ((,bodyfunc () ,@body))
632 `(let ((,,place ,(parser-capture-place ,context)))
633 ,(if (parser-places-must-be-released-p ,context)
634 `(unwind-protect ,(,bodyfunc)
635 ,(parser-release-place ,context ,place))
636 (,bodyfunc))))))))
637
638(export 'peek)
639(defparse peek (:context (context t) parser)
640 "Attempt to run PARSER, but rewind the underlying source if it fails."
641 (with-gensyms (value win consumedp)
642 (with-parser-place (place context)
643 `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
644 (cond (,win
645 (values ,value ,win ,consumedp))
646 (t
647 ,(parser-restore-place context place)
648 (values ,value ,win nil)))))))
649
650;;;--------------------------------------------------------------------------
651;;; Character parser context protocol.
652
653(export 'character-parser-context)
654(defclass character-parser-context ()
655 ()
656 (:documentation
657 "Base class for parsers which read one character at a time."))
658
659(export 'parser-current-char)
660(defgeneric parser-current-char (context)
661 (:documentation
662 "Return the parser's current character.
663
664 It is an error to invoke this operation if the parser is at end-of-file;
665 you must check this first. Be careful: all of this is happening at
666 macro-expansion time."))
667
bf090e02 668(export 'if-char)
dea4d055
MW
669(defparse if-char (:context (context character-parser-context)
670 (&optional (char 'it)) condition consequent alternative)
671 "Basic character-testing parser.
672
673 If there is a current character, bind it to CHAR and evaluate the
bf090e02
MW
674 CONDITION; if that is true, then evaluate CONSEQUENT and step the parser
675 (in that order); otherwise, if either we're at EOF or the CONDITION
676 returns false, evaluate ALTERNATIVE. The result of `if-char' are the
677 values returned by CONSEQUENT or ALTERNATIVE."
dea4d055
MW
678
679 (with-gensyms (block)
680 `(block ,block
681 (unless ,(parser-at-eof-p context)
682 (let ((,char ,(parser-current-char context)))
683 (when ,condition
bf090e02
MW
684 (return-from ,block
685 (multiple-value-prog1 ,consequent
686 ,(parser-step context))))))
dea4d055
MW
687 ,alternative)))
688
689(defmethod expand-parser-spec
690 ((context character-parser-context) (spec (eql :any)))
691 "Matches any character; result is the character.
692
693 The failure indicator is the keyword `:any'."
694 (expand-parser-spec context
695 '(if-char () t
696 (values it t t)
697 (values '(:any) nil nil))))
698
699(export 'char)
700(defparse char (:context (context character-parser-context) char)
701 "Matches the character CHAR (evaluated); result is the character.
702
703 The failure indicator is CHAR."
704
705 (once-only (char)
706 (with-gensyms (it)
707 (expand-parser-spec context
708 `(if-char (,it) (char= ,it ,char)
709 (values ,it t t)
710 (values (list ,char) nil nil))))))
711
712(defmethod expand-parser-spec
713 ((context character-parser-context) (char character))
714 (expand-parser-spec context `(char ,char)))
715
716(export 'satisfies)
717(defparse satisfies (:context (context character-parser-context) predicate)
718 "Matches a character that satisfies the PREDICATE
719
720 The PREDICATE is a function designator. On success, the result is the
721 character. The failure indicator is PREDICATE; you probably want to apply
722 a `label'."
723
724 (with-gensyms (it)
725 (expand-parser-spec context
726 `(if-char (,it) (,predicate ,it)
727 (values ,it t t)
728 (values '(,predicate) nil nil)))))
729
730(export 'not)
731(defparse not (:context (context character-parser-context) char)
732 "Matches any character other than CHAR; result is the character.
733
734 The failure indicator is (not CHAR)."
735
736 (once-only (char)
737 (with-gensyms (it)
738 (expand-parser-spec context
739 `(if-char (,it) (char/= ,it ,char)
740 (values ,it t t)
741 (values `((not ,,char)) nil nil))))))
742
743(export 'filter)
744(defparse filter (:context (context character-parser-context) predicate)
745 "Matches a character that satisfies the PREDICATE; result is the output of
746 PREDICATE.
747
748 The failure indicator is PREDICATE; you probably want to apply a `label'."
749
750 ;; Can't do this one with `if-char'.
751 (with-gensyms (block value)
752 `(block ,block
753 (unless ,(parser-at-eof-p context)
754 (let ((,value (,predicate ,(parser-current-char context))))
755 (when ,value
756 ,(parser-step context)
757 (return-from ,block (values ,value t t)))))
758 (values '(,predicate) nil nil))))
759
760(defmethod expand-parser-spec
761 ((context character-parser-context) (spec (eql :whitespace)))
762 "Matches any sequence of whitespace; result is nil.
763
764 Cannot fail."
765
766 `(progn
767 (cond ((and (not ,(parser-at-eof-p context))
768 (whitespace-char-p ,(parser-current-char context)))
769 (loop
770 ,(parser-step context)
771 (when (or ,(parser-at-eof-p context)
772 (not (whitespace-char-p
773 ,(parser-current-char context))))
774 (return (values nil t t)))))
775 (t
776 (values nil t nil)))))
777
778(defmethod expand-parser-spec
779 ((context character-parser-context) (string string))
780 "Matches the constituent characters of STRING; result is the string.
781
782 The failure indicator is STRING; on failure, the input is rewound, so this
783 only works on rewindable contexts."
784
785 (with-gensyms (i)
786 (unless (typep string 'simple-string)
787 (setf string (make-array (length string) :initial-contents string)))
788 (with-parser-place (place context)
789 `(dotimes (,i ,(length string) (values ,string t
790 ,(plusp (length string))))
791 (when (or ,(parser-at-eof-p context)
792 (char/= ,(parser-current-char context)
793 (schar ,string ,i)))
794 ,(parser-restore-place context place)
795 (return (values '(,string) nil nil)))
796 ,(parser-step context)))))
797
798;;;--------------------------------------------------------------------------
799;;; Token parser context protocol.
800
801(export 'token-parser-context)
802(defclass token-parser-context ()
803 ()
804 (:documentation
805 "Base class for parsers which read tokens with associated semantic values.
806
807 A token, according to the model suggested by this class, has a /type/,
808 which classifies the token and is the main contributer to guiding the
809 parse, and a /value/, which carries additional semantic information.
810
811 This may seem redundant given Lisp's dynamic type system; but it's not
812 actually capable of drawing sufficiently fine distinctions easily. For
813 example, we can represent a symbol either as a string or a symbol; but
814 using strings conflicts with being able to represent string literals, and
815 using symbols looks ugly and they don't get GCed. Similarly, it'd be
816 convenient to represent punctuation as characters, but that conflicts with
817 using them for character literals. So, we introduce our own notion of
818 token type.
819
820 Token scanners are expected to signal end-of-file with a token of type
821 `:eof'."))
822
823(export 'parser-token-type)
824(defgeneric parser-token-type (context)
825 (:documentation
826 "Return the parser's current token type."))
827
828(export 'parser-token-value)
829(defgeneric parser-token-value (context)
830 (:documentation
831 "Return the parser's current token's semantic value."))
832
833(export 'token)
834(defparse token (:context (context token-parser-context)
835 type &optional (value nil valuep) &key peekp)
836 "Match tokens of a particular type.
837
838 A token matches under the following conditions:
839
840 * If the value of TYPE is `t' then the match succeeds if and only if the
c91b90c3 841 parser is not at end-of-file.
dea4d055
MW
842
843 * If the value of TYPE is not `eql' to the token type then the match
844 fails.
845
846 * If VALUE is specified, and the value of VALUE is not `equal' to the
847 token semantic value then the match fails.
848
849 * Otherwise the match succeeds.
850
851 If the match is successful and the parser is not at end-of-file, and the
852 value of PEEKP is nil then the parser advances to the next token; the
853 result of the match is the token's value.
854
855 If the match fails then the failure indicator is either TYPE or (TYPE
856 VALUE), depending on whether a VALUE was specified."
857
858 (once-only (type value peekp)
859 (with-gensyms (tokty tokval)
860 `(let ((,tokty ,(parser-token-type context))
861 (,tokval ,(parser-token-value context)))
862 (if ,(if (eq type t)
863 `(not (eq ,tokty :eof))
864 (flet ((check-value (cond)
865 (if valuep
866 `(and ,cond (equal ,tokval ,value))
867 cond)))
868 (if (constantp type)
869 (check-value `(eql ,tokty ,type))
870 `(if (eq ,type t)
871 (not (eq ,tokty :eof))
872 ,(check-value `(eql ,tokty ,type))))))
873 ,(let* ((result `(values ,tokval t ,(if (constantp peekp)
874 (not peekp)
875 `(not ,peekp))))
876 (step (parser-step context)))
877 (cond ((not (constantp peekp))
878 `(multiple-value-prog1 ,result
879 (unless ,peekp ,step)))
880 (peekp
881 result)
882 (t
883 `(multiple-value-prog1 ,result
884 ,step))))
885 (values (list ,(if valuep `(list ,type ,value) type))
886 nil nil))))))
887
888(defmethod expand-parser-spec ((context token-parser-context) spec)
889 (if (atom spec)
890 (expand-parser-spec context `(token ,spec))
891 (call-next-method)))
892
893(defmethod expand-parser-spec ((context token-parser-context) (spec string))
894 (expand-parser-spec context `(token :id ,spec)))
895
896;;;----- That's all, folks --------------------------------------------------