+;;; -*-lisp-*-
+;;;
+;;; Protocol for parsing.
+;;;
+;;; (c) 2009 Straylight/Edgeware
+;;;
+
+;;;----- Licensing notice ---------------------------------------------------
+;;;
+;;; This file is part of the Sensble Object Design, an object system for C.
+;;;
+;;; SOD is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 2 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; SOD is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with SOD; if not, write to the Free Software Foundation,
+;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+;;;--------------------------------------------------------------------------
+;;; Parser protocol discussion.
+;;;
+;;; Other languages, notably Haskell and ML, have `parser combinator
+;;; libraries', which allow one to construct recursive descent parsers using
+;;; approximately pleasant syntax. While attempts have been made to
+;;; introduce the benefits of these libraries to Lisp, they've not been
+;;; altogether successful; this seems due to Lisp's lack of features such as
+;;; pattern matching, currying and lazy evaluation. Rather than fight with
+;;; Lisp's weaknesses, this library plays to its strength, making heavy use
+;;; of macros. Effectively, the `combinators' we build here are /compile-
+;;; time/ combinators, not run-time ones.
+;;;
+;;; A `parser' is simply an expression which returns three values.
+;;;
+;;; * If the second value is nil, then the parser is said to have /failed/,
+;;; and the first value is a list describing the things that the parser
+;;; expected to find but didn't. (The precise details of the list items
+;;; are important to error-reporting functions, but not to the low-level
+;;; machinery, and are left up to higher-level protocols to nail down
+;;; harder.)
+;;;
+;;; * If the second value is not nil, then the parser is said to have
+;;; /succeeded/, and the first value is its /result/.
+;;;
+;;; * The third value indicates whether the parser consumed any of its
+;;; input. Parsers don't backtrack implicitly (to avoid space leaks and
+;;; bad performance), so the `consumedp' return value is used to decide
+;;; whether the parser has `committed' to a particular branch. If the
+;;; parser context supports place-capture (many do) then `peek' can be
+;;; used to suppress consumption of input in the case of parser failure.
+;;;
+;;; The functions and macros here are simply ways of gluing together
+;;; expressions which obey this protocol.
+;;;
+;;; The main contribution of this file is a macro WITH-PARSER-CONTEXT which
+;;; embeds a parsing-specific S-expressions language entered using the new
+;;; macro PARSE. The behaviour of this macro is controlled by a pair of
+;;; compile-time generic functions EXPAND-PARSER-SPEC and EXPAND-PARSER-FORM.
+;;; As well as the parser expression they're meant to process, these
+;;; functions dispatch on a `context' argument, which is intended to help
+;;; `leaf' parsers find the terminal symbols which they're meant to process.
+;;;
+;;; Note that the context is a compile-time object, constructed by the PARSE
+;;; macro expansion function, though the idea is that it will contain the
+;;; name or names of variables holding the run-time parser state (which will
+;;; typically be a lexical analyser or an input stream or suchlike).
+
+(cl:in-package #:sod-parser)
+
+;;;--------------------------------------------------------------------------
+;;; Utilities.
+
+(defun combine-parser-failures (failures)
+ "Combine the failure indicators listed in FAILURES.
+
+ (Note that this means that FAILURES is a list of lists.)"
+
+ (reduce (lambda (f ff) (union f ff :test #'equal))
+ failures
+ :initial-value nil))
+
+;;;--------------------------------------------------------------------------
+;;; Basic protocol.
+
+(eval-when (:compile-toplevel :load-toplevel :execute)
+
+ (export 'expand-parser-spec)
+ (defgeneric expand-parser-spec (context spec)
+ (:documentation
+ "Expand a parser specifier SPEC in a particular parser CONTEXT.")
+ (:method (context (spec list))
+ (expand-parser-form context (car spec) (cdr spec))))
+
+ (export 'expand-parser-form)
+ (defgeneric expand-parser-form (context head tail)
+ (:documentation
+ "Expand a parser list-form given by HEAD and TAIL, in CONTEXT.")
+ (:method (context head tail)
+ (cons head tail)))
+
+ (export 'wrap-parser)
+ (defgeneric wrap-parser (context form)
+ (:documentation
+ "Enclose FORM in whatever is necessary to make the parser work.")
+ (:method (context form) form)))
+
+(export 'defparse)
+(defmacro defparse (name bvl &body body)
+ "Define a new parser form.
+
+ The full syntax is hairier than it looks:
+
+ defparse NAME ( [[ :context (CTX SPEC) ]] . BVL )
+ { FORM }*
+
+ The macro defines a new parser form (NAME ...) which is expanded by the
+ body FORMs. The BVL is a destructuring lambda-list to be applied to the
+ tail of the form. The body forms are enclosed in a block called NAME.
+
+ Within the FORMs, a function `expand' is available: it takes a parser
+ specifier as its argument and returns its expansion in the parser's
+ context.
+
+ If the :context key is provided, then the parser form is specialized on a
+ particular class of parser contexts SPEC; specialized expanders take
+ priority over less specialized or unspecialized expanders -- so you can
+ use this to override the built-in forms safely if they don't seem to be
+ doing the right thing for you. Also, the context -- which is probably
+ interesting to you if you've bothered to specialize -- is bound to the
+ variable CTX."
+
+ ;; BUG! misplaces declarations: if you declare the CONTEXT argument
+ ;; `special' it won't be bound properly. I'm really not at all sure I know
+ ;; how to fix this.
+
+ (with-gensyms (head tail context)
+ (let ((ctxclass t))
+ (loop
+ (unless (and bvl (keywordp (car bvl))) (return))
+ (ecase (pop bvl)
+ (:context (destructuring-bind (name spec) (pop bvl)
+ (setf ctxclass spec context name)))))
+ (multiple-value-bind (doc decls body) (parse-body body)
+ `(defmethod expand-parser-form
+ ((,context ,ctxclass) (,head (eql ',name)) ,tail)
+ ,@doc
+ (block ,name
+ (destructuring-bind ,bvl ,tail
+ ,@decls
+ ,@body)))))))
+
+(export '(with-parser-context parse))
+(defmacro with-parser-context ((class &rest initargs) &body body)
+ "Evaluate BODY with a macro `parse' which expands parser forms.
+
+ Evaluate BODY as an implicit progn. At compile time, a parser context is
+ constructed by (apply #'make-instance CLASS INITARGS). The BODY can make
+ use of the macro `parse':
+
+ parse SPEC
+
+ which parses the input in the manner described by SPEC, in the context of
+ the parser context."
+
+ (let ((context (apply #'make-instance class initargs)))
+ (wrap-parser context
+ `(macrolet ((parse (form)
+ (expand-parser-spec ',context form)))
+ ,@body))))
+
+;;;--------------------------------------------------------------------------
+;;; Common parser context protocol.
+
+(export 'parser-at-eof-p)
+(defgeneric parser-at-eof-p (context)
+ (:documentation
+ "Return whether the parser has reached the end of its input.
+
+ Be careful: all of this is happening at macro expansion time."))
+
+(export 'parser-step)
+(defgeneric parser-step (context)
+ (:documentation
+ "Advance the parser to the next character.
+
+ Be careful: all of this is happening at macro-expansion time."))
+
+(defmethod expand-parser-spec (context (spec (eql :eof)))
+ "Tests succeeds if the parser has reached the end of its input.
+
+ The failure indicator is the keyword `:eof'."
+
+ `(if ,(parser-at-eof-p context)
+ (values :eof t nil)
+ (values '(:eof) nil nil)))
+
+;;;--------------------------------------------------------------------------
+;;; Useful macros for dealing with parsers.
+
+(export 'it)
+(export 'if-parse)
+(defmacro if-parse ((&key (result 'it) expected (consumedp (gensym "CP")))
+ parser consequent &optional (alternative nil altp))
+ "Conditional parsing construction.
+
+ If PARSER succeeds, then evaluate CONSEQUENT with RESULT bound to the
+ result; otherwise evaluate ALTERNATIVE with EXPECTED bound to the
+ expected-item list. If ALTERNATIVE is omitted, then propagate the failure
+ following the parser protocol."
+
+ (with-gensyms (value win)
+ `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
+ (declare (ignorable ,consumedp))
+ (if ,win
+ (let ((,result ,value))
+ (declare (ignorable ,result))
+ ,consequent)
+ ,(cond ((not altp)
+ `(values ,value nil ,consumedp))
+ (expected
+ `(let ((,expected ,value)) ,alternative))
+ (t
+ alternative))))))
+
+(export 'when-parse)
+(defmacro when-parse ((&optional (result 'it)) parser &body body)
+ "Convenience macro for conditional parsing.
+
+ If PARSER succeeds then evaluate BODY with RESULT bound to the result;
+ otherwise propagate the failure."
+ `(if-parse (:result ,result) ,parser (progn ,@body)))
+
+(export 'cond-parse)
+(defmacro cond-parse ((&key (result 'it) expected
+ (consumedp (gensym "CP")))
+ &body clauses)
+ "Frightening conditional parsing construct.
+
+ Each of the CLAUSES has the form (PARSER &rest FORMS); the special `fake'
+ parser form `t' may be used to describe a default action. If the PARSER
+ succeeds then evaluate FORMS in order with RESULT bound to the parser
+ result (if there are no forms, then propagate the success); if the PARSER
+ fails without consuming input, then move onto the next clause.
+
+ If the default clause (if any) is reached, or a parser fails after
+ consuming input, then EXPECTED is bound to a list of failure indicators
+ and the default clause's FORMS are evaluated and with CONSUMEDP bound to a
+ generalized boolean indicating whether any input was consumed. If there
+ is no default clause, and either some parser fails after consuming input,
+ or all of the parsers fail without consuming, then a failure is reported
+ and the input-consumption indicator is propagated.
+
+ If a parser fails after consuming input, then the failure indicators are
+ whatever that parser reported; if all the parsers fail without consuming
+ then the failure indicators are the union of the indicators reported by
+ the various parsers."
+
+ (with-gensyms (block fail failarg)
+ (labels ((walk (clauses failures)
+ (cond ((null clauses)
+ (values `(,fail nil (list ,@(reverse failures)))
+ `(values (combine-parser-failures ,failarg)
+ nil
+ ,consumedp)))
+ ((eq (caar clauses) t)
+ (values `(,fail nil (list ,@(reverse failures)))
+ `(,@(if expected
+ `(let ((,expected
+ (combine-parser-failures
+ ,failarg))))
+ `(progn))
+ ,@(cdar clauses))))
+ (t
+ (with-gensyms (value win cp)
+ (multiple-value-bind (inner failbody)
+ (walk (cdr clauses) (cons value failures))
+ (values `(multiple-value-bind (,value ,win ,cp)
+ (parse ,(caar clauses))
+ (when ,win
+ (return-from ,block
+ (let ((,result ,value)
+ (,consumedp ,cp))
+ (declare (ignorable ,result
+ ,consumedp))
+ ,@(cdar clauses))))
+ (when ,cp
+ (,fail t (list ,value)))
+ ,inner)
+ failbody)))))))
+ (multiple-value-bind (inner failbody) (walk clauses nil)
+ `(block ,block
+ (flet ((,fail (,consumedp ,failarg)
+ (declare (ignorable ,consumedp ,failarg))
+ ,failbody))
+ ,inner))))))
+
+(export 'parser)
+(defmacro parser (bvl &body parser)
+ "Functional abstraction for parsers."
+ (multiple-value-bind (doc decls body) (parse-body parser)
+ `(lambda ,bvl ,@doc ,@decls (parse ,@body))))
+
+;;;--------------------------------------------------------------------------
+;;; Standard parser forms.
+
+(export 'label)
+(defparse label (label parser)
+ "If PARSER fails, use LABEL as the expected outcome.
+
+ The LABEL is only evaluated if necessary."
+ (with-gensyms (value win consumedp)
+ `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
+ (if ,win
+ (values ,value ,win ,consumedp)
+ (values (list ,label) nil ,consumedp)))))
+
+(defparse t (value)
+ "Succeed, without consuming input, with result VALUE."
+ `(values ,value t nil))
+
+(defparse when (cond &body parser)
+ "If CONDITION is true, then match PARSER; otherwise fail."
+ `(if ,cond (parse ,@parser) (values nil nil nil)))
+
+(defmethod expand-parser-spec (context (spec (eql t)))
+ "Always matches without consuming input."
+ '(values t t nil))
+
+(export 'seq)
+(defparse seq (binds &body body)
+ "Parse a sequence of heterogeneous items.
+
+ Syntax:
+
+ seq ( { ATOMIC-PARSER-FORM | ([VAR] PARSER-FORM) }* )
+ { FORM }*
+
+ The behaviour is similar to `let*'. The PARSER-FORMs are processed in
+ order, left to right. If a parser succeeds, then its value is bound to
+ the corresponding VAR, and available within Lisp forms enclosed within
+ subsequent PARSER-FORMs and/or the body FORMs. If any parser fails, then
+ the entire sequence fails. If all of the parsers succeeds, then the FORMs
+ are evaluated as an implicit progn, and the sequence will succeed with the
+ result computed by the final FORM."
+
+ (with-gensyms (block consumedp)
+ (labels ((walk (binds lets ignores)
+ (if (endp binds)
+ `(let* ((,consumedp nil)
+ ,@(nreverse lets))
+ ,@(and ignores
+ `((declare (ignore ,@(nreverse ignores)))))
+ (values (progn ,@body) t ,consumedp))
+ (destructuring-bind (x &optional (y nil yp))
+ (if (listp (car binds))
+ (car binds)
+ (list (car binds)))
+ (with-gensyms (var value win cp)
+ (multiple-value-bind (var parser ignores)
+ (if (and yp x)
+ (values x y ignores)
+ (values var (if yp y x) (cons var ignores)))
+ (walk (cdr binds)
+ (cons `(,var (multiple-value-bind
+ (,value ,win ,cp)
+ (parse ,parser)
+ (when ,cp (setf ,consumedp t))
+ (unless ,win
+ (return-from ,block
+ (values ,value ,nil
+ ,consumedp)))
+ ,value))
+ lets)
+ ignores)))))))
+ `(block ,block ,(walk binds nil nil)))))
+
+(export 'and)
+(defparse and (:context (context t) &rest parsers)
+ "Parse a sequence of heterogeneous items, but ignore their values.
+
+ This is just like (and is implemented using) `seq' with all the bindings
+ set to `nil'. The result is `nil'."
+ (with-gensyms (last)
+ (if (null parsers)
+ '(seq () nil)
+ (expand-parser-spec context
+ `(seq (,@(mapcar (lambda (parser)
+ `(nil ,parser))
+ (butlast parsers))
+ (,last ,(car (last parsers))))
+ ,last)))))
+
+(export 'lisp)
+(defparse lisp (&rest forms)
+ "Evaluate FORMs, which should obey the parser protocol."
+ `(progn ,@forms))
+
+(export 'many)
+(defparse many ((acc init update
+ &key (new 'it) (final acc) (min nil minp) max (commitp t))
+ parser &optional (sep nil sepp))
+ "Parse a sequence of homogeneous items.
+
+ The behaviour is similar to `do'. Initially an accumulator ACC is
+ established, and bound to the value of INIT. The PARSER is then evaluated
+ repeatedly. Each time it succeeds, UPDATE is evaluated with NEW (defaults
+ to `it') bound to the result of the parse, and the value returned by
+ UPDATE is stored back into ACC. If the PARSER fails, then the parse
+ ends. The scope of ACC includes the UPDATE and FINAL forms, and the
+ PARSER and SEP parsers; it is updated by side effects, not rebound.
+
+ If a SEP parser is provided, then the behaviour changes as follows.
+ Before each attempt to parse a new item using PARSER, the parser SEP is
+ invoked. If SEP fails then the parse ends; if SEP succeeds, and COMMITP
+ is true, then the PARSER must also succeed or the overall parse will
+ fail. If COMMITP is false then a trailing SEP is permitted and ignored.
+
+ If MAX (which will be evaluated) is not nil, then it must be a number: the
+ parse ends automatically after PARSER has succeeded MAX times. When the
+ parse has ended, if the PARSER succeeded fewer than MIN (which will be
+ evaluated) times then the parse fails. Otherwise, the FINAL form (which
+ defaults to simply returning ACC) is evaluated and its value becomes the
+ result of the parse. MAX defaults to nil -- i.e., no maximum; MIN
+ defaults to 1 if a SEP parser is given, or 0 if not.
+
+ Note that `many' cannot fail if MIN is zero."
+
+ ;; Once upon a time, this was a macro of almost infinite hairiness which
+ ;; tried to do everything itself, including inspecting its arguments for
+ ;; constant-ness to decide whether it could elide bits of code. This
+ ;; became unsustainable. Nowadays, it packages up its parser arguments
+ ;; into functions and calls some primitive functions to do the heavy
+ ;; lifting.
+ ;;
+ ;; The precise protocol between this macro and the backend functions is
+ ;; subject to change: don't rely on it.
+
+ (let* ((accvar (or acc (gensym "ACC-")))
+ (func (if sepp '%many-sep '%many)))
+ `(let ((,accvar ,init))
+ (declare (ignorable ,accvar))
+ (,func (lambda (,new)
+ (declare (ignorable ,new))
+ (setf ,accvar ,update))
+ (lambda ()
+ ,final)
+ (parser () ,parser)
+ ,@(and sepp (list `(parser () ,sep)))
+ ,@(and minp `(:min ,min))
+ ,@(and max `(:max ,max))
+ ,@(and (not (eq commitp t)) `(:commitp ,commitp))))))
+
+(export 'list)
+(defparse list ((&rest keys) parser &optional (sep nil sepp))
+ "Like MANY, but simply returns a list of the parser results."
+ (with-gensyms (acc)
+ `(parse (many (,acc nil (cons it ,acc) :final (nreverse ,acc) ,@keys)
+ ,parser ,@(and sepp (list sep))))))
+
+(export 'skip-many)
+(defparse skip-many ((&rest keys) parser &optional (sep nil sepp))
+ "Like MANY, but ignores the results."
+ `(parse (many (nil nil nil ,@keys)
+ ,parser ,@(and sepp (list sep)))))
+
+(export 'or)
+(defparse or (&rest parsers)
+ "Try a number of alternative parsers.
+
+ Each of the PARSERS in turn is tried. If any succeeds, then its result
+ becomes the result of the parse. If any parser fails after consuming
+ input, or if all of the parsers fail, then the overall parse fails, with
+ the union of the expected items from the individual parses."
+
+ (with-gensyms (fail cp failarg)
+ (labels ((walk (parsers failures)
+ (if (null parsers)
+ `(,fail nil (list ,@(reverse failures)))
+ (with-gensyms (value win consumedp)
+ `(multiple-value-bind (,value ,win ,consumedp)
+ (parse ,(car parsers))
+ (cond (,win
+ (values ,value ,win ,consumedp))
+ (,consumedp
+ (,fail t (list ,value)))
+ (t
+ ,(walk (cdr parsers)
+ (cons value failures)))))))))
+ `(flet ((,fail (,cp ,failarg)
+ (values (combine-parser-failures ,failarg) nil ,cp)))
+ ,(walk parsers nil)))))
+
+(export '?)
+(defparse ? (parser &optional (value nil))
+ "Matches PARSER or nothing; fails if PARSER fails after consuming input."
+ `(parse (or ,parser (t ,value))))
+
+;;;--------------------------------------------------------------------------
+;;; Pluggable parsers.
+
+(export 'call-pluggable-parser)
+(defun call-pluggable-parser (symbol &rest args)
+ "Call the pluggable parser denoted by SYMBOL.
+
+ A `pluggable parser' is an indirection point at which a number of
+ alternative parsers can be attached dynamically. The parsers are tried in
+ some arbitrary order, so one should be careful to avoid ambiguities; each
+ is paseed the given ARGS.
+
+ If any parser succeeds then it determines the result; if any parser fails
+ having consumed input then the pluggable parser fails immediately. If all
+ of the parsers fail without consuming input then the pluggable parser
+ fails with the union of the individual failure indicators."
+
+ (let ((expected nil))
+ (dolist (item (get symbol 'parser))
+ (multiple-value-bind (value winp consumedp) (apply (cdr item) args)
+ (when (or winp consumedp)
+ (return-from call-pluggable-parser (values value winp consumedp)))
+ (push value expected)))
+ (values (combine-parser-failures expected) nil nil)))
+
+(export 'plug)
+(defparse plug (symbol &rest args)
+ "Call the pluggable parser denoted by SYMBOL.
+
+ This is just like the function `call-pluggable-parser', but the SYMBOL is
+ not evaluated."
+ `(call-pluggable-parser ',symbol ,@args))
+
+(export 'pluggable-parser-add)
+(defun pluggable-parser-add (symbol tag parser)
+ "Adds an element to a pluggable parser.
+
+ The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'-
+ comparable object which identifies the element. The PARSER is a parser
+ function; it will be passed arguments via `pluggable-parser'.
+
+ If a parser with the given TAG is already attached to SYMBOL then the new
+ parser replaces the old one; otherwise it is added to the collection."
+
+ (let ((alist (get symbol 'parser)))
+ (aif (assoc tag alist)
+ (setf (cdr it) parser)
+ (setf (get symbol 'parser) (acons tag parser alist)))))
+
+(export 'define-pluggable-parser)
+(defmacro define-pluggable-parser (symbol tag (&rest bvl) &body body)
+ "Adds an element to a pluggable parser.
+
+ The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'-
+ comparable object which identifies the element. Neither SYMBOL nor TAG is
+ evaluated. The BODY is a parser expression; the BVL is a lambda list
+ describing how to bind the argumens supplied via `pluggable-parser'.
+
+ If a parser with the given TAG is already attached to SYMBOL then the new
+ parser replaces the old one; otherwise it is added to the collection."
+
+ `(pluggable-parser-add ',symbol ',tag (lambda ,bvl ,@body)))
+
+;;;--------------------------------------------------------------------------
+;;; Rewindable parser context protocol.
+
+(eval-when (:compile-toplevel :load-toplevel :execute)
+
+ (export 'parser-capture-place)
+ (defgeneric parser-capture-place (context)
+ (:documentation
+ "Capture the current position of a parser CONTEXT.
+
+ The return value may later be used with `parser-restore-place'. Be
+ careful: all of this is happening at macro-expansion time.")
+ (:method (context)
+ (error "Parser context ~S doesn't support rewinding." context)))
+
+ (export 'parser-restore-place)
+ (defgeneric parser-restore-place (context place)
+ (:documentation
+ "`Rewind' the parser CONTEXT back to the captured PLACE.
+
+ The place was previously captured by `parser-capture-place'. Be careful:
+ all of this is happening at macro-expansion time."))
+
+ (export 'parser-release-place)
+ (defgeneric parser-release-place (context place)
+ (:documentation
+ "Release a PLACE captured from the parser CONTEXT.
+
+ The place was previously captured by `parser-capture-place'. The
+ underlying scanner can use this call to determine whether there are
+ outstanding captured places, and thereby optimize its behaviour. Be
+ careful: all of this is happening at macro-expansion time.")
+ (:method (context place) nil))
+
+ (export 'parser-places-must-be-released-p)
+ (defgeneric parser-places-must-be-released-p (context)
+ (:documentation
+ "Answer whether places captured from the parser CONTEXT need releasing.
+
+ Some contexts -- well, actually, their run-time counterparts -- work
+ better if they can keep track of which places are captured, or at least if
+ there are captured places outstanding. If this function returns true
+ (which is the default) then `with-parser-place' (and hence parser macros
+ such as `peek') will expand to `unwind-protect' forms in order to perform
+ the correct cleanup. If it returns false, then the `unwind-protect' is
+ omitted so that the runtime code does't have to register cleanup
+ handlers.")
+ (:method (context) t)))
+
+(export 'with-parser-place)
+(defmacro with-parser-place ((place context) &body body)
+ "Evaluate BODY surrounded with a binding of PLACE to a captured place.
+
+ The surrounding code will release the PLACE properly on exit from the body
+ forms. This is all happening at macro-expansion time."
+ ;; ... which means that it's a bit hairy. Fortunately, the nested
+ ;; backquotes aren't that bad.
+ (once-only (context)
+ (with-gensyms (bodyfunc)
+ `(with-gensyms (,place)
+ (flet ((,bodyfunc () ,@body))
+ `(let ((,,place ,(parser-capture-place ,context)))
+ ,(if (parser-places-must-be-released-p ,context)
+ `(unwind-protect ,(,bodyfunc)
+ ,(parser-release-place ,context ,place))
+ (,bodyfunc))))))))
+
+(export 'peek)
+(defparse peek (:context (context t) parser)
+ "Attempt to run PARSER, but rewind the underlying source if it fails."
+ (with-gensyms (value win consumedp)
+ (with-parser-place (place context)
+ `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
+ (cond (,win
+ (values ,value ,win ,consumedp))
+ (t
+ ,(parser-restore-place context place)
+ (values ,value ,win nil)))))))
+
+;;;--------------------------------------------------------------------------
+;;; Character parser context protocol.
+
+(export 'character-parser-context)
+(defclass character-parser-context ()
+ ()
+ (:documentation
+ "Base class for parsers which read one character at a time."))
+
+(export 'parser-current-char)
+(defgeneric parser-current-char (context)
+ (:documentation
+ "Return the parser's current character.
+
+ It is an error to invoke this operation if the parser is at end-of-file;
+ you must check this first. Be careful: all of this is happening at
+ macro-expansion time."))
+
+(defparse if-char (:context (context character-parser-context)
+ (&optional (char 'it)) condition consequent alternative)
+ "Basic character-testing parser.
+
+ If there is a current character, bind it to CHAR and evaluate the
+ CONDITION; if that is true, then step the parser and evaluate CONSEQUENT;
+ otherwise, if either we're at EOF or the CONDITION returns false, evaluate
+ ALTERNATIVE. The result of `if-char' are the values returned by
+ CONSEQUENT or ALTERNATIVE."
+
+ (with-gensyms (block)
+ `(block ,block
+ (unless ,(parser-at-eof-p context)
+ (let ((,char ,(parser-current-char context)))
+ (when ,condition
+ ,(parser-step context)
+ (return-from ,block ,consequent))))
+ ,alternative)))
+
+(defmethod expand-parser-spec
+ ((context character-parser-context) (spec (eql :any)))
+ "Matches any character; result is the character.
+
+ The failure indicator is the keyword `:any'."
+ (expand-parser-spec context
+ '(if-char () t
+ (values it t t)
+ (values '(:any) nil nil))))
+
+(export 'char)
+(defparse char (:context (context character-parser-context) char)
+ "Matches the character CHAR (evaluated); result is the character.
+
+ The failure indicator is CHAR."
+
+ (once-only (char)
+ (with-gensyms (it)
+ (expand-parser-spec context
+ `(if-char (,it) (char= ,it ,char)
+ (values ,it t t)
+ (values (list ,char) nil nil))))))
+
+(defmethod expand-parser-spec
+ ((context character-parser-context) (char character))
+ (expand-parser-spec context `(char ,char)))
+
+(export 'satisfies)
+(defparse satisfies (:context (context character-parser-context) predicate)
+ "Matches a character that satisfies the PREDICATE
+
+ The PREDICATE is a function designator. On success, the result is the
+ character. The failure indicator is PREDICATE; you probably want to apply
+ a `label'."
+
+ (with-gensyms (it)
+ (expand-parser-spec context
+ `(if-char (,it) (,predicate ,it)
+ (values ,it t t)
+ (values '(,predicate) nil nil)))))
+
+(export 'not)
+(defparse not (:context (context character-parser-context) char)
+ "Matches any character other than CHAR; result is the character.
+
+ The failure indicator is (not CHAR)."
+
+ (once-only (char)
+ (with-gensyms (it)
+ (expand-parser-spec context
+ `(if-char (,it) (char/= ,it ,char)
+ (values ,it t t)
+ (values `((not ,,char)) nil nil))))))
+
+(export 'filter)
+(defparse filter (:context (context character-parser-context) predicate)
+ "Matches a character that satisfies the PREDICATE; result is the output of
+ PREDICATE.
+
+ The failure indicator is PREDICATE; you probably want to apply a `label'."
+
+ ;; Can't do this one with `if-char'.
+ (with-gensyms (block value)
+ `(block ,block
+ (unless ,(parser-at-eof-p context)
+ (let ((,value (,predicate ,(parser-current-char context))))
+ (when ,value
+ ,(parser-step context)
+ (return-from ,block (values ,value t t)))))
+ (values '(,predicate) nil nil))))
+
+(defmethod expand-parser-spec
+ ((context character-parser-context) (spec (eql :whitespace)))
+ "Matches any sequence of whitespace; result is nil.
+
+ Cannot fail."
+
+ `(progn
+ (cond ((and (not ,(parser-at-eof-p context))
+ (whitespace-char-p ,(parser-current-char context)))
+ (loop
+ ,(parser-step context)
+ (when (or ,(parser-at-eof-p context)
+ (not (whitespace-char-p
+ ,(parser-current-char context))))
+ (return (values nil t t)))))
+ (t
+ (values nil t nil)))))
+
+(defmethod expand-parser-spec
+ ((context character-parser-context) (string string))
+ "Matches the constituent characters of STRING; result is the string.
+
+ The failure indicator is STRING; on failure, the input is rewound, so this
+ only works on rewindable contexts."
+
+ (with-gensyms (i)
+ (unless (typep string 'simple-string)
+ (setf string (make-array (length string) :initial-contents string)))
+ (with-parser-place (place context)
+ `(dotimes (,i ,(length string) (values ,string t
+ ,(plusp (length string))))
+ (when (or ,(parser-at-eof-p context)
+ (char/= ,(parser-current-char context)
+ (schar ,string ,i)))
+ ,(parser-restore-place context place)
+ (return (values '(,string) nil nil)))
+ ,(parser-step context)))))
+
+;;;--------------------------------------------------------------------------
+;;; Token parser context protocol.
+
+(export 'token-parser-context)
+(defclass token-parser-context ()
+ ()
+ (:documentation
+ "Base class for parsers which read tokens with associated semantic values.
+
+ A token, according to the model suggested by this class, has a /type/,
+ which classifies the token and is the main contributer to guiding the
+ parse, and a /value/, which carries additional semantic information.
+
+ This may seem redundant given Lisp's dynamic type system; but it's not
+ actually capable of drawing sufficiently fine distinctions easily. For
+ example, we can represent a symbol either as a string or a symbol; but
+ using strings conflicts with being able to represent string literals, and
+ using symbols looks ugly and they don't get GCed. Similarly, it'd be
+ convenient to represent punctuation as characters, but that conflicts with
+ using them for character literals. So, we introduce our own notion of
+ token type.
+
+ Token scanners are expected to signal end-of-file with a token of type
+ `:eof'."))
+
+(export 'parser-token-type)
+(defgeneric parser-token-type (context)
+ (:documentation
+ "Return the parser's current token type."))
+
+(export 'parser-token-value)
+(defgeneric parser-token-value (context)
+ (:documentation
+ "Return the parser's current token's semantic value."))
+
+(export 'token)
+(defparse token (:context (context token-parser-context)
+ type &optional (value nil valuep) &key peekp)
+ "Match tokens of a particular type.
+
+ A token matches under the following conditions:
+
+ * If the value of TYPE is `t' then the match succeeds if and only if the
+ parser it not at end-of-file.
+
+ * If the value of TYPE is not `eql' to the token type then the match
+ fails.
+
+ * If VALUE is specified, and the value of VALUE is not `equal' to the
+ token semantic value then the match fails.
+
+ * Otherwise the match succeeds.
+
+ If the match is successful and the parser is not at end-of-file, and the
+ value of PEEKP is nil then the parser advances to the next token; the
+ result of the match is the token's value.
+
+ If the match fails then the failure indicator is either TYPE or (TYPE
+ VALUE), depending on whether a VALUE was specified."
+
+ (once-only (type value peekp)
+ (with-gensyms (tokty tokval)
+ `(let ((,tokty ,(parser-token-type context))
+ (,tokval ,(parser-token-value context)))
+ (if ,(if (eq type t)
+ `(not (eq ,tokty :eof))
+ (flet ((check-value (cond)
+ (if valuep
+ `(and ,cond (equal ,tokval ,value))
+ cond)))
+ (if (constantp type)
+ (check-value `(eql ,tokty ,type))
+ `(if (eq ,type t)
+ (not (eq ,tokty :eof))
+ ,(check-value `(eql ,tokty ,type))))))
+ ,(let* ((result `(values ,tokval t ,(if (constantp peekp)
+ (not peekp)
+ `(not ,peekp))))
+ (step (parser-step context)))
+ (cond ((not (constantp peekp))
+ `(multiple-value-prog1 ,result
+ (unless ,peekp ,step)))
+ (peekp
+ result)
+ (t
+ `(multiple-value-prog1 ,result
+ ,step))))
+ (values (list ,(if valuep `(list ,type ,value) type))
+ nil nil))))))
+
+(defmethod expand-parser-spec ((context token-parser-context) spec)
+ (if (atom spec)
+ (expand-parser-spec context `(token ,spec))
+ (call-next-method)))
+
+(defmethod expand-parser-spec ((context token-parser-context) (spec string))
+ (expand-parser-spec context `(token :id ,spec)))
+
+;;;----- That's all, folks --------------------------------------------------