;;; -*-lisp-*- ;;; ;;; Protocol for parsing. ;;; ;;; (c) 2009 Straylight/Edgeware ;;; ;;;----- Licensing notice --------------------------------------------------- ;;; ;;; This file is part of the Sensible Object Design, an object system for C. ;;; ;;; SOD is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 2 of the License, or ;;; (at your option) any later version. ;;; ;;; SOD is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with SOD; if not, write to the Free Software Foundation, ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ;;;-------------------------------------------------------------------------- ;;; Parser protocol discussion. ;;; ;;; Other languages, notably Haskell and ML, have `parser combinator ;;; libraries', which allow one to construct recursive descent parsers using ;;; approximately pleasant syntax. While attempts have been made to ;;; introduce the benefits of these libraries to Lisp, they've not been ;;; altogether successful; this seems due to Lisp's lack of features such as ;;; pattern matching, currying and lazy evaluation. Rather than fight with ;;; Lisp's weaknesses, this library plays to its strength, making heavy use ;;; of macros. Effectively, the `combinators' we build here are /compile- ;;; time/ combinators, not run-time ones. ;;; ;;; A `parser' is simply an expression which returns three values. ;;; ;;; * If the second value is nil, then the parser is said to have /failed/, ;;; and the first value is a list describing the things that the parser ;;; expected to find but didn't. (The precise details of the list items ;;; are important to error-reporting functions, but not to the low-level ;;; machinery, and are left up to higher-level protocols to nail down ;;; harder.) ;;; ;;; * If the second value is not nil, then the parser is said to have ;;; /succeeded/, and the first value is its /result/. ;;; ;;; * The third value indicates whether the parser consumed any of its ;;; input. Parsers don't backtrack implicitly (to avoid space leaks and ;;; bad performance), so the `consumedp' return value is used to decide ;;; whether the parser has `committed' to a particular branch. If the ;;; parser context supports place-capture (many do) then `peek' can be ;;; used to suppress consumption of input in the case of parser failure. ;;; ;;; The functions and macros here are simply ways of gluing together ;;; expressions which obey this protocol. ;;; ;;; The main contribution of this file is a macro `with-parser-context' which ;;; embeds a parsing-specific S-expressions language entered using the new ;;; macro `parse'. The behaviour of this macro is controlled by a pair of ;;; compile-time generic functions `expand-parser-spec' and ;;; `expand-parser-form'. As well as the parser expression they're meant to ;;; process, these functions dispatch on a `context' argument, which is ;;; intended to help `leaf' parsers find the terminal symbols which they're ;;; meant to process. ;;; ;;; Note that the context is a compile-time object, constructed by the ;;; `parse' macro expansion function, though the idea is that it will contain ;;; the name or names of variables holding the run-time parser state (which ;;; will typically be a lexical analyser or an input stream or suchlike). (cl:in-package #:sod-parser) ;;;-------------------------------------------------------------------------- ;;; Utilities. (export 'combine-parser-failures) (defun combine-parser-failures (failures) "Combine the failure indicators listed in FAILURES. (Note that this means that FAILURES is a list of lists.)" (reduce (lambda (f ff) (union f ff :test #'equal)) failures :initial-value nil)) (export 'parse-empty) (defun parse-empty (&optional value) "Return a parser which parses nothing, successfully. The parser returns VALUE and consumes nothing." (lambda () (values value t nil))) (export 'parse-fail) (defun parse-fail (indicator &optional consumedp) "Return a parser which fails. The parser reports the INDICATOR and (falsely) claims to have consumed input if CONSUMEDP is true." (lambda () (values indicator nil consumedp))) ;;;-------------------------------------------------------------------------- ;;; Basic protocol. (eval-when (:compile-toplevel :load-toplevel :execute) (export 'expand-parser-spec) (defgeneric expand-parser-spec (context spec) (:documentation "Expand a parser specifier SPEC in a particular parser CONTEXT.") (:method (context (spec list)) (expand-parser-form context (car spec) (cdr spec)))) (export 'expand-parser-form) (defgeneric expand-parser-form (context head tail) (:documentation "Expand a parser list-form given by HEAD and TAIL, in CONTEXT.") (:method (context head tail) (declare (ignore context)) (cons head tail))) (export 'wrap-parser) (defgeneric wrap-parser (context form) (:documentation "Enclose FORM in whatever is necessary to make the parser work.") (:method (context form) (declare (ignore context)) form))) (export 'defparse) (defmacro defparse (name bvl &body body) "Define a new parser form. The full syntax is hairier than it looks: defparse NAME ( [[ :context (CTX SPEC) ]] . BVL ) { FORM }* The macro defines a new parser form (NAME ...) which is expanded by the body FORMs. The BVL is a destructuring lambda-list to be applied to the tail of the form. The body forms are enclosed in a block called NAME. If the :context key is provided, then the parser form is specialized on a particular class of parser contexts SPEC; specialized expanders take priority over less specialized or unspecialized expanders -- so you can use this to override the built-in forms safely if they don't seem to be doing the right thing for you. Also, the context -- which is probably interesting to you if you've bothered to specialize -- is bound to the variable CTX." ;; BUG! misplaces declarations: if you declare the CONTEXT argument ;; `special' it won't be bound properly. I'm really not at all sure I know ;; how to fix this. (with-gensyms (head tail context) (let ((ctxclass t)) (loop (unless (and bvl (keywordp (car bvl))) (return)) (ecase (pop bvl) (:context (destructuring-bind (name spec) (pop bvl) (setf ctxclass spec context name))))) (multiple-value-bind (doc decls body) (parse-body body) `(defmethod expand-parser-form ((,context ,ctxclass) (,head (eql ',name)) ,tail) ,@doc (declare (ignorable ,context)) (destructuring-bind ,bvl ,tail ,@decls (block ,name ,@body))))))) (export '(with-parser-context parse)) (defmacro with-parser-context ((class &rest initargs) &body body) "Evaluate BODY with a macro `parse' which expands parser forms. Evaluate BODY as an implicit progn. At compile time, a parser context is constructed by (apply #'make-instance CLASS INITARGS). The BODY can make use of the macro `parse': parse SPEC which parses the input in the manner described by SPEC, in the context of the parser context." (let ((context (apply #'make-instance class initargs))) (wrap-parser context `(macrolet ((parse (form) (expand-parser-spec ',context form))) ,@body)))) ;;;-------------------------------------------------------------------------- ;;; Common parser context protocol. (export 'parser-at-eof-p) (defgeneric parser-at-eof-p (context) (:documentation "Return whether the parser has reached the end of its input. Be careful: all of this is happening at macro expansion time.")) (export 'parser-step) (defgeneric parser-step (context) (:documentation "Advance the parser to the next character. Be careful: all of this is happening at macro-expansion time.")) (defmethod expand-parser-spec (context (spec (eql :eof))) "Tests succeeds if the parser has reached the end of its input. The failure indicator is the keyword `:eof'." `(if ,(parser-at-eof-p context) (values :eof t nil) (values '(:eof) nil nil))) ;;;-------------------------------------------------------------------------- ;;; Useful macros for dealing with parsers. (export 'it) (export 'if-parse) (defmacro if-parse ((&key (result 'it) expected (consumedp (gensym "CP"))) parser consequent &optional (alternative nil altp)) "Conditional parsing construction. If PARSER succeeds, then evaluate CONSEQUENT with RESULT bound to the result; otherwise evaluate ALTERNATIVE with EXPECTED bound to the expected-item list. If ALTERNATIVE is omitted, then propagate the failure following the parser protocol." (with-gensyms (value win) `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) (declare (ignorable ,consumedp)) (if ,win (let ((,result ,value)) (declare (ignorable ,result)) ,consequent) ,(cond ((not altp) `(values ,value nil ,consumedp)) (expected `(let ((,expected ,value)) ,alternative)) (t alternative)))))) (export 'when-parse) (defmacro when-parse ((&optional (result 'it)) parser &body body) "Convenience macro for conditional parsing. If PARSER succeeds then evaluate BODY with RESULT bound to the result; otherwise propagate the failure." `(if-parse (:result ,result) ,parser (progn ,@body))) (export 'cond-parse) (defmacro cond-parse ((&key (result 'it) expected (consumedp (gensym "CP"))) &body clauses) "Frightening conditional parsing construct. Each of the CLAUSES has the form (PARSER &rest FORMS); the special `fake' parser form `t' may be used to describe a default action. If the PARSER succeeds then evaluate FORMS in order with RESULT bound to the parser result (if there are no forms, then propagate the success); if the PARSER fails without consuming input, then move onto the next clause. If the default clause (if any) is reached, or a parser fails after consuming input, then EXPECTED is bound to a list of failure indicators and the default clause's FORMS are evaluated and with CONSUMEDP bound to a generalized boolean indicating whether any input was consumed. If there is no default clause, and either some parser fails after consuming input, or all of the parsers fail without consuming, then a failure is reported and the input-consumption indicator is propagated. If a parser fails after consuming input, then the failure indicators are whatever that parser reported; if all the parsers fail without consuming then the failure indicators are the union of the indicators reported by the various parsers." (with-gensyms (block fail failarg) (labels ((walk (clauses failures) (cond ((null clauses) (values `(,fail nil (list ,@(reverse failures))) `(values (combine-parser-failures ,failarg) nil ,consumedp))) ((eq (caar clauses) t) (values `(,fail nil (list ,@(reverse failures))) `(,@(if expected `(let ((,expected (combine-parser-failures ,failarg)))) `(progn)) ,@(cdar clauses)))) (t (with-gensyms (value win cp) (multiple-value-bind (inner failbody) (walk (cdr clauses) (cons value failures)) (values `(multiple-value-bind (,value ,win ,cp) (parse ,(caar clauses)) (when ,win (return-from ,block (let ((,result ,value) (,consumedp ,cp)) (declare (ignorable ,result ,consumedp)) ,@(cdar clauses)))) (when ,cp (,fail t (list ,value))) ,inner) failbody))))))) (multiple-value-bind (inner failbody) (walk clauses nil) `(block ,block (flet ((,fail (,consumedp ,failarg) (declare (ignorable ,consumedp ,failarg)) ,failbody)) ,inner)))))) (export 'parser) (defmacro parser (bvl &body parser) "Functional abstraction for parsers." (multiple-value-bind (doc decls body) (parse-body parser) `(lambda ,bvl ,@doc ,@decls (parse ,@body)))) ;;;-------------------------------------------------------------------------- ;;; Standard parser forms. (export 'label) (defparse label (label parser) "If PARSER fails, use LABEL as the expected outcome. The LABEL is only evaluated if necessary." (with-gensyms (value win consumedp) `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) (if ,win (values ,value ,win ,consumedp) (values (list ,label) nil ,consumedp))))) (defparse t (value) "Succeed, without consuming input, with result VALUE." `(values ,value t nil)) (defparse nil (indicator) "Fail, without consuming input, with indicator VALUE." `(values (list ,indicator) nil nil)) (defparse when (cond &body parser) "If CONDITION is true, then match PARSER; otherwise fail." `(if ,cond (parse ,@parser) (values nil nil nil))) (defmethod expand-parser-spec (context (spec (eql t))) "Always matches without consuming input." (declare (ignore context)) '(values t t nil)) (defmethod expand-parser-spec (context (spec (eql nil))) "Always fails without consuming input. The failure indicator is `:fail'." (declare (ignore context)) '(values '(:fail) nil nil)) (export 'seq) (defparse seq (binds &body body) "Parse a sequence of heterogeneous items. Syntax: seq ( { ATOMIC-PARSER-FORM | ([VAR] PARSER-FORM) }* ) { FORM }* The behaviour is similar to `let*'. The PARSER-FORMs are processed in order, left to right. If a parser succeeds, then its value is bound to the corresponding VAR, and available within Lisp forms enclosed within subsequent PARSER-FORMs and/or the body FORMs. If any parser fails, then the entire sequence fails. If all of the parsers succeeds, then the FORMs are evaluated as an implicit progn, and the sequence will succeed with the result computed by the final FORM." (with-gensyms (block consumedp) (labels ((walk (binds lets ignores) (if (endp binds) `(let* ((,consumedp nil) ,@(nreverse lets)) ,@(and ignores `((declare (ignore ,@(nreverse ignores))))) (values (progn ,@body) t ,consumedp)) (destructuring-bind (x &optional (y nil yp)) (if (listp (car binds)) (car binds) (list (car binds))) (with-gensyms (var value win cp) (multiple-value-bind (var parser ignores) (if (and yp x) (values x y ignores) (values var (if yp y x) (cons var ignores))) (walk (cdr binds) (cons `(,var (multiple-value-bind (,value ,win ,cp) (parse ,parser) (when ,cp (setf ,consumedp t)) (unless ,win (return-from ,block (values ,value ,nil ,consumedp))) ,value)) lets) ignores))))))) `(block ,block ,(walk binds nil nil))))) (export 'and) (defparse and (:context (context t) &rest parsers) "Parse a sequence of heterogeneous items, but ignore their values. This is just like (and is implemented using) `seq' with all the bindings set to `nil'. The result is `nil'." (with-gensyms (last) (if (null parsers) '(seq () nil) (expand-parser-spec context `(seq (,@(mapcar (lambda (parser) `(nil ,parser)) (butlast parsers)) (,last ,(car (last parsers)))) ,last))))) (export 'lisp) (defparse lisp (&rest forms) "Evaluate FORMs, which should obey the parser protocol." `(progn ,@forms)) (export 'many) (defparse many ((acc init update &key (new 'it) (final acc) (min nil minp) max (commitp t)) parser &optional (sep nil sepp)) "Parse a sequence of homogeneous items. The behaviour is similar to `do'. Initially an accumulator ACC is established, and bound to the value of INIT. The PARSER is then evaluated repeatedly. Each time it succeeds, UPDATE is evaluated with NEW (defaults to `it') bound to the result of the parse, and the value returned by UPDATE is stored back into ACC. If the PARSER fails, then the parse ends. The scope of ACC includes the UPDATE and FINAL forms, and the PARSER and SEP parsers; it is updated by side effects, not rebound. If a SEP parser is provided, then the behaviour changes as follows. Before each attempt to parse a new item using PARSER, the parser SEP is invoked. If SEP fails then the parse ends; if SEP succeeds, and COMMITP is true, then the PARSER must also succeed or the overall parse will fail. If COMMITP is false then a trailing SEP is permitted and ignored. If MAX (which will be evaluated) is not nil, then it must be a number: the parse ends automatically after PARSER has succeeded MAX times. When the parse has ended, if the PARSER succeeded fewer than MIN (which will be evaluated) times then the parse fails. Otherwise, the FINAL form (which defaults to simply returning ACC) is evaluated and its value becomes the result of the parse. MAX defaults to nil -- i.e., no maximum; MIN defaults to 1 if a SEP parser is given, or 0 if not. Note that `many' cannot fail if MIN is zero." ;; Once upon a time, this was a macro of almost infinite hairiness which ;; tried to do everything itself, including inspecting its arguments for ;; constant-ness to decide whether it could elide bits of code. This ;; became unsustainable. Nowadays, it packages up its parser arguments ;; into functions and calls some primitive functions to do the heavy ;; lifting. ;; ;; The precise protocol between this macro and the backend functions is ;; subject to change: don't rely on it. (let* ((accvar (or acc (gensym "ACC-"))) (func (if sepp '%many-sep '%many))) `(let ((,accvar ,init)) (declare (ignorable ,accvar)) (,func (lambda (,new) (declare (ignorable ,new)) (setf ,accvar ,update)) (lambda () ,final) (parser () ,parser) ,@(and sepp (list `(parser () ,sep))) ,@(and minp `(:min ,min)) ,@(and max `(:max ,max)) ,@(and (not (eq commitp t)) `(:commitp ,commitp)))))) (export 'list) (defparse list ((&rest keys) parser &optional (sep nil sepp)) "Like `many', but simply returns a list of the parser results." (with-gensyms (acc) `(parse (many (,acc nil (cons it ,acc) :final (nreverse ,acc) ,@keys) ,parser ,@(and sepp (list sep)))))) (export 'skip-many) (defparse skip-many ((&rest keys) parser &optional (sep nil sepp)) "Like `many', but ignores the results." `(parse (many (nil nil nil ,@keys) ,parser ,@(and sepp (list sep))))) (export 'or) (defparse or (&rest parsers) "Try a number of alternative parsers. Each of the PARSERS in turn is tried. If any succeeds, then its result becomes the result of the parse. If any parser fails after consuming input, or if all of the parsers fail, then the overall parse fails, with the union of the expected items from the individual parses." (with-gensyms (fail cp failarg) (labels ((walk (parsers failures) (if (null parsers) `(,fail nil (list ,@(reverse failures))) (with-gensyms (value win consumedp) `(multiple-value-bind (,value ,win ,consumedp) (parse ,(car parsers)) (cond (,win (values ,value ,win ,consumedp)) (,consumedp (,fail t (list ,value))) (t ,(walk (cdr parsers) (cons value failures))))))))) `(flet ((,fail (,cp ,failarg) (values (combine-parser-failures ,failarg) nil ,cp))) ,(walk parsers nil))))) (export '?) (defparse ? (parser &optional (value nil)) "Matches PARSER or nothing; fails if PARSER fails after consuming input." `(parse (or ,parser (t ,value)))) ;;;-------------------------------------------------------------------------- ;;; Pluggable parsers. (export 'call-pluggable-parser) (defun call-pluggable-parser (symbol &rest args) "Call the pluggable parser denoted by SYMBOL. A `pluggable parser' is an indirection point at which a number of alternative parsers can be attached dynamically. The parsers are tried in some arbitrary order, so one should be careful to avoid ambiguities; each is paseed the given ARGS. If any parser succeeds then it determines the result; if any parser fails having consumed input then the pluggable parser fails immediately. If all of the parsers fail without consuming input then the pluggable parser fails with the union of the individual failure indicators." (let ((expected nil)) (dolist (item (get symbol 'parser)) (multiple-value-bind (value winp consumedp) (apply (cdr item) args) (when (or winp consumedp) (return-from call-pluggable-parser (values value winp consumedp))) (push value expected))) (values (combine-parser-failures expected) nil nil))) (export 'plug) (defparse plug (symbol &rest args) "Call the pluggable parser denoted by SYMBOL. This is just like the function `call-pluggable-parser', but the SYMBOL is not evaluated." `(call-pluggable-parser ',symbol ,@args)) (export 'pluggable-parser-add) (defun pluggable-parser-add (symbol tag parser) "Adds an element to a pluggable parser. The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- comparable object which identifies the element. The PARSER is a parser function; it will be passed arguments via `pluggable-parser'. If a parser with the given TAG is already attached to SYMBOL then the new parser replaces the old one; otherwise it is added to the collection." (let ((alist (get symbol 'parser))) (aif (assoc tag alist) (setf (cdr it) parser) (setf (get symbol 'parser) (acons tag parser alist))))) (export 'define-pluggable-parser) (defmacro define-pluggable-parser (symbol tag (&rest bvl) &body body) "Adds an element to a pluggable parser. The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- comparable object which identifies the element. Neither SYMBOL nor TAG is evaluated. The BODY is a parser expression; the BVL is a lambda list describing how to bind the arguments supplied via `pluggable-parser'. If a parser with the given TAG is already attached to SYMBOL then the new parser replaces the old one; otherwise it is added to the collection." (multiple-value-bind (docs decls body) (parse-body body) `(pluggable-parser-add ',symbol ',tag (lambda ,bvl ,@docs ,@decls (block ,symbol ,@body))))) ;;;-------------------------------------------------------------------------- ;;; Rewindable parser context protocol. (eval-when (:compile-toplevel :load-toplevel :execute) (export 'parser-capture-place) (defgeneric parser-capture-place (context) (:documentation "Capture the current position of a parser CONTEXT. The return value may later be used with `parser-restore-place'. Be careful: all of this is happening at macro-expansion time.") (:method (context) (error "Parser context ~S doesn't support rewinding." context))) (export 'parser-restore-place) (defgeneric parser-restore-place (context place) (:documentation "`Rewind' the parser CONTEXT back to the captured PLACE. The place was previously captured by `parser-capture-place'. Be careful: all of this is happening at macro-expansion time.")) (export 'parser-release-place) (defgeneric parser-release-place (context place) (:documentation "Release a PLACE captured from the parser CONTEXT. The place was previously captured by `parser-capture-place'. The underlying scanner can use this call to determine whether there are outstanding captured places, and thereby optimize its behaviour. Be careful: all of this is happening at macro-expansion time.") (:method (context place) (declare (ignore context place)) nil)) (export 'parser-places-must-be-released-p) (defgeneric parser-places-must-be-released-p (context) (:documentation "Answer whether places captured from the parser CONTEXT need releasing. Some contexts -- well, actually, their run-time counterparts -- work better if they can keep track of which places are captured, or at least if there are captured places outstanding. If this function returns true (which is the default) then `with-parser-place' (and hence parser macros such as `peek') will expand to `unwind-protect' forms in order to perform the correct cleanup. If it returns false, then the `unwind-protect' is omitted so that the runtime code does't have to register cleanup handlers.") (:method (context) (declare (ignore context)) t))) (export 'with-parser-place) (defmacro with-parser-place ((place context) &body body) "Evaluate BODY surrounded with a binding of PLACE to a captured place. The surrounding code will release the PLACE properly on exit from the body forms. This is all happening at macro-expansion time." ;; ... which means that it's a bit hairy. Fortunately, the nested ;; backquotes aren't that bad. (once-only (context) (with-gensyms (bodyfunc) `(with-gensyms (,place) (flet ((,bodyfunc () ,@body)) `(let ((,,place ,(parser-capture-place ,context))) ,(if (parser-places-must-be-released-p ,context) `(unwind-protect ,(,bodyfunc) (when ,,place ,(parser-release-place ,context ,place))) (,bodyfunc)))))))) (export 'peek) (defparse peek (:context (context t) parser) "Attempt to run PARSER, but rewind the underlying source if it fails." (with-gensyms (value win consumedp) (with-parser-place (place context) `(macrolet ((commit-peeked-place () `(progn ,',(parser-release-place context place) (setf ,',place nil)))) (multiple-value-bind (,value ,win ,consumedp) (parse ,parser) (cond ((or ,win (null ,place)) (values ,value ,win ,consumedp)) (t ,(parser-restore-place context place) (values ,value ,win nil)))))))) (defun commit-peeked-place () "Called by `commit' not lexically within `peek'." (error "`commit' is not within `peek'.")) (export 'commit) (defparse commit () "Commit to the current parse. This releases the place captured by the innermost lexically enclosing `peek'." '(progn (commit-peeked-place) (values nil t nil))) ;;;-------------------------------------------------------------------------- ;;; Character parser context protocol. (export 'character-parser-context) (defclass character-parser-context () () (:documentation "Base class for parsers which read one character at a time.")) (export 'parser-current-char) (defgeneric parser-current-char (context) (:documentation "Return the parser's current character. It is an error to invoke this operation if the parser is at end-of-file; you must check this first. Be careful: all of this is happening at macro-expansion time.")) (export 'if-char) (defparse if-char (:context (context character-parser-context) (&optional (char 'it)) condition consequent alternative) "Basic character-testing parser. If there is a current character, bind it to CHAR and evaluate the CONDITION; if that is true, then evaluate CONSEQUENT and step the parser (in that order); otherwise, if either we're at EOF or the CONDITION returns false, evaluate ALTERNATIVE. The result of `if-char' are the values returned by CONSEQUENT or ALTERNATIVE." (with-gensyms (block) `(block ,block (unless ,(parser-at-eof-p context) (let ((,char ,(parser-current-char context))) (when ,condition (return-from ,block (multiple-value-prog1 ,consequent ,(parser-step context)))))) ,alternative))) (defmethod expand-parser-spec ((context character-parser-context) (spec (eql :any))) "Matches any character; result is the character. The failure indicator is the keyword `:any'." (expand-parser-spec context '(if-char () t (values it t t) (values '(:any) nil nil)))) (export 'char) (defparse char (:context (context character-parser-context) char) "Matches the character CHAR (evaluated); result is the character. The failure indicator is CHAR." (once-only (char) (with-gensyms (it) (expand-parser-spec context `(if-char (,it) (char= ,it ,char) (values ,it t t) (values (list ,char) nil nil)))))) (defmethod expand-parser-spec ((context character-parser-context) (char character)) (expand-parser-spec context `(char ,char))) (export 'satisfies) (defparse satisfies (:context (context character-parser-context) predicate) "Matches a character that satisfies the PREDICATE The PREDICATE is a function designator. On success, the result is the character. The failure indicator is PREDICATE; you probably want to apply a `label'." (with-gensyms (it) (expand-parser-spec context `(if-char (,it) (,predicate ,it) (values ,it t t) (values '(,predicate) nil nil))))) (export 'not) (defparse not (:context (context character-parser-context) char) "Matches any character other than CHAR; result is the character. The failure indicator is (not CHAR)." (once-only (char) (with-gensyms (it) (expand-parser-spec context `(if-char (,it) (char/= ,it ,char) (values ,it t t) (values `((not ,,char)) nil nil)))))) (export 'filter) (defparse filter (:context (context character-parser-context) predicate) "Matches a character that satisfies the PREDICATE; result is the output of PREDICATE. The failure indicator is PREDICATE; you probably want to apply a `label'." ;; Can't do this one with `if-char'. (with-gensyms (block value) `(block ,block (unless ,(parser-at-eof-p context) (let ((,value (,predicate ,(parser-current-char context)))) (when ,value ,(parser-step context) (return-from ,block (values ,value t t))))) (values '(,predicate) nil nil)))) (defmethod expand-parser-spec ((context character-parser-context) (spec (eql :whitespace))) "Matches any sequence of whitespace; result is nil. Cannot fail." `(progn (cond ((and (not ,(parser-at-eof-p context)) (whitespace-char-p ,(parser-current-char context))) (loop ,(parser-step context) (when (or ,(parser-at-eof-p context) (not (whitespace-char-p ,(parser-current-char context)))) (return (values nil t t))))) (t (values nil t nil))))) (defmethod expand-parser-spec ((context character-parser-context) (string string)) "Matches the constituent characters of STRING; result is the string. The failure indicator is STRING; on failure, the input is rewound, so this only works on rewindable contexts." (with-gensyms (i) (unless (typep string 'simple-string) (setf string (make-array (length string) :initial-contents string))) (with-parser-place (place context) `(dotimes (,i ,(length string) (values ,string t ,(plusp (length string)))) (when (or ,(parser-at-eof-p context) (char/= ,(parser-current-char context) (schar ,string ,i))) ,(parser-restore-place context place) (return (values '(,string) nil nil))) ,(parser-step context))))) ;;;-------------------------------------------------------------------------- ;;; Token parser context protocol. (export 'token-parser-context) (defclass token-parser-context () () (:documentation "Base class for parsers which read tokens with associated semantic values. A token, according to the model suggested by this class, has a /type/, which classifies the token and is the main contributer to guiding the parse, and a /value/, which carries additional semantic information. This may seem redundant given Lisp's dynamic type system; but it's not actually capable of drawing sufficiently fine distinctions easily. For example, we can represent a symbol either as a string or a symbol; but using strings conflicts with being able to represent string literals, and using symbols looks ugly and they don't get GCed. Similarly, it'd be convenient to represent punctuation as characters, but that conflicts with using them for character literals. So, we introduce our own notion of token type. Token scanners are expected to signal end-of-file with a token of type `:eof'.")) (export 'parser-token-type) (defgeneric parser-token-type (context) (:documentation "Return the parser's current token type.")) (export 'parser-token-value) (defgeneric parser-token-value (context) (:documentation "Return the parser's current token's semantic value.")) (export 'token) (defparse token (:context (context token-parser-context) type &optional (value nil valuep) &key peekp) "Match tokens of a particular type. A token matches under the following conditions: * If the value of TYPE is `t' then the match succeeds if and only if the parser is not at end-of-file. * If the value of TYPE is not `eql' to the token type then the match fails. * If VALUE is specified, and the value of VALUE is not `equal' to the token semantic value then the match fails. * Otherwise the match succeeds. If the match is successful and the parser is not at end-of-file, and the value of PEEKP is nil then the parser advances to the next token; the result of the match is the token's value. If the match fails then the failure indicator is either TYPE or (TYPE VALUE), depending on whether a VALUE was specified." (once-only (type value peekp) (with-gensyms (tokty tokval) `(let ((,tokty ,(parser-token-type context)) (,tokval ,(parser-token-value context))) (if ,(if (eq type t) `(not (eq ,tokty :eof)) (flet ((check-value (cond) (if valuep `(and ,cond (equal ,tokval ,value)) cond))) (if (constantp type) (check-value `(eql ,tokty ,type)) `(if (eq ,type t) (not (eq ,tokty :eof)) ,(check-value `(eql ,tokty ,type)))))) ,(let* ((result `(values ,tokval t ,(if (constantp peekp) (not peekp) `(not ,peekp)))) (step (parser-step context))) (cond ((not (constantp peekp)) `(multiple-value-prog1 ,result (unless ,peekp ,step))) (peekp result) (t `(multiple-value-prog1 ,result ,step)))) (values (list ,(if valuep `(list ,type ,value) type)) nil nil)))))) (defmethod expand-parser-spec ((context token-parser-context) spec) (if (atom spec) (expand-parser-spec context `(token ,spec)) (call-next-method))) (defmethod expand-parser-spec ((context token-parser-context) (spec string)) (expand-parser-spec context `(token :id ,spec))) ;;;----- That's all, folks --------------------------------------------------