;;; -*-lisp-*-
;;;
;;; Protocol for lexical analysis
;;;
;;; (c) 2009 Straylight/Edgeware
;;;

;;;----- Licensing notice ---------------------------------------------------
;;;
;;; This file is part of the Sensible Object Design, an object system for C.
;;;
;;; SOD is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 2 of the License, or
;;; (at your option) any later version.
;;;
;;; SOD is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with SOD; if not, write to the Free Software Foundation,
;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

(cl:in-package #:sod)

;;;--------------------------------------------------------------------------
;;; Class definition.

(export 'sod-token-scanner)
(defclass sod-token-scanner (token-scanner)
  ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner))
  (:documentation
   "A token scanner for SOD input files.

   Not a lot here, apart from a character scanner to read from and the
   standard token scanner infrastructure."))

;;;--------------------------------------------------------------------------
;;; Indicators and error messages.

(defvar *indicator-map* (make-hash-table)
  "Hash table mapping indicator objects to human-readable descriptions.")

(export 'define-indicator)
(defun define-indicator (indicator description)
  "Associate an INDICATOR with its textual DESCRIPTION.

   Updates the `*indicator-map*', replacing any description previously
   recorded for INDICATOR.  Returns INDICATOR."
  (setf (gethash indicator *indicator-map*) description)
  indicator)

(export 'syntax-error)
(define-condition syntax-error (parser-error base-syntax-error)
  ;; FOUND is a pair (TOKEN-TYPE . TOKEN-VALUE), as built by the
  ;; `syntax-error' function below.
  ((found :type cons))
  (:report (lambda (error stream)
             (labels ((show-token (type value)
                        ;; Describe a token.  Character-typed tokens are
                        ;; shown via `show-char'; token classes get an
                        ;; angle-bracketed description, to set them apart
                        ;; from literal text, which is quoted `like this'.
                        (if (characterp type)
                            (show-char type)
                            (case type
                              ;; The `~@[...~]' part prints the identifier's
                              ;; name only when VALUE is non-nil.
                              (:id (format nil "<identifier~@[ `~A'~]>"
                                           value))
                              (:int "<integer-literal>")
                              (:string "<string-literal>")
                              (:char "<character-literal>")
                              (:eof "<end-of-file>")
                              (:ellipsis "`...'")
                              ;; Unknown token type: dump what we have.
                              (t (format nil "<? ~S~@[ ~S~]>"
                                         type value)))))
                      (show-expected (thing)
                        ;; Describe an expected-token indicator, preferring
                        ;; a description registered by `define-indicator'.
                        (acond ((gethash thing *indicator-map*) it)
                               ((atom thing) (show-token thing nil))
                               ((eq (car thing) :id)
                                (format nil "`~A'" (cadr thing)))
                               (t (format nil "<? ~S>" thing)))))
               (report-parser-error error stream
                                    #'show-expected
                                    (lambda (found)
                                      (show-token (car found)
                                                  (cdr found))))))))

(defun syntax-error (scanner expected &key (continuep t) location)
  "Signal a (maybe) continuable syntax error.

   EXPECTED is passed to the condition as its `:expected' initarg; the
   token found is taken from SCANNER's current token type and value.  The
   error is reported at LOCATION if given, and otherwise at SCANNER.  If
   CONTINUEP is true (the default) then signal using
   `cerror*-with-location'; otherwise use `error-with-location'."
  (funcall (if continuep #'cerror*-with-location #'error-with-location)
           (or location scanner) 'syntax-error
           :expected expected
           :found (cons (token-type scanner) (token-value scanner))))

(export 'lexer-error)
(define-condition lexer-error (parser-error base-lexer-error)
  ;; FOUND is nil at end-of-file (see the `lexer-error' function below), so
  ;; the slot type must admit `null'.  Note that the type named `nil' is the
  ;; empty type: `(or character nil)' would be the same as plain `character'.
  ((found :type (or character null)))
  (:report (lambda (error stream)
             (flet ((show-expected (exp)
                      ;; Describe an expected-character indicator.
                      (typecase exp
                        (character (show-char exp))
                        (string (format nil "`~A'" exp))
                        ;; NOTE(review): the second element of a `:digit'
                        ;; indicator is presumed to be the radix -- confirm
                        ;; against the code which produces these.
                        ((cons (eql :digit) *)
                         (format nil "<radix-~A digit>" (cadr exp)))
                        ((eql :eof) "<end-of-file>")
                        ((eql :any) "<character>")
                        (t (format nil "<? ~S>" exp)))))
               (report-parser-error error stream
                                    #'show-expected #'show-char)))))

(defun lexer-error (char-scanner expected &key location)
  "Signal a continuable lexical error.

   EXPECTED is passed to the condition as its `:expected' initarg.  The
   character found is CHAR-SCANNER's current character, or nil if the
   scanner is at end-of-file.  The error is reported at LOCATION if given,
   and otherwise at CHAR-SCANNER."
  (cerror*-with-location (or location char-scanner) 'lexer-error
                         :expected expected
                         :found (and (not (scanner-at-eof-p char-scanner))
                                     (scanner-current-char char-scanner))))

(export 'skip-until)
(defparse skip-until (:context (context token-scanner-context)
                      (&key (keep-end nil keep-end-p))
                      &rest token-types)
  "Discard tokens until we find one listed in TOKEN-TYPES.

   Each of the TOKEN-TYPES is an expression which evaluates to either a
   two-item list (TYPE VALUE), or a singleton TYPE; the latter is equivalent
   to a list (TYPE t).  Such a pair matches a token with the corresponding
   TYPE and VALUE, except that a VALUE of `t' matches any token value.

   If KEEP-END is true then retain the found token for later; otherwise
   discard it.  KEEP-END defaults to true if multiple TOKEN-TYPES are given;
   otherwise false.  If end-of-file is encountered then the indicator list is
   simply the list of TOKEN-TYPES; otherwise the result is `nil'."
  `(%skip-until ,(parser-scanner context)
                (list ,@token-types)
                ;; An explicitly supplied KEEP-END always wins, even if it
                ;; is nil; only otherwise do we count the TOKEN-TYPES.
                :keep-end ,(if keep-end-p
                               keep-end
                               (> (length token-types) 1))))

(export 'error)
(defparse error (:context (context token-scanner-context)
                 (&key ignore-unconsumed force-progress)
                 sub &optional (recover t) &body body)
  "Try to parse SUB; if it fails then report an error, and parse RECOVER.

   This is the main way to recover from errors and continue parsing.  Even
   then, it's not especially brilliant.

   If the SUB parser succeeds then just propagate its result: it's like we
   were never here.  Otherwise, try to recover in a sensible way so we can
   continue parsing.  The details of this recovery are subject to change, but
   the final action is generally to invoke the RECOVER parser and return its
   result.

   If IGNORE-UNCONSUMED evaluates non-nil, then just propagate a failure of
   SUB if it didn't consume input.  (This makes it suitable for use where the
   parser containing `error' might be optional.)

   FORCE-PROGRESS is handed on to `parse-error-recover' unchanged.  If BODY
   forms are given, they are wrapped up as a function of no arguments and
   passed to `parse-error-recover' as its `:action'; otherwise the action is
   nil."
  `(parse-error-recover ,(parser-scanner context)
                        (parser () ,sub)
                        (parser () ,recover)
                        :ignore-unconsumed ,ignore-unconsumed
                        :force-progress ,force-progress
                        :action ,(and body `(lambda () ,@body))))

(export 'must)
(defparse must (:context (context token-scanner-context)
                sub &optional default)
  "Try to parse SUB; if it fails, report an error, and return DEFAULT.

   This parser can't actually fail."
  ;; The recovery parser `(t DEFAULT)' always succeeds, yielding DEFAULT.
  `(parse (error () ,sub (t ,default))))

;;;--------------------------------------------------------------------------
;;; Lexical analysis utilities.

(export 'scan-comment)
(defun scan-comment (char-scanner)
  "Scan a comment (either `/* ... */' or `// ...') from CHAR-SCANNER.

   The result isn't interesting.  If end-of-file is reached inside a
   `/* ... */' comment then report a lexical error, together with a note
   of where the comment began."
  (with-parser-context (character-scanner-context :scanner char-scanner)
    ;; Remember where we started, so that we can point at the opening
    ;; delimiter if the comment turns out to be unterminated.
    (let ((start (file-location char-scanner)))
      (parse (or (and "/*"
                      (lisp
                        ;; STATE is `*' exactly when the previous character
                        ;; was an asterisk, so that a following `/' ends
                        ;; the comment.
                        (let ((state nil))
                          (loop
                            (cond ((scanner-at-eof-p char-scanner)
                                   (lexer-error char-scanner (list "*/"))
                                   (info-with-location
                                    start "Comment started here")
                                   (return (values nil t t)))
                                  ((char= (scanner-current-char char-scanner)
                                          #\*)
                                   (setf state '*)
                                   (scanner-step char-scanner))
                                  ((and (eq state '*)
                                        (char= (scanner-current-char
                                                char-scanner)
                                               #\/))
                                   (scanner-step char-scanner)
                                   (return (values nil t t)))
                                  (t
                                   (setf state nil)
                                   (scanner-step char-scanner)))))))
                 (and "//"
                      (skip-many () (not #\newline))
                      (? #\newline)))))))

;;;----- That's all, folks --------------------------------------------------