;;; -*-lisp-*- ;;; ;;; Lexical analysis for input parser ;;; ;;; (c) 2009 Straylight/Edgeware ;;; ;;;----- Licensing notice --------------------------------------------------- ;;; ;;; This file is part of the Sensble Object Design, an object system for C. ;;; ;;; SOD is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 2 of the License, or ;;; (at your option) any later version. ;;; ;;; SOD is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with SOD; if not, write to the Free Software Foundation, ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. (cl:in-package #:sod) ;;;-------------------------------------------------------------------------- ;;; Class definition. (export 'sod-token-scanner) (defclass sod-token-scanner (token-scanner) ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner)) (:documentation "A token scanner for SOD input files. Not a lot here, apart from a character scanner to read from and the standard token scanner infrastructure.")) (defmethod shared-initialize :after ((scanner sod-token-scanner) slot-names &key) (default-slot (scanner 'sod-parser::filename slot-names) (scanner-filename (token-scanner-char-scanner scanner)))) ;;;-------------------------------------------------------------------------- ;;; Utilities. (defun show-char (stream char &optional colonp atsignp) "Format CHAR to STREAM in a readable way. Usable in `format''s ~/.../ command." (declare (ignore colonp atsignp)) (cond ((null char) (write-string "" stream)) ((and (graphic-char-p char) (char/= char #\space)) (format stream "`~C'" char)) (t (format stream "<~(~:C~)>" char)))) (defun scan-comment (scanner) "Scan a comment (either `/* ... */' or `// ...') from SCANNER. The result isn't interesting." (with-parser-context (character-scanner-context :scanner scanner) (parse (or (and "/*" (and (skip-many () (and (skip-many () (not #\*)) (label "*/" (skip-many (:min 1) #\*))) (not #\/)) #\/)) (and "//" (skip-many () (not #\newline)) (? #\newline)))))) (defmethod make-scanner-stream ((scanner sod-token-scanner)) (make-scanner-stream (token-scanner-char-scanner scanner))) ;;;-------------------------------------------------------------------------- ;;; Error reporting. (defvar *indicator-map* (make-hash-table) "Hash table mapping indicator objects to human-readable descriptions.") (defun define-indicator (indicator description) (setf (gethash indicator *indicator-map*) description) indicator) (export 'syntax-error) (defun syntax-error (scanner expected &key (continuep t)) "Signal a (maybe) continuable syntax error." (labels ((show-token (type value) (if (characterp type) (format nil "~/sod::show-char/" type) (case type (:id (format nil "" value)) (:string "") (:char "") (:eof "") (:ellipsis "`...'") (t (format nil "" type value))))) (show-expected (thing) (acond ((gethash thing *indicator-map*) it) ((atom thing) (show-token thing nil)) ((eq (car thing) :id) (format nil "`~A'" (cadr thing))) (t (format nil "" thing))))) (funcall (if continuep #'cerror* #'error) "Syntax error: ~ expected ~{~#[~;~A~;~A or ~A~:;~A, ~]~} ~ but found ~A" (mapcar #'show-expected expected) (show-token (token-type scanner) (token-value scanner))))) (export 'lexer-error) (defun lexer-error (char-scanner expected consumedp) "Signal a continuable lexical error." (cerror* "Lexical error: ~ expected ~{~#[~;~A~;~A or ~A~;:~A, ~]~} ~ but found ~/sod::show-char/~ ~@[ at ~A~]" (mapcar (lambda (exp) (typecase exp (character (format nil "~/sod::show-char/" exp)) (string (format nil "`~A'" exp)) ((cons (eql :digit) *) (format nil "" (cadr exp))) ((eql :eof) "") ((eql :any) "") (t (format nil "" exp)))) expected) (and (not (scanner-at-eof-p char-scanner)) (scanner-current-char char-scanner)) (and consumedp (file-location char-scanner)))) ;;;-------------------------------------------------------------------------- ;;; Token scanner protocol implementation. (defmethod scanner-token ((scanner sod-token-scanner)) (with-slots (char-scanner line column) scanner (with-parser-context (character-scanner-context :scanner char-scanner) (flet ((scan-digits (&key (radix 10) (min 1) (init 0)) ;; Scan an return a sequence of digits. (parse (many (acc init (+ (* acc radix) it) :min min) (label (list :digit radix) (filter (lambda (ch) (digit-char-p ch radix)))))))) ;; Skip initial junk, and remember the place. (loop (setf (scanner-line scanner) (scanner-line char-scanner) (scanner-column scanner) (scanner-column char-scanner)) (cond-parse (:consumedp cp :expected exp) ((satisfies whitespace-char-p) (parse :whitespace)) ((scan-comment char-scanner)) (t (if cp (lexer-error char-scanner exp cp) (return))))) ;; Now parse something. (cond-parse (:consumedp cp :expected exp) ;; Alphanumerics mean we read an identifier. ((or #\_ (satisfies alpha-char-p)) (values :id (with-output-to-string (out) (write-char it out) (parse (many (nil nil (write-char it out)) (or #\_ (satisfies alphanumericp))))))) ;; Quotes introduce a literal. ((seq ((quote (or #\" #\')) (contents (many (out (make-string-output-stream) (progn (write-char it out) out) :final (get-output-stream-string out)) (or (and #\\ :any) (not quote)))) (nil (char quote))) (ecase quote (#\" contents) (#\' (case (length contents) (1 (char contents 0)) (0 (cerror* "Empty character literal") #\?) (t (cerror* "Too many characters in literal") (char contents 0)))))) (values (etypecase it (character :char) (string :string)) it)) ;; Zero introduces a chosen-radix integer. ((and #\0 (or (and (or #\b #\B) (scan-digits :radix 2)) (and (or #\o #\O) (scan-digits :radix 8)) (and (or #\x #\X) (scan-digits :radix 16)) (scan-digits :radix 8 :min 0))) (values :int it)) ;; Any other digit forces radix-10. ((seq ((d (filter digit-char-p)) (i (scan-digits :radix 10 :min 0 :init d))) i) (values :int it)) ;; Some special punctuation sequences are single tokens. ("..." (values :ellipsis nil)) ;; Any other character is punctuation. (:any (values it nil)) ;; End of file means precisely that. (:eof (values :eof nil)) ;; Report errors and try again. Because we must have consumed some ;; input in order to get here (we've matched both :any and :eof) we ;; must make progress on every call. (t (assert cp) (lexer-error char-scanner exp cp) (scanner-token scanner))))))) ;;;----- That's all, folks --------------------------------------------------