;;;----- Licensing notice ---------------------------------------------------
;;;
-;;; This file is part of the Sensble Object Design, an object system for C.
+;;; This file is part of the Sensible Object Design, an object system for C.
;;;
;;; SOD is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
(cl:in-package #:sod)
;;;--------------------------------------------------------------------------
-;;; Accessors.
+;;; Class definition.
-(export 'lexer-char)
-(defgeneric lexer-char (lexer)
+(export 'sod-token-scanner)
+(defclass sod-token-scanner (token-scanner)
+ ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner))
(:documentation
- "Return the current lookahead character from the LEXER.
+ "A token scanner for SOD input files.
- When the lexer is first created, there is no lookahead character: you must
- `prime the pump' by calling `next-char'. The lexer represents
- encountering the end of its input stream by setting the lookahead
- character to nil. At this point it is still possible to push back
- characters."))
+ Not a lot here, apart from a character scanner to read from and the
+ standard token scanner infrastructure."))
;;;--------------------------------------------------------------------------
-;;; Formatting tokens.
-
-(defgeneric format-token (token-type &optional token-value)
- (:documentation
- "Return a string describing a token with the specified type and value.")
- (:method ((token-type (eql :eof)) &optional token-value)
- (declare (ignore token-value))
- "<end-of-file>")
- (:method ((token-type (eql :string)) &optional token-value)
- (declare (ignore token-value))
- "<string-literal>")
- (:method ((token-type (eql :char)) &optional token-value)
- (declare (ignore token-value))
- "<character-literal>")
- (:method ((token-type (eql :id)) &optional token-value)
- (format nil "<identifier~@[ `~A'~]>" token-value))
- (:method ((token-type symbol) &optional token-value)
- (declare (ignore token-value))
- (check-type token-type keyword)
- (format nil "`~(~A~)'" token-type))
- (:method ((token-type character) &optional token-value)
- (declare (ignore token-value))
- (format nil "~:[<~:C>~;`~C'~]"
- (and (graphic-char-p token-type)
- (char/= token-type #\space))
- token-type)))
-
-;;;--------------------------------------------------------------------------
-;;; Reading and pushing back characters.
-
-(export 'next-char)
-(defgeneric next-char (lexer)
- (:documentation
- "Fetch the next character from the LEXER's input stream.
-
- Read a character from the input stream, and store it in the LEXER's CHAR
- slot. The character stored is returned. If characters have been pushed
- back then pushed-back characters are used instead of the input stream. If
- there are no more characters to be read then the lookahead character is
- nil. Returns the new lookahead character.
-
- (This function is primarily intended for the use of lexer subclasses.)"))
-
-(export 'pushback-char)
-(defgeneric pushback-char (lexer char)
- (:documentation
- "Push the CHAR back into the lexer.
-
- Make CHAR be the current lookahead character (stored in the LEXER's CHAR
- slot). The previous lookahead character is pushed down, and will be made
- available again once this character is consumed by NEXT-CHAR.
-
- (This function is primarily intended for the use of lexer subclasses.)"))
-
-(defgeneric fixup-stream* (lexer thunk)
- (:documentation
- "Helper function for `with-lexer-stream'.
-
- This function does the main work for `with-lexer-stream'. The THUNK is
- invoked on a single argument, the LEXER's underlying STREAM."))
-
-(export 'with-lexer-stream)
-(defmacro with-lexer-stream ((streamvar lexer) &body body)
- "Evaluate BODY with STREAMVAR bound to the LEXER's input stream.
-
- The STREAM is fixed up so that the next character read (e.g., using
- `read-char') will be the lexer's current lookahead character. Once the
- BODY completes, the next character in the stream is read and set as the
- lookahead character. It is an error if the lexer has pushed-back
- characters (since these can't be pushed back into the input stream
- properly)."
-
- `(fixup-stream* ,lexer (lambda (,streamvar) ,@body)))
+;;; Indicators and error messages.
+
+(defvar *indicator-map* (make-hash-table)
+ "Hash table mapping indicator objects to human-readable descriptions.")
+
+(export 'define-indicator)
+(defun define-indicator (indicator description)
+ "Associate an INDICATOR with its textual DESCRIPTION.
+
+   Updates the `*indicator-map*'."
+ (setf (gethash indicator *indicator-map*) description)
+ indicator)
+
+(export 'syntax-error)
+(defun syntax-error (scanner expected &key (continuep t) location)
+ "Signal a (maybe) continuable syntax error."
+ (labels ((show-token (type value)
+ (if (characterp type)
+ (format nil "~/sod::show-char/" type)
+ (case type
+ (:id (format nil "<identifier~@[ `~A'~]>" value))
+ (:int "<integer-literal>")
+ (:string "<string-literal>")
+ (:char "<character-literal>")
+ (:eof "<end-of-file>")
+ (:ellipsis "`...'")
+ (t (format nil "<? ~S~@[ ~S~]>" type value)))))
+ (show-expected (thing)
+ (acond ((gethash thing *indicator-map*) it)
+ ((atom thing) (show-token thing nil))
+ ((eq (car thing) :id)
+ (format nil "`~A'" (cadr thing)))
+ (t (format nil "<? ~S>" thing)))))
+ (funcall (if continuep #'cerror*-with-location #'error-with-location)
+ (or location scanner)
+ "Syntax error: ~
+ expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
+ but found ~A"
+ (mapcar #'show-expected expected)
+ (show-token (token-type scanner) (token-value scanner)))))
+
+(export 'lexer-error)
+(defun lexer-error (char-scanner expected &key location)
+ "Signal a continuable lexical error."
+ (cerror*-with-location (or location char-scanner)
+ "Lexical error: ~
+ expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
+ but found ~/sod::show-char/"
+ (mapcar (lambda (exp)
+ (typecase exp
+ (character (format nil "~/sod::show-char/" exp))
+ (string (format nil "`~A'" exp))
+ ((cons (eql :digit) *) (format nil "<radix-~A digit>"
+ (cadr exp)))
+ ((eql :eof) "<end-of-file>")
+ ((eql :any) "<character>")
+ (t (format nil "<? ~S>" exp))))
+ expected)
+ (and (not (scanner-at-eof-p char-scanner))
+ (scanner-current-char char-scanner))))
+
+(export 'skip-until)
+(defparse skip-until (:context (context token-scanner-context)
+ (&key (keep-end nil keep-end-p))
+ &rest token-types)
+ "Discard tokens until we find one listed in TOKEN-TYPES.
+
+ If KEEP-END is true then retain the found token for later; otherwise
+ discard it. KEEP-END defaults to true if multiple TOKEN-TYPES are given;
+ otherwise false. If end-of-file is encountered then the indicator list is
+ simply the list of TOKEN-TYPES; otherwise the result is `nil'."
+ `(skip-until ,(parser-scanner context)
+ (list ,@token-types)
+ :keep-end ,(if keep-end-p keep-end
+ (> (length token-types) 1))))
+
+(export 'error)
+(defparse error (:context (context token-scanner-context)
+ (&key ignore-unconsumed)
+ sub &optional (recover t))
+ "Try to parse SUB; if it fails then report an error, and parse RECOVER.
+
+ This is the main way to recover from errors and continue parsing. Even
+ then, it's not especially brilliant.
+
+ If the SUB parser succeeds then just propagate its result: it's like we
+ were never here. Otherwise, try to recover in a sensible way so we can
+ continue parsing. The details of this recovery are subject to change, but
+ the final action is generally to invoke the RECOVER parser and return its
+ result.
+
+ If IGNORE-UNCONSUMED evaluates non-nil, then just propagate a failure of
+ SUB if it didn't consume input. (This makes it suitable for use where the
+ parser containing `error' might be optional.)"
+ `(parse-error-recover ,(parser-scanner context)
+ (parser () ,sub)
+ (parser () ,recover)
+ :ignore-unconsumed ,ignore-unconsumed))
;;;--------------------------------------------------------------------------
-;;; Reading and pushing back tokens.
-
-(export 'scan-token)
-(defgeneric scan-token (lexer)
- (:documentation
- "Internal protocol for scanning tokens from an input stream.
-
- Implementing a method on this function is the main responsibility of LEXER
- subclasses; it is called by the user-facing `next-token' function.
-
- The method should consume characters (using `next-char') as necessary, and
- return two values: a token type and token value. These will be stored in
- the corresponding slots in the lexer object in order to provide the user
- with one-token lookahead."))
-
-(export 'next-token)
-(defgeneric next-token (lexer)
- (:documentation
- "Scan a token from an input stream.
-
- This function scans a token from an input stream. Two values are
- returned: a `token type' and a `token value'. These are opaque to the
- LEXER base class, but the intent is that the token type be significant to
- determining the syntax of the input, while the token value carries any
- additional information about the token's semantic content. The token type
- and token value are also made available for lookahead via accessors
- TOKEN-TYPE and TOKEN-VALUE on the `lexer' object.
-
- The new lookahead token type and value are returned as two separate
- values.
-
- If tokens have been pushed back (see `pushback-token') then they are
- returned one by one instead of scanning the stream."))
-
-(export 'pushback-token)
-(defgeneric pushback-token (lexer token-type &optional token-value location)
- (:documentation
- "Push a token back into the lexer.
-
- Make the given TOKEN-TYPE and TOKEN-VALUE be the current lookahead token.
- The previous lookahead token is pushed down, and will be made available
- agan once this new token is consumed by NEXT-TOKEN. If LOCATION is
- non-nil then `file-location' is saved and replaced by LOCATION. The
- TOKEN-TYPE and TOKEN-VALUE can be anything at all: for instance, they need
- not be values which can actually be returned by NEXT-TOKEN."))
-
-;;;--------------------------------------------------------------------------
-;;; Utilities.
-
-(export 'skip-spaces)
-(defgeneric skip-spaces (lexer)
- (:documentation
- "Skip over whitespace characters in the LEXER.
-
- There must be a lookahead character; when the function returns, the
- lookahead character will be a non-whitespace character or nil if there
- were no non-whitespace characters remaining. Returns the new lookahead
- character."))
-
-(export 'require-token)
-(defun require-token
- (lexer wanted-token-type &key (errorp t) (consumep t) default)
- "Require a particular token to appear.
-
- If the LEXER's current lookahead token has type `wanted-token-type' then
- consume it (using `next-token') and return its value. Otherwise, if the
- token doesn't have the requested type then signal a continuable error
- describing the situation and return DEFAULT (which defaults to nil).
-
- If ERRORP is false then no error is signalled; this is useful for
- consuming or checking for optional punctuation. If CONSUMEP is false then
- a matching token is not consumed; non-matching tokens are never consumed."
-
- (with-slots (token-type token-value) lexer
- (cond ((eql token-type wanted-token-type)
- (prog1 token-value
- (when consumep (next-token lexer))))
- (errorp
- (cerror* "Expected ~A but found ~A"
- (format-token wanted-token-type)
- (format-token token-type token-value))
- default)
- (t
- default))))
+;;; Lexical analysis utilities.
+
+(export 'scan-comment)
+(defun scan-comment (char-scanner)
+ "Scan a comment (either `/* ... */' or `// ...') from CHAR-SCANNER.
+
+ The result isn't interesting."
+ (with-parser-context (character-scanner-context :scanner char-scanner)
+ (parse (or (and "/*"
+ (and (skip-many ()
+ (and (skip-many () (not #\*))
+ (label "*/" (skip-many (:min 1) #\*)))
+ (not #\/))
+ #\/))
+ (and "//"
+ (skip-many () (not #\newline))
+ (? #\newline))))))
;;;----- That's all, folks --------------------------------------------------