-;;; Formatting tokens.
-
-(defgeneric format-token (token-type &optional token-value)
- (:documentation
- "Return a string describing a token with the specified type and value.")
- (:method ((token-type (eql :eof)) &optional token-value)
- (declare (ignore token-value))
- "<end-of-file>")
- (:method ((token-type (eql :string)) &optional token-value)
- (declare (ignore token-value))
- "<string-literal>")
- (:method ((token-type (eql :char)) &optional token-value)
- (declare (ignore token-value))
- "<character-literal>")
- (:method ((token-type (eql :id)) &optional token-value)
- (format nil "<identifier~@[ `~A'~]>" token-value))
- (:method ((token-type symbol) &optional token-value)
- (declare (ignore token-value))
- (check-type token-type keyword)
- (format nil "`~(~A~)'" token-type))
- (:method ((token-type character) &optional token-value)
- (declare (ignore token-value))
- (format nil "~:[<~:C>~;`~C'~]"
- (and (graphic-char-p token-type)
- (char/= token-type #\space))
- token-type)))
-
-;;;--------------------------------------------------------------------------
-;;; Reading and pushing back characters.
-
-(export 'next-char)
-(defgeneric next-char (lexer)
- (:documentation
- "Fetch the next character from the LEXER's input stream.
-
- Read a character from the input stream, and store it in the LEXER's CHAR
- slot. The character stored is returned. If characters have been pushed
- back then pushed-back characters are used instead of the input stream. If
- there are no more characters to be read then the lookahead character is
- nil. Returns the new lookahead character.
-
- (This function is primarily intended for the use of lexer subclasses.)"))
-
-(export 'pushback-char)
-(defgeneric pushback-char (lexer char)
- (:documentation
- "Push the CHAR back into the lexer.
-
- Make CHAR be the current lookahead character (stored in the LEXER's CHAR
- slot). The previous lookahead character is pushed down, and will be made
- available again once this character is consumed by NEXT-CHAR.
-
- (This function is primarily intended for the use of lexer subclasses.)"))
-
-(defgeneric fixup-stream* (lexer thunk)
- (:documentation
- "Helper function for `with-lexer-stream'.
-
- This function does the main work for `with-lexer-stream'. The THUNK is
- invoked on a single argument, the LEXER's underlying STREAM."))
-
-(export 'with-lexer-stream)
-(defmacro with-lexer-stream ((streamvar lexer) &body body)
- "Evaluate BODY with STREAMVAR bound to the LEXER's input stream.
-
- The STREAM is fixed up so that the next character read (e.g., using
- `read-char') will be the lexer's current lookahead character. Once the
- BODY completes, the next character in the stream is read and set as the
- lookahead character. It is an error if the lexer has pushed-back
- characters (since these can't be pushed back into the input stream
- properly)."
-
- `(fixup-stream* ,lexer (lambda (,streamvar) ,@body)))
+;;; Indicators and error messages.
+
+(defvar *indicator-map* (make-hash-table)
+ "Hash table mapping indicator objects to human-readable descriptions.")
+
+(export 'define-indicator)
+(defun define-indicator (indicator description)
+ "Associate an INDICATOR with its textual DESCRIPTION.
+
+ Updates the the `*indicator-map*'."
+ (setf (gethash indicator *indicator-map*) description)
+ indicator)
+
+(export 'syntax-error)
+(defun syntax-error (scanner expected &key (continuep t))
+ "Signal a (maybe) continuable syntax error."
+ (labels ((show-token (type value)
+ (if (characterp type)
+ (format nil "~/sod::show-char/" type)
+ (case type
+ (:id (format nil "<identifier~@[ `~A'~]>" value))
+ (:int "<integer-literal>")
+ (:string "<string-literal>")
+ (:char "<character-literal>")
+ (:eof "<end-of-file>")
+ (:ellipsis "`...'")
+ (t (format nil "<? ~S~@[ ~S~]>" type value)))))
+ (show-expected (thing)
+ (acond ((gethash thing *indicator-map*) it)
+ ((atom thing) (show-token thing nil))
+ ((eq (car thing) :id)
+ (format nil "`~A'" (cadr thing)))
+ (t (format nil "<? ~S>" thing)))))
+ (funcall (if continuep #'cerror* #'error)
+ "Syntax error: ~
+ expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
+ but found ~A"
+ (mapcar #'show-expected expected)
+ (show-token (token-type scanner) (token-value scanner)))))
+
+(export 'lexer-error)
+(defun lexer-error (char-scanner expected consumedp)
+ "Signal a continuable lexical error."
+ (cerror* "Lexical error: ~
+ expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
+ but found ~/sod::show-char/~
+ ~@[ at ~A~]"
+ (mapcar (lambda (exp)
+ (typecase exp
+ (character (format nil "~/sod::show-char/" exp))
+ (string (format nil "`~A'" exp))
+ ((cons (eql :digit) *) (format nil "<radix-~A digit>"
+ (cadr exp)))
+ ((eql :eof) "<end-of-file>")
+ ((eql :any) "<character>")
+ (t (format nil "<? ~S>" exp))))
+ expected)
+ (and (not (scanner-at-eof-p char-scanner))
+ (scanner-current-char char-scanner))
+ (and consumedp (file-location char-scanner))))
+
+(export 'skip-until)
+(defparse skip-until (:context (context token-scanner-context)
+ (&key (keep-end nil keep-end-p))
+ &rest token-types)
+ "Discard tokens until we find one listed in TOKEN-TYPES.
+
+ If KEEP-END is true then retain the found token for later; otherwise
+ discard it. KEEP-END defaults to true if multiple TOKEN-TYPES are given;
+ otherwise false. If end-of-file is encountered then the indicator list is
+ simply the list of TOKEN-TYPES; otherwise the result is `nil'."
+ `(skip-until ,(parser-scanner context)
+ (list ,@token-types)
+ :keep-end ,(if keep-end-p keep-end
+ (> (length token-types) 1))))
+
+(export 'error)
+(defparse error (:context (context token-scanner-context)
+ (&key ignore-unconsumed)
+ sub &optional (recover t))
+ "Try to parse SUB; if it fails then report an error, and parse RECOVER.
+
+ This is the main way to recover from errors and continue parsing. Even
+ then, it's not especially brilliant.
+
+ If the SUB parser succeeds then just propagate its result: it's like we
+ were never here. Otherwise, try to recover in a sensible way so we can
+ continue parsing. The details of this recovery are subject to change, but
+ the final action is generally to invoke the RECOVER parser and return its
+ result.
+
+ If IGNORE-UNCONSUMED evaluates non-nil, then just propagate a failure of
+ SUB if it didn't consume input. (This makes it suitable for use where the
+ parser containing `error' might be optional.)"
+ `(parse-error-recover ,(parser-scanner context)
+ (parser () ,sub)
+ (parser () ,recover)
+ :ignore-unconsumed ,ignore-unconsumed))