takes into account comments (both C and C++ style), string and character
literals."
- (let ((char-scanner (token-scanner-char-scanner scanner))
- (delim nil)
- (stack nil))
+ (let* ((char-scanner (token-scanner-char-scanner scanner))
+ (delim-match nil) (delim-found nil) (delim-loc nil)
+ (stack nil) (start nil) (tokstart nil) (eofwhine t))
(with-parser-context (character-scanner-context :scanner char-scanner)
;; Hack. If the first character is a newline then discard it
(parse #\newline)
;; This seems the easiest way of gathering stuff.
+ (setf start (file-location char-scanner))
(with-scanner-place (place char-scanner)
- (flet ((push-delim (d)
- (push delim stack)
- (setf delim d))
+ (flet ((push-delim (found match)
+ (push (list delim-found delim-match delim-loc) stack)
+ (setf delim-found found
+ delim-match match
+ delim-loc tokstart))
+
+ (pop-delim ()
+ (destructuring-bind (found match loc) (pop stack)
+ (setf delim-found found
+ delim-match match
+ delim-loc loc)))
(result ()
(let* ((output (scanner-interval char-scanner place))
;; March through characters until we reach the end.
(loop
+ (setf tokstart (file-location char-scanner))
(cond-parse (:consumedp cp :expected exp)
;; Whitespace and comments are universally dull.
;; See if we've reached the end. We must leave the delimiter
;; in the scanner, so `if-char' and its various friends aren't
;; appropriate.
- ((lisp (if (and (null delim)
+ ((lisp (if (and (null delim-match)
+ (not (scanner-at-eof-p char-scanner))
(member (scanner-current-char char-scanner)
end-chars))
(values (result) t t)
(values end-chars nil nil)))
(return (values it t t)))
(:eof
- (lexer-error char-scanner '(:any))
+ (when eofwhine
+ (lexer-error char-scanner nil))
+ (loop
+ (unless delim-found (return))
+ (info-with-location delim-loc
+ "Unmatched `~C' found here" delim-found)
+ (pop-delim))
+ (info-with-location start "C fragment started here")
(return (values (result) t t)))
;; Opening and closing brackets. Opening brackets push things
- ;; onto a stack; closing brackets pop things off again.
- (#\( (push-delim #\)))
- (#\[ (push-delim #\]))
- (#\{ (push-delim #\}))
- ((or #\) #\] #\})
- (if (eql it delim)
- (setf delim (pop stack))
- (cerror* "Unmatched `~C'" it)))
+ ;; onto a stack; closing brackets pop things off again. Pop a
+ ;; bracket even if it doesn't match, to encourage progress
+ ;; towards finding an end-delimiter.
+ (#\( (push-delim #\( #\)))
+ (#\[ (push-delim #\[ #\]))
+ (#\{ (push-delim #\{ #\}))
+ ((lisp (let ((char (scanner-current-char char-scanner)))
+ (case char
+ ((#\) #\] #\})
+ (unless (eql char delim-match)
+ (lexer-error char-scanner
+ (and delim-match
+ (list delim-match)))
+ (when delim-loc
+ (info-with-location
+ delim-loc
+ "Mismatched `~C' found here" delim-found)))
+ (scanner-step char-scanner)
+ (when delim-match (pop-delim))
+ (values char t t))
+ (t
+ (values '(#\) #\] #\}) nil nil))))))
;; String and character literals.
((seq ((quote (or #\" #\'))
(nil (skip-many ()
- (or (and #\\ :any) (not quote))))
- (nil (char quote)))))
+ (or (and #\\ :any) (not quote))))
+ (nil (or (char quote)
+ (seq (:eof)
+ (lexer-error char-scanner (list quote))
+ (info-with-location tokstart
+ "Literal started here")
+ (setf eofwhine nil)))))))
;; Anything else.
(:any)
;; This really shouldn't be able to happen.
(t
(assert cp)
+ (when (scanner-at-eof-p char-scanner)
+ (setf eofwhine nil))
(lexer-error char-scanner exp)))))))))
(export 'parse-delimited-fragment)
(parse (many (acc init (+ (* acc radix) it) :min min)
(label (list :digit radix)
(filter (lambda (ch)
- (digit-char-p ch radix))))))))
+ (digit-char-p ch radix)))))))
+ (start-floc ()
+ ;; This is a little nasty. We scan the first token during
+ ;; instance initialization, as a result of `shared-initialize'
+ ;; on `token-scanner'. Unfortunately, this happens before
+ ;; we've had a chance to initialize our own `filename' slot.
+ ;; This means that we can't use the SCANNER as a file
+ ;; location, however tempting it might be. So we have this
+ ;; hack.
+ (make-file-location (scanner-filename char-scanner)
+ (scanner-line scanner)
+ (scanner-column scanner))))
;; Skip initial junk, and remember the place.
(loop
(progn (write-char it out) out)
:final (get-output-stream-string out))
(or (and #\\ :any) (not quote))))
- (nil (char quote)))
+ (nil (or (char quote)
+ (seq (:eof)
+ (lexer-error char-scanner (list quote))
+ (info-with-location
+ (start-floc) "Literal started here")))))
(ecase quote
(#\" contents)
(#\' (case (length contents)
(1 (char contents 0))
- (0 (cerror* "Empty character literal") #\?)
- (t (cerror* "Too many characters in literal")
+ (0 (cerror*-with-location (start-floc)
+ "Lexical error: ~
+ empty character literal")
+ #\?)
+ (t (cerror*-with-location (start-floc)
+ "Lexical error: ~
+ too many characters ~
+ in literal")
(char contents 0))))))
(values (etypecase it
(character :char)
The result isn't interesting."
(with-parser-context (character-scanner-context :scanner char-scanner)
- (parse (or (and "/*"
- (and (skip-many ()
- (and (skip-many () (not #\*))
- (label "*/" (skip-many (:min 1) #\*)))
- (not #\/))
- #\/))
- (and "//"
- (skip-many () (not #\newline))
- (? #\newline))))))
+ (let ((start (file-location char-scanner)))
+ (parse (or (and "/*"
+ (lisp (let ((state nil))
+ (loop (cond ((scanner-at-eof-p char-scanner)
+ (lexer-error char-scanner
+ (list "*/"))
+ (info-with-location
+ start "Comment started here")
+ (return (values nil t t)))
+ ((char= (scanner-current-char
+ char-scanner)
+ #\*)
+ (setf state '*)
+ (scanner-step char-scanner))
+ ((and (eq state '*)
+ (char= (scanner-current-char
+ char-scanner)
+ #\/))
+ (scanner-step char-scanner)
+ (return (values nil t t)))
+ (t
+ (setf state nil)
+ (scanner-step char-scanner)))))))
+ (and "//"
+ (skip-many () (not #\newline))
+ (? #\newline)))))))
;;;----- That's all, folks --------------------------------------------------