From: Mark Wooding Date: Thu, 26 May 2016 08:26:09 +0000 (+0100) Subject: src/fragment-parse.lisp, src/lexer-{impl,proto}.lisp: Better errors. X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/sod/commitdiff_plain/e046c3f65a8f7241889fb9b6005aac21e2aad1a8?ds=sidebyside src/fragment-parse.lisp, src/lexer-{impl,proto}.lisp: Better errors. Improve error reporting for comments, string and character literals, and C fragments. Most significantly, if we encounter end-of-file while trying to read one of these, then also report where the construct started. For C fragments, if we encounter a mismatching closing bracket, then report where the opening bracket was; and if we hit end- of-file while brackets are still open, then report where they were. Also, deal with a subtlety in the initialization procedure for `sod-token-scanner': the first token is read to `prime the pump' as part of the behaviour of `token-scanner', which is done before `sod-token- scanner' itself has had a chance to initialize its own slots -- such as the filename to report to `file-location'. There are several ways I could have dealt with this -- e.g., introducing some deliberately-less- specific superclass to initialize the filename for me -- but the path of least resistance seems to be to cope with this problem carefully in `scanner-token'. This is anyway an improvement because the function no longer depends on a sensible ambient error-location object being set. --- diff --git a/src/fragment-parse.lisp b/src/fragment-parse.lisp index c958da3..fcaa92e 100644 --- a/src/fragment-parse.lisp +++ b/src/fragment-parse.lisp @@ -40,9 +40,9 @@ (defun scan-c-fragment (scanner end-chars) takes into account comments (both C and C++ style), string and character literals." - (let ((char-scanner (token-scanner-char-scanner scanner)) - (delim nil) - (stack nil)) + (let* ((char-scanner (token-scanner-char-scanner scanner)) + (delim-match nil) (delim-found nil) (delim-loc nil) + (stack nil) (start nil) (tokstart nil) (eofwhine t)) (with-parser-context (character-scanner-context :scanner char-scanner) ;; Hack. If the first character is a newline then discard it @@ -51,11 +51,20 @@ (defun scan-c-fragment (scanner end-chars) (parse #\newline) ;; This seems the easiest way of gathering stuff. + (setf start (file-location char-scanner)) (with-scanner-place (place char-scanner) - (flet ((push-delim (d) - (push delim stack) - (setf delim d)) + (flet ((push-delim (found match) + (push (list delim-found delim-match delim-loc) stack) + (setf delim-found found + delim-match match + delim-loc tokstart)) + + (pop-delim () + (destructuring-bind (found match loc) (pop stack) + (setf delim-found found + delim-match match + delim-loc loc))) (result () (let* ((output (scanner-interval char-scanner place)) @@ -71,6 +80,7 @@ (defun scan-c-fragment (scanner end-chars) ;; March through characters until we reach the end. (loop + (setf tokstart (file-location char-scanner)) (cond-parse (:consumedp cp :expected exp) ;; Whitespace and comments are universally dull. @@ -80,31 +90,58 @@ (defun scan-c-fragment (scanner end-chars) ;; See if we've reached the end. We must leave the delimiter ;; in the scanner, so `if-char' and its various friends aren't ;; appropriate. - ((lisp (if (and (null delim) + ((lisp (if (and (null delim-match) + (not (scanner-at-eof-p char-scanner)) (member (scanner-current-char char-scanner) end-chars)) (values (result) t t) (values end-chars nil nil))) (return (values it t t))) (:eof - (lexer-error char-scanner '(:any)) + (when eofwhine + (lexer-error char-scanner nil)) + (loop + (unless delim-found (return)) + (info-with-location delim-loc + "Unmatched `~C' found here" delim-found) + (pop-delim)) + (info-with-location start "C fragment started here") (return (values (result) t t))) ;; Opening and closing brackets. Opening brackets push things - ;; onto a stack; closing brackets pop things off again. - (#\( (push-delim #\))) - (#\[ (push-delim #\])) - (#\{ (push-delim #\})) - ((or #\) #\] #\}) - (if (eql it delim) - (setf delim (pop stack)) - (cerror* "Unmatched `~C'" it))) + ;; onto a stack; closing brackets pop things off again. Pop a + ;; bracket even if it doesn't match, to encourage progress + ;; towards finding an end-delimiter. + (#\( (push-delim #\( #\))) + (#\[ (push-delim #\[ #\])) + (#\{ (push-delim #\{ #\})) + ((lisp (let ((char (scanner-current-char char-scanner))) + (case char + ((#\) #\] #\}) + (unless (eql char delim-match) + (lexer-error char-scanner + (and delim-match + (list delim-match))) + (when delim-loc + (info-with-location + delim-loc + "Mismatched `~C' found here" delim-found))) + (scanner-step char-scanner) + (when delim-match (pop-delim)) + (values char t t)) + (t + (values '(#\) #\] #\}) nil nil)))))) ;; String and character literals. ((seq ((quote (or #\" #\')) (nil (skip-many () - (or (and #\\ :any) (not quote)))) - (nil (char quote))))) + (or (and #\\ :any) (not quote)))) + (nil (or (char quote) + (seq (:eof) + (lexer-error char-scanner (list quote)) + (info-with-location tokstart + "Literal started here") + (setf eofwhine nil))))))) ;; Anything else. (:any) @@ -112,6 +149,8 @@ (defun scan-c-fragment (scanner end-chars) ;; This really shouldn't be able to happen. (t (assert cp) + (when (scanner-at-eof-p char-scanner) + (setf eofwhine nil)) (lexer-error char-scanner exp))))))))) (export 'parse-delimited-fragment) diff --git a/src/lexer-impl.lisp b/src/lexer-impl.lisp index f00994a..48109b1 100644 --- a/src/lexer-impl.lisp +++ b/src/lexer-impl.lisp @@ -101,7 +101,18 @@ (defmethod scanner-token ((scanner sod-token-scanner)) (parse (many (acc init (+ (* acc radix) it) :min min) (label (list :digit radix) (filter (lambda (ch) - (digit-char-p ch radix)))))))) + (digit-char-p ch radix))))))) + (start-floc () + ;; This is a little nasty. We scan the first token during + ;; instance initialization, as a result of `shared-initialize' + ;; on `token-scanner'. Unfortunately, this happens before + ;; we've had a chance to initialize our own `filename' slot. + ;; This means that we can't use the SCANNER as a file + ;; location, however tempting it might be. So we have this + ;; hack. + (make-file-location (scanner-filename char-scanner) + (scanner-line scanner) + (scanner-column scanner)))) ;; Skip initial junk, and remember the place. (loop @@ -128,13 +139,23 @@ (defmethod scanner-token ((scanner sod-token-scanner)) (progn (write-char it out) out) :final (get-output-stream-string out)) (or (and #\\ :any) (not quote)))) - (nil (char quote))) + (nil (or (char quote) + (seq (:eof) + (lexer-error char-scanner (list quote)) + (info-with-location + (start-floc) "Literal started here"))))) (ecase quote (#\" contents) (#\' (case (length contents) (1 (char contents 0)) - (0 (cerror* "Empty character literal") #\?) - (t (cerror* "Too many characters in literal") + (0 (cerror*-with-location (start-floc) + "Lexical error: ~ + empty character literal") + #\?) + (t (cerror*-with-location (start-floc) + "Lexical error: ~ + too many characters ~ + in literal") (char contents 0)))))) (values (etypecase it (character :char) diff --git a/src/lexer-proto.lisp b/src/lexer-proto.lisp index 5405da7..122da75 100644 --- a/src/lexer-proto.lisp +++ b/src/lexer-proto.lisp @@ -150,14 +150,31 @@ (defun scan-comment (char-scanner) The result isn't interesting." (with-parser-context (character-scanner-context :scanner char-scanner) - (parse (or (and "/*" - (and (skip-many () - (and (skip-many () (not #\*)) - (label "*/" (skip-many (:min 1) #\*))) - (not #\/)) - #\/)) - (and "//" - (skip-many () (not #\newline)) - (? #\newline)))))) + (let ((start (file-location char-scanner))) + (parse (or (and "/*" + (lisp (let ((state nil)) + (loop (cond ((scanner-at-eof-p char-scanner) + (lexer-error char-scanner + (list "*/")) + (info-with-location + start "Comment started here") + (return (values nil t t))) + ((char= (scanner-current-char + char-scanner) + #\*) + (setf state '*) + (scanner-step char-scanner)) + ((and (eq state '*) + (char= (scanner-current-char + char-scanner) + #\/)) + (scanner-step char-scanner) + (return (values nil t t))) + (t + (setf state nil) + (scanner-step char-scanner))))))) + (and "//" + (skip-many () (not #\newline)) + (? #\newline))))))) ;;;----- That's all, folks --------------------------------------------------