chiark / gitweb /
src/fragment-parse.lisp, src/lexer-{impl,proto}.lisp: Better errors.
authorMark Wooding <mdw@distorted.org.uk>
Thu, 26 May 2016 08:26:09 +0000 (09:26 +0100)
committerMark Wooding <mdw@distorted.org.uk>
Fri, 8 Jun 2018 18:58:28 +0000 (19:58 +0100)
Improve error reporting for comments, string and character literals, and
C fragments.  Most significantly, if we encounter end-of-file while
trying to read one of these, then also report where the construct
started.  For C fragments, if we encounter a mismatching closing
bracket, then report where the opening bracket was; and if we hit end-
of-file while brackets are still open, then report where they were.

Also, deal with a subtlety in the initialization procedure for
`sod-token-scanner': the first token is read to `prime the pump' as part
of the behaviour of `token-scanner', which is done before `sod-token-
scanner' itself has had a chance to initialize its own slots -- such as
the filename to report to `file-location'.  There are several ways I
could have dealt with this -- e.g., introducing some deliberately-less-
specific superclass to initialize the filename for me -- but the path of
least resistance seems to be to cope with this problem carefully in
`scanner-token'.  This is anyway an improvement because the function no
longer depends on a sensible ambient error-location object being set.

src/fragment-parse.lisp
src/lexer-impl.lisp
src/lexer-proto.lisp

index c958da350ee6605f2f22b1827ceafc3ad6e787cd..fcaa92e43308454f0fc090e00d709f054f8f46f9 100644 (file)
@@ -40,9 +40,9 @@ (defun scan-c-fragment (scanner end-chars)
    takes into account comments (both C and C++ style), string and character
    literals."
 
-  (let ((char-scanner (token-scanner-char-scanner scanner))
-       (delim nil)
-       (stack nil))
+  (let* ((char-scanner (token-scanner-char-scanner scanner))
+        (delim-match nil) (delim-found nil) (delim-loc nil)
+        (stack nil) (start nil) (tokstart nil) (eofwhine t))
     (with-parser-context (character-scanner-context :scanner char-scanner)
 
       ;; Hack.  If the first character is a newline then discard it
@@ -51,11 +51,20 @@ (defun scan-c-fragment (scanner end-chars)
       (parse #\newline)
 
       ;; This seems the easiest way of gathering stuff.
+      (setf start (file-location char-scanner))
       (with-scanner-place (place char-scanner)
 
-       (flet ((push-delim (d)
-                (push delim stack)
-                (setf delim d))
+       (flet ((push-delim (found match)
+                (push (list delim-found delim-match delim-loc) stack)
+                (setf delim-found found
+                      delim-match match
+                      delim-loc tokstart))
+
+              (pop-delim ()
+                (destructuring-bind (found match loc) (pop stack)
+                  (setf delim-found found
+                        delim-match match
+                        delim-loc loc)))
 
               (result ()
                 (let* ((output (scanner-interval char-scanner place))
@@ -71,6 +80,7 @@ (defun scan-c-fragment (scanner end-chars)
 
          ;; March through characters until we reach the end.
          (loop
+           (setf tokstart (file-location char-scanner))
            (cond-parse (:consumedp cp :expected exp)
 
              ;; Whitespace and comments are universally dull.
@@ -80,31 +90,58 @@ (defun scan-c-fragment (scanner end-chars)
              ;; See if we've reached the end.  We must leave the delimiter
              ;; in the scanner, so `if-char' and its various friends aren't
              ;; appropriate.
-             ((lisp (if (and (null delim)
+             ((lisp (if (and (null delim-match)
+                             (not (scanner-at-eof-p char-scanner))
                              (member (scanner-current-char char-scanner)
                                      end-chars))
                         (values (result) t t)
                         (values end-chars nil nil)))
               (return (values it t t)))
              (:eof
-              (lexer-error char-scanner '(:any))
+              (when eofwhine
+                (lexer-error char-scanner nil))
+              (loop
+                (unless delim-found (return))
+                (info-with-location delim-loc
+                                    "Unmatched `~C' found here" delim-found)
+                (pop-delim))
+              (info-with-location start "C fragment started here")
               (return (values (result) t t)))
 
              ;; Opening and closing brackets.  Opening brackets push things
-             ;; onto a stack; closing brackets pop things off again.
-             (#\( (push-delim #\)))
-             (#\[ (push-delim #\]))
-             (#\{ (push-delim #\}))
-             ((or #\) #\] #\})
-              (if (eql it delim)
-                  (setf delim (pop stack))
-                  (cerror* "Unmatched `~C'" it)))
+             ;; onto a stack; closing brackets pop things off again.  Pop a
+             ;; bracket even if it doesn't match, to encourage progress
+             ;; towards finding an end-delimiter.
+             (#\( (push-delim #\( #\)))
+             (#\[ (push-delim #\[ #\]))
+             (#\{ (push-delim #\{ #\}))
+             ((lisp (let ((char (scanner-current-char char-scanner)))
+                      (case char
+                        ((#\) #\] #\})
+                         (unless (eql char delim-match)
+                           (lexer-error char-scanner
+                                        (and delim-match
+                                             (list delim-match)))
+                           (when delim-loc
+                             (info-with-location
+                              delim-loc
+                              "Mismatched `~C' found here" delim-found)))
+                         (scanner-step char-scanner)
+                         (when delim-match (pop-delim))
+                         (values char t t))
+                        (t
+                         (values '(#\) #\] #\}) nil nil))))))
 
              ;; String and character literals.
              ((seq ((quote (or #\" #\'))
                     (nil (skip-many ()
-                             (or (and #\\ :any) (not quote))))
-                    (nil (char quote)))))
+                           (or (and #\\ :any) (not quote))))
+                    (nil (or (char quote)
+                             (seq (:eof)
+                               (lexer-error char-scanner (list quote))
+                               (info-with-location tokstart
+                                                   "Literal started here")
+                               (setf eofwhine nil)))))))
 
              ;; Anything else.
              (:any)
@@ -112,6 +149,8 @@ (defun scan-c-fragment (scanner end-chars)
              ;; This really shouldn't be able to happen.
              (t
               (assert cp)
+              (when (scanner-at-eof-p char-scanner)
+                (setf eofwhine nil))
               (lexer-error char-scanner exp)))))))))
 
 (export 'parse-delimited-fragment)
index f00994a4913afbf5e8dd854efe95cff97a5cf1ba..48109b18154a8b353f7b3f97a1ed4940bd26d4bb 100644 (file)
@@ -101,7 +101,18 @@ (defmethod scanner-token ((scanner sod-token-scanner))
               (parse (many (acc init (+ (* acc radix) it) :min min)
                        (label (list :digit radix)
                               (filter (lambda (ch)
-                                        (digit-char-p ch radix))))))))
+                                        (digit-char-p ch radix)))))))
+            (start-floc ()
+              ;; This is a little nasty.  We scan the first token during
+              ;; instance initialization, as a result of `shared-initialize'
+              ;; on `token-scanner'.  Unfortunately, this happens before
+              ;; we've had a chance to initialize our own `filename' slot.
+              ;; This means that we can't use the SCANNER as a file
+              ;; location, however tempting it might be.  So we have this
+              ;; hack.
+              (make-file-location (scanner-filename char-scanner)
+                                  (scanner-line scanner)
+                                  (scanner-column scanner))))
 
        ;; Skip initial junk, and remember the place.
        (loop
@@ -128,13 +139,23 @@ (defmethod scanner-token ((scanner sod-token-scanner))
                                      (progn (write-char it out) out)
                                      :final (get-output-stream-string out))
                             (or (and #\\ :any) (not quote))))
-                (nil (char quote)))
+                (nil (or (char quote)
+                         (seq (:eof)
+                           (lexer-error char-scanner (list quote))
+                           (info-with-location
+                            (start-floc) "Literal started here")))))
             (ecase quote
               (#\" contents)
               (#\' (case (length contents)
                      (1 (char contents 0))
-                     (0 (cerror* "Empty character literal") #\?)
-                     (t (cerror* "Too many characters in literal")
+                     (0 (cerror*-with-location (start-floc)
+                                               "Lexical error: ~
+                                                empty character literal")
+                        #\?)
+                     (t (cerror*-with-location (start-floc)
+                                               "Lexical error: ~
+                                                too many characters ~
+                                                in literal")
                         (char contents 0))))))
           (values (etypecase it
                     (character :char)
index 5405da7c28fa3e76ef8a826c4a01fa797d8e4ca7..122da753d99051275570d4f62ac457c32eefd169 100644 (file)
@@ -150,14 +150,31 @@ (defun scan-comment (char-scanner)
 
    The result isn't interesting."
   (with-parser-context (character-scanner-context :scanner char-scanner)
-    (parse (or (and "/*"
-                   (and (skip-many ()
-                          (and (skip-many () (not #\*))
-                               (label "*/" (skip-many (:min 1) #\*)))
-                          (not #\/))
-                        #\/))
-              (and "//"
-                   (skip-many () (not #\newline))
-                   (? #\newline))))))
+    (let ((start (file-location char-scanner)))
+      (parse (or (and "/*"
+                     (lisp (let ((state nil))
+                             (loop (cond ((scanner-at-eof-p char-scanner)
+                                          (lexer-error char-scanner
+                                                       (list "*/"))
+                                          (info-with-location
+                                           start "Comment started here")
+                                          (return (values nil t t)))
+                                         ((char= (scanner-current-char
+                                                  char-scanner)
+                                                 #\*)
+                                          (setf state '*)
+                                          (scanner-step char-scanner))
+                                         ((and (eq state '*)
+                                               (char= (scanner-current-char
+                                                       char-scanner)
+                                                      #\/))
+                                          (scanner-step char-scanner)
+                                          (return (values nil t t)))
+                                         (t
+                                          (setf state nil)
+                                          (scanner-step char-scanner)))))))
+                (and "//"
+                     (skip-many () (not #\newline))
+                     (? #\newline)))))))
 
 ;;;----- That's all, folks --------------------------------------------------