chiark / gitweb /
src/lexer-{proto,impl}.lisp: Add explicit recovery action to `error'.
[sod] / src / lexer-proto.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Protocol for lexical analysis
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
e0808c47 10;;; This file is part of the Sensible Object Design, an object system for C.
dea4d055
MW
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
239fa5bd 29;;; Class definition.
dea4d055 30
239fa5bd
MW
31(export 'sod-token-scanner)
32(defclass sod-token-scanner (token-scanner)
33 ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner))
dea4d055 34 (:documentation
239fa5bd 35 "A token scanner for SOD input files.
dea4d055 36
239fa5bd
MW
37 Not a lot here, apart from a character scanner to read from and the
38 standard token scanner infrastructure."))
dea4d055 39
dea4d055 40;;;--------------------------------------------------------------------------
239fa5bd
MW
41;;; Indicators and error messages.
42
1d8cc67a
MW
43(defvar *indicator-map* (make-hash-table)
44 "Hash table mapping indicator objects to human-readable descriptions.")
45
239fa5bd
MW
46(export 'define-indicator)
47(defun define-indicator (indicator description)
48 "Associate an INDICATOR with its textual DESCRIPTION.
49
50 Updates the the `*indicator-map*'."
51 (setf (gethash indicator *indicator-map*) description)
52 indicator)
53
54(export 'syntax-error)
40d95de7
MW
55(define-condition syntax-error (parser-error base-syntax-error)
56 ((found :type cons))
57 (:report (lambda (error stream)
58 (labels ((show-token (type value)
59 (if (characterp type) (show-char type)
60 (case type
61 (:id (format nil "<identifier~@[ `~A'~]>"
62 value))
63 (:int "<integer-literal>")
64 (:string "<string-literal>")
65 (:char "<character-literal>")
66 (:eof "<end-of-file>")
67 (:ellipsis "`...'")
68 (t (format nil "<? ~S~@[ ~S~]>" type value)))))
69 (show-expected (thing)
70 (acond ((gethash thing *indicator-map*) it)
71 ((atom thing) (show-token thing nil))
72 ((eq (car thing) :id)
73 (format nil "`~A'" (cadr thing)))
74 (t (format nil "<? ~S>" thing)))))
75 (report-parser-error error stream
76 #'show-expected
77 (lambda (found)
78 (show-token (car found)
79 (cdr found))))))))
d63df20a 80(defun syntax-error (scanner expected &key (continuep t) location)
239fa5bd 81 "Signal a (maybe) continuable syntax error."
40d95de7
MW
82 (funcall (if continuep #'cerror*-with-location #'error-with-location)
83 (or location scanner) 'syntax-error
84 :expected expected
85 :found (cons (token-type scanner) (token-value scanner))))
239fa5bd
MW
86
87(export 'lexer-error)
40d95de7
MW
88(define-condition lexer-error (parser-error base-lexer-error)
89 ((found :type (or character nil)))
90 (:report (lambda (error stream)
91 (flet ((show-expected (exp)
92 (typecase exp
93 (character (show-char exp))
94 (string (format nil "`~A'" exp))
95 ((cons (eql :digit) *)
96 (format nil "<radix-~A digit>" (cadr exp)))
97 ((eql :eof) "<end-of-file>")
98 ((eql :any) "<character>")
99 (t (format nil "<? ~S>" exp)))))
100 (report-parser-error error stream
101 #'show-expected #'show-char)))))
d63df20a 102(defun lexer-error (char-scanner expected &key location)
239fa5bd 103 "Signal a continuable lexical error."
40d95de7
MW
104 (cerror*-with-location (or location char-scanner) 'lexer-error
105 :expected expected
106 :found (and (not (scanner-at-eof-p char-scanner))
107 (scanner-current-char char-scanner))))
dea4d055 108
33b5686f 109(export 'skip-until)
048d0b2d
MW
110(defparse skip-until (:context (context token-scanner-context)
111 (&key (keep-end nil keep-end-p))
112 &rest token-types)
113 "Discard tokens until we find one listed in TOKEN-TYPES.
114
1c2db39a
MW
115 Each of the TOKEN-TYPES is an expression which evaluates to either a
116 two-item list (TYPE VALUE), or a singleton TYPE; the latter is equivalent
117 to a list (TYPE t). Such a pair matches a token with the corresponding
118 TYPE and VALUE, except that a VALUE of `t' matches any token value.
119
048d0b2d
MW
120 If KEEP-END is true then retain the found token for later; otherwise
121 discard it. KEEP-END defaults to true if multiple TOKEN-TYPES are given;
122 otherwise false. If end-of-file is encountered then the indicator list is
123 simply the list of TOKEN-TYPES; otherwise the result is `nil'."
0ad3667e 124 `(%skip-until ,(parser-scanner context)
048d0b2d
MW
125 (list ,@token-types)
126 :keep-end ,(if keep-end-p keep-end
127 (> (length token-types) 1))))
128
33b5686f 129(export 'error)
048d0b2d 130(defparse error (:context (context token-scanner-context)
450a4be6 131 (&key ignore-unconsumed force-progress)
b5911ce8 132 sub &optional (recover t) &body body)
048d0b2d
MW
133 "Try to parse SUB; if it fails then report an error, and parse RECOVER.
134
135 This is the main way to recover from errors and continue parsing. Even
136 then, it's not especially brilliant.
137
138 If the SUB parser succeeds then just propagate its result: it's like we
139 were never here. Otherwise, try to recover in a sensible way so we can
140 continue parsing. The details of this recovery are subject to change, but
141 the final action is generally to invoke the RECOVER parser and return its
012554e1
MW
142 result.
143
144 If IGNORE-UNCONSUMED evaluates non-nil, then just propagate a failure of
145 SUB if it didn't consume input. (This makes it suitable for use where the
146 parser containing `error' might be optional.)"
048d0b2d
MW
147 `(parse-error-recover ,(parser-scanner context)
148 (parser () ,sub)
012554e1 149 (parser () ,recover)
450a4be6 150 :ignore-unconsumed ,ignore-unconsumed
b5911ce8
MW
151 :force-progress ,force-progress
152 :action ,(and body `(lambda () ,@body))))
048d0b2d 153
ae7a3c8f
MW
154(export 'must)
155(defparse must (:context (context token-scanner-context)
156 sub &optional default)
157 "Try to parse SUB; if it fails, report an error, and return DEFAULT.
158
159 This parser can't actually fail."
160 `(parse (error () ,sub (t ,default))))
161
dea4d055 162;;;--------------------------------------------------------------------------
239fa5bd
MW
163;;; Lexical analysis utilities.
164
33b5686f 165(export 'scan-comment)
239fa5bd
MW
166(defun scan-comment (char-scanner)
167 "Scan a comment (either `/* ... */' or `// ...') from CHAR-SCANNER.
168
169 The result isn't interesting."
170 (with-parser-context (character-scanner-context :scanner char-scanner)
e046c3f6
MW
171 (let ((start (file-location char-scanner)))
172 (parse (or (and "/*"
173 (lisp (let ((state nil))
174 (loop (cond ((scanner-at-eof-p char-scanner)
175 (lexer-error char-scanner
176 (list "*/"))
177 (info-with-location
178 start "Comment started here")
179 (return (values nil t t)))
180 ((char= (scanner-current-char
181 char-scanner)
182 #\*)
183 (setf state '*)
184 (scanner-step char-scanner))
185 ((and (eq state '*)
186 (char= (scanner-current-char
187 char-scanner)
188 #\/))
189 (scanner-step char-scanner)
190 (return (values nil t t)))
191 (t
192 (setf state nil)
193 (scanner-step char-scanner)))))))
194 (and "//"
195 (skip-many () (not #\newline))
196 (? #\newline)))))))
dea4d055
MW
197
198;;;----- That's all, folks --------------------------------------------------