chiark / gitweb /
src/lexer-{proto,impl}.lisp: Add explicit recovery action to `error'.
[sod] / src / lexer-impl.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Implementation of lexical analysis protocol.
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
e0808c47 10;;; This file is part of the Sensible Object Design, an object system for C.
dea4d055
MW
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
239fa5bd 29;;; Class implementation.
dea4d055 30
239fa5bd
MW
(defmethod shared-initialize :after
    ((token-scanner sod-token-scanner) names &key)
  "Default TOKEN-SCANNER's `sod-parser::filename' slot.

   The default value is the filename reported by the underlying character
   scanner.  The defaulting itself (including whether NAMES permits it) is
   delegated to `default-slot'."
  (default-slot (token-scanner 'sod-parser::filename names)
    (scanner-filename (token-scanner-char-scanner token-scanner))))
dea4d055 35
239fa5bd
MW
(defmethod make-scanner-stream ((token-scanner sod-token-scanner))
  "Return a scanner stream for TOKEN-SCANNER.

   The work is delegated to the underlying character scanner, so the result
   is whatever `make-scanner-stream' returns for that object."
  (let ((chars (token-scanner-char-scanner token-scanner)))
    (make-scanner-stream chars)))
dea4d055
MW
38
39;;;--------------------------------------------------------------------------
239fa5bd 40;;; Indicators and error messages.
dea4d055 41
40d95de7
MW
(defun show-char (char)
  "Return a string describing CHAR in a form fit for messages.

   CHAR is either a character or nil.  Nil is rendered as `<end-of-file>'.
   A graphic character other than space is shown literally between quotes;
   anything else is shown as its lower-cased character name in angle
   brackets."
  (if (null char)
      "<end-of-file>"
      (let ((literalp (and (graphic-char-p char)
                           (char/= char #\space))))
        ;; Space is graphic, but invisible when quoted, so it is shown by
        ;; name like the non-graphic characters.
        (format nil (if literalp "`~C'" "<~(~:C~)>") char))))
dea4d055 48
d0be8052
MW
(defun %skip-until (scanner token-types
                    &key (keep-end (not (null (cdr token-types)))))
  "This is the implementation of the `skip-until' parser.

   Discard tokens from SCANNER until the current token matches one of the
   TOKEN-TYPES, or until end-of-file.  Each element of TOKEN-TYPES is either
   a list (TYPE VALUE) or an atom TYPE, the latter being treated as
   (TYPE t).  A token matches if its type is `eq' to TYPE and either VALUE
   is `t' or the token's value is `equal' to VALUE.

   If KEEP-END is false then the matching token is discarded as well.
   KEEP-END defaults to true exactly when more than one token type was
   supplied.

   Three values are returned, in the RESULT, WIN, CONSUMEDP order used by
   the parsers in this file.  On a match they are nil, t, and a flag which
   is true if and only if the scanner was advanced.  At end-of-file without
   a match they are TOKEN-TYPES, nil, and true if any token was discarded."
  (flet ((current-token-matches-p ()
           ;; Fetch the current token once, then test it against each spec.
           (let ((type (token-type scanner))
                 (value (token-value scanner)))
             (some (lambda (spec)
                     (multiple-value-bind (want-type want-value)
                         (cond ((listp spec)
                                (values (car spec) (cadr spec)))
                               (t (values spec t)))
                       (and (eq want-type type)
                            (or (eq want-value t)
                                (equal want-value value)))))
                   token-types))))
    ;; CONSUMEDP is false on the first trip round the loop and true on every
    ;; later one, i.e., it records whether we have skipped anything yet.
    (do ((consumedp nil t))
        ((current-token-matches-p)
         (unless keep-end (scanner-step scanner))
         ;; We have consumed input if we skipped something on the way here,
         ;; or if we have just discarded the end token.  (This used to test
         ;; KEEP-END the wrong way round, so dropping the end token on an
         ;; immediate match was reported as consuming nothing, and keeping
         ;; it was always reported as consuming something.)
         (values nil t (or consumedp (not keep-end))))
      (when (scanner-at-eof-p scanner)
        (return (values token-types nil consumedp)))
      (scanner-step scanner))))
68
(defun parse-error-recover (scanner parser recover
                            &key ignore-unconsumed force-progress action)
  "This is the implementation of the `error' parser.

   Call the function PARSER, which returns the usual three values RESULT,
   WIN and CONSUMEDP.  Those values are returned unchanged if PARSER
   succeeded, or if it failed without consuming anything and either
   IGNORE-UNCONSUMED is true or SCANNER is at end-of-file.

   Otherwise, report a syntax error against SCANNER using PARSER's result,
   call ACTION (if non-nil) with no arguments, step SCANNER past the current
   token if FORCE-PROGRESS is true and PARSER consumed nothing, and finally
   return whatever values the function RECOVER returns."
  (multiple-value-bind (result win consumedp) (funcall parser)
    (flet ((quiet-failure-p ()
             ;; A failure which consumed nothing is passed straight back if
             ;; the caller asked for that, or if we're at end-of-file.  In
             ;; the latter case there is little hope of onward progress, and
             ;; we assume that the continuation will report the problem
             ;; rather than have us inundate the user with error reports.
             (and (not consumedp)
                  (or ignore-unconsumed
                      (scanner-at-eof-p scanner)))))
      (if (or win (quiet-failure-p))

          ;; Nothing for us to do: hand the parser's values back as they are.
          (values result win consumedp)

          ;; Recover.  The important thing is to guarantee progress.  If the
          ;; parser consumed tokens then we already have it.  If it didn't,
          ;; and the caller asked us to force the matter, then discard the
          ;; current token; the end-of-file case was dealt with above.
          (progn
            (syntax-error scanner result)
            (when action (funcall action))
            (unless (or consumedp (not force-progress))
              (scanner-step scanner))
            (funcall recover))))))
99
239fa5bd
MW
100;;;--------------------------------------------------------------------------
101;;; Token scanning.
102
(defmethod scanner-token ((scanner sod-token-scanner))
  "Scan one token from SCANNER's character scanner.

   Whitespace and comments are skipped first, and SCANNER's line and column
   are set to the position where the token starts.  Two values are
   returned, a token type and a token value:

     * `:id' and the identifier's name, as a string;
     * `:string' and the contents of a double-quoted literal;
     * `:char' and the character of a single-quoted literal;
     * `:int' and the integer's value;
     * `:ellipsis' and nil, for `...';
     * the character itself and nil, for any other character; or
     * `:eof' and nil, at end of file.

   If none of these can be scanned, a lexer error is reported and scanning
   is tried again."
  (with-slots (char-scanner line column) scanner
    (with-parser-context (character-scanner-context :scanner char-scanner)

      (flet ((token-origin ()
               ;; Return a file location for the start of the current token.
               ;;
               ;; This is a little nasty.  The first token is scanned during
               ;; instance initialization, as a result of
               ;; `shared-initialize' on `token-scanner', which is before our
               ;; own `filename' slot has been set up.  So SCANNER itself
               ;; can't be used as the file location, and the filename is
               ;; taken from the character scanner instead.
               (make-file-location (scanner-filename char-scanner)
                                   (scanner-line scanner)
                                   (scanner-column scanner)))

             (lex-digits (&key (radix 10) (min 1) (init 0))
               ;; Parse at least MIN digits in the given RADIX, accumulating
               ;; them into an integer which starts out as INIT.
               (parse (many (total init (+ (* total radix) it) :min min)
                        (label (list :digit radix)
                               (filter (lambda (ch)
                                         (digit-char-p ch radix))))))))

        ;; Discard leading whitespace and comments.  The position is noted
        ;; afresh before each attempt, so that when we leave the loop it
        ;; describes the place where the token proper begins.
        (loop
          (setf (scanner-line scanner) (scanner-line char-scanner))
          (setf (scanner-column scanner) (scanner-column char-scanner))
          (cond-parse (:consumedp consumed :expected wanted)
            ((satisfies whitespace-char-p) (parse :whitespace))
            ((scan-comment char-scanner))
            (t
             ;; Nothing matched.  If nothing was consumed either then the
             ;; junk is finished; otherwise complain and go round again.
             (unless consumed (return))
             (lexer-error char-scanner wanted))))

        ;; Now for the token itself.
        (cond-parse (:consumedp consumed :expected wanted)

          ;; An identifier: a letter or underscore, followed by letters,
          ;; digits, and underscores.
          ((or #\_ (satisfies alpha-char-p))
           (values :id (with-output-to-string (name)
                         (write-char it name)
                         (parse (many (nil nil (write-char it name))
                                  (or #\_ (satisfies alphanumericp)))))))

          ;; A string or character literal, depending on the quote used.  A
          ;; backslash causes the following character to be taken as is.
          ((seq ((delim (or #\" #\'))
                 (body (many (buf (make-string-output-stream)
                                  (progn (write-char it buf) buf)
                                  :final (get-output-stream-string buf))
                         (or (and #\\ :any) (not delim))))
                 (nil (or (char delim)
                          (seq (:eof)
                            (lexer-error char-scanner (list delim))
                            (info-with-location
                             (token-origin) "Literal started here")))))
             (ecase delim
               (#\" body)
               (#\' (case (length body)
                      (0 (cerror*-with-location (token-origin)
                                                'simple-lexer-error
                                                :format-control
                                                "Empty character literal")
                         #\?)
                      (1 (char body 0))
                      (t (cerror*-with-location (token-origin)
                                                'simple-lexer-error
                                                :format-control
                                                "Too many characters ~
                                                 in character literal")
                         (char body 0))))))
           (values (etypecase it
                     (character :char)
                     (string :string))
                   it))

          ;; A leading zero: the radix is chosen by the next character, and
          ;; is octal if there's no radix letter.
          ((and #\0
                (or (and (or #\b #\B) (lex-digits :radix 2))
                    (and (or #\o #\O) (lex-digits :radix 8))
                    (and (or #\x #\X) (lex-digits :radix 16))
                    (lex-digits :radix 8 :min 0)))
           (values :int it))

          ;; Any other leading digit: the number is decimal.
          ((seq ((first-digit (filter digit-char-p))
                 (number (lex-digits :radix 10 :min 0 :init first-digit)))
             number)
           (values :int it))

          ;; Punctuation sequences which form single tokens.
          ("..." (values :ellipsis nil))

          ;; Any other character stands for itself.
          (:any (values it nil))

          ;; And end of file is just that.
          (:eof (values :eof nil))

          ;; Nothing matched.  Since both `:any' and `:eof' were tried, some
          ;; input must have been consumed to get here, so reporting the
          ;; error and scanning again makes progress on every call.
          (t
           (assert consumed)
           (lexer-error char-scanner wanted)
           (scanner-token scanner)))))))
dea4d055
MW
205
206;;;----- That's all, folks --------------------------------------------------