Commit | Line | Data |
---|---|---|
dea4d055 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Implementation of lexical analysis protocol. | |
4 | ;;; | |
5 | ;;; (c) 2009 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
e0808c47 | 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
dea4d055 MW |
11 | ;;; |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | (cl:in-package #:sod) | |
27 | ||
28 | ;;;-------------------------------------------------------------------------- | |
239fa5bd | 29 | ;;; Class implementation. |
dea4d055 | 30 | |
239fa5bd MW |
(defmethod shared-initialize :after
    ((scanner sod-token-scanner) slot-names &key)
  "Default SCANNER's `filename' slot from its underlying character scanner.

   The defaulting itself is delegated to `default-slot'; presumably it only
   acts when the slot is still unset and is covered by SLOT-NAMES -- see that
   macro's definition to confirm."
  (default-slot (scanner 'sod-parser::filename slot-names)
    ;; The file name is fetched inside the `default-slot' body so that the
    ;; character scanner is only consulted if a default is actually wanted.
    (let ((chars (token-scanner-char-scanner scanner)))
      (scanner-filename chars))))
dea4d055 | 35 | |
239fa5bd MW |
(defmethod make-scanner-stream ((scanner sod-token-scanner))
  "Return a scanner stream for SCANNER.

   A token scanner has no stream of its own: the request is simply passed on
   to the character scanner underneath it."
  (let ((chars (token-scanner-char-scanner scanner)))
    (make-scanner-stream chars)))
dea4d055 MW |
38 | |
39 | ;;;-------------------------------------------------------------------------- | |
239fa5bd | 40 | ;;; Indicators and error messages. |
dea4d055 | 41 | |
40d95de7 MW |
(defun show-char (char)
  "Return a human-readable string describing CHAR.

   A CHAR of `nil' stands for end-of-file.  A graphic character other than
   space is shown between quotes; anything else is shown as its lower-case
   character name between angle brackets."
  (if (null char)
      "<end-of-file>"
      (let ((visiblep (and (graphic-char-p char)
                           (char/= char #\space))))
        ;; Both branches format the same single argument; only the control
        ;; string differs.
        (format nil
                (if visiblep "`~C'" "<~(~:C~)>")
                char))))
dea4d055 | 48 | |
d0be8052 MW |
(defun %skip-until (scanner token-types
                    &key (keep-end (not (null (cdr token-types)))))
  "This is the implementation of the `skip-until' parser.

   Step SCANNER forward until its current token matches one of TOKEN-TYPES.
   Each entry of TOKEN-TYPES is either a list (TYPE VALUE), which matches a
   token whose type is `eq' to TYPE and whose value is `equal' to VALUE (a
   VALUE of `t' matches any value); or a non-list TYPE, which matches any
   token of that type.

   If KEEP-END is false then the matching token is stepped over as well; by
   default it is kept exactly when more than one token type was given.

   On a match, return `nil', `t' and `t' (result, success, consumed).  If
   end-of-file is reached without finding a match, return TOKEN-TYPES, `nil',
   and a flag which is true if any tokens were skipped."
  (flet ((current-token-matches-p ()
           ;; Does the scanner's current token satisfy any of TOKEN-TYPES?
           (let ((type (token-type scanner))
                 (value (token-value scanner)))
             (some (lambda (spec)
                     (multiple-value-bind (want-type want-value)
                         (if (listp spec)
                             (values (car spec) (cadr spec))
                             (values spec t))
                       (and (eq want-type type)
                            (or (eq want-value t)
                                (equal want-value value)))))
                   token-types))))
    (do ((consumedp nil t))
        ((current-token-matches-p)
         (unless keep-end (scanner-step scanner))
         ;; Report consumption on every successful exit.  If KEEP-END is
         ;; false, we have just stepped over the terminating token, so input
         ;; has definitely been consumed even if nothing was skipped before
         ;; it; this case used to report the loop's CONSUMEDP, which is
         ;; false when the terminator was the very first token examined.  If
         ;; KEEP-END is true, the flag has always been reported as true, and
         ;; that is left unchanged here.
         (values nil t t))
      (when (scanner-at-eof-p scanner)
        (return (values token-types nil consumedp)))
      (scanner-step scanner))))
68 | ||
(defun parse-error-recover (scanner parser recover
                            &key ignore-unconsumed force-progress action)
  "This is the implementation of the `error' parser.

   PARSER and RECOVER are functions of no arguments; PARSER is called first.
   If it succeeds, its three values are returned unchanged.  The same
   happens if it fails without consuming anything and either
   IGNORE-UNCONSUMED is true or SCANNER is at end-of-file.

   Otherwise a syntax error is reported against SCANNER, ACTION (if non-nil)
   is called with no arguments, the current token is discarded if
   FORCE-PROGRESS is true and PARSER consumed nothing, and finally the values
   of calling RECOVER are returned.

   NOTE(review): the values from RECOVER are returned exactly as they are, so
   tokens consumed by PARSER, or discarded for FORCE-PROGRESS, are not
   reflected in the returned consumed flag -- confirm that this is intended."
  (multiple-value-bind (result win consumedp) (funcall parser)

    ;; Nothing to do if the parser succeeded, or if it consumed no tokens and
    ;; the caller has said that's fine.  Likewise, a failure which consumed
    ;; nothing at end-of-file is handed straight back: there is little hope
    ;; of making progress from there, so we propagate the failure instead of
    ;; attempting recovery, trusting the continuation to report the problem
    ;; without burying the user in further messages.
    (when (or win
              (and (not consumedp)
                   (or ignore-unconsumed
                       (scanner-at-eof-p scanner))))
      (return-from parse-error-recover
        (values result win consumedp)))

    ;; From here on we are recovering from an error, and what matters most is
    ;; that progress is made.  If the parser consumed tokens then that is
    ;; already assured, and the recovery strategy can simply be tried.  If it
    ;; did not, then (end-of-file having been dealt with above) progress can
    ;; be ensured by throwing away the current token, should the caller have
    ;; asked for that.
    (syntax-error scanner result)
    (when action
      (funcall action))
    (when (and (not consumedp) force-progress)
      (scanner-step scanner))
    (funcall recover)))
99 | ||
239fa5bd MW |
100 | ;;;-------------------------------------------------------------------------- |
101 | ;;; Token scanning. | |
102 | ||
(defmethod scanner-token ((scanner sod-token-scanner))
  "Read the next token from SCANNER's character scanner.

   Whitespace and comments are skipped first, and the position at which the
   token starts is stored in SCANNER's line and column.  Two values are
   returned, a token type and a token value:

     * `:id' and the identifier's name, as a string;
     * `:string' and the contents of a double-quoted literal;
     * `:char' and the character from a single-quoted literal;
     * `:int' and the integer's value;
     * `:ellipsis' and `nil', for three consecutive dots;
     * any other character itself and `nil', for punctuation; or
     * `:eof' and `nil', at end of file."
  (with-slots (char-scanner line column) scanner
    (with-parser-context (character-scanner-context :scanner char-scanner)

      (flet ((scan-digit-run (&key (radix 10) (min 1) (init 0))
               ;; Parse a run of at least MIN digits in the given RADIX,
               ;; folding them into a number whose starting value is INIT.
               (parse (many (total init (+ (* total radix) it) :min min)
                        (label (list :digit radix)
                          (filter (lambda (ch)
                                    (digit-char-p ch radix)))))))
             (token-start-floc ()
               ;; An unpleasant wrinkle: the first token is scanned while
               ;; the instance is still being initialized, because of
               ;; `shared-initialize' on `token-scanner', and that happens
               ;; before our own `filename' slot has been set up.  So,
               ;; tempting as it is, SCANNER itself can't serve as the file
               ;; location; build one by hand, taking the file name from the
               ;; character scanner instead.
               (make-file-location (scanner-filename char-scanner)
                                   (scanner-line scanner)
                                   (scanner-column scanner))))

        ;; Discard leading whitespace and comments.  The position is noted
        ;; afresh before each attempt, so that when the loop finishes it
        ;; records where the token proper begins.
        (loop
          (setf (scanner-line scanner) (scanner-line char-scanner))
          (setf (scanner-column scanner) (scanner-column char-scanner))
          (cond-parse (:consumedp consumed :expected wanted)
            ((satisfies whitespace-char-p) (parse :whitespace))
            ((scan-comment char-scanner))
            (t (unless consumed (return))
               (lexer-error char-scanner wanted))))

        ;; Now for the token itself.
        (cond-parse (:consumedp consumed :expected wanted)

          ;; A letter or underscore begins an identifier.
          ((or #\_ (satisfies alpha-char-p))
           (values :id (with-output-to-string (name)
                         (write-char it name)
                         (parse (many (nil nil (write-char it name))
                                  (or #\_ (satisfies alphanumericp)))))))

          ;; A quote character begins a string or character literal.
          ((seq ((delim (or #\" #\'))
                 (text (many (buf (make-string-output-stream)
                                  (progn (write-char it buf) buf)
                                  :final (get-output-stream-string buf))
                         (or (and #\\ :any) (not delim))))
                 (nil (or (char delim)
                          (seq (:eof)
                            (lexer-error char-scanner (list delim))
                            (info-with-location
                             (token-start-floc) "Literal started here")))))
             (ecase delim
               (#\" text)
               (#\' (case (length text)
                      (0 (cerror*-with-location (token-start-floc)
                                                'simple-lexer-error
                                                :format-control
                                                "Empty character literal")
                         #\?)
                      (1 (char text 0))
                      (t (cerror*-with-location (token-start-floc)
                                                'simple-lexer-error
                                                :format-control
                                                "Too many characters ~
                                                 in character literal")
                         (char text 0))))))
           (values (etypecase it
                     (character :char)
                     (string :string))
                   it))

          ;; A leading zero begins an integer whose radix is chosen by the
          ;; letter which follows; with no such letter, the radix is eight.
          ((and #\0
                (or (and (or #\b #\B) (scan-digit-run :radix 2))
                    (and (or #\o #\O) (scan-digit-run :radix 8))
                    (and (or #\x #\X) (scan-digit-run :radix 16))
                    (scan-digit-run :radix 8 :min 0)))
           (values :int it))

          ;; Any other leading digit begins a decimal integer.
          ((seq ((lead (filter digit-char-p))
                 (number (scan-digit-run :radix 10 :min 0 :init lead)))
             number)
           (values :int it))

          ;; Certain punctuation sequences form a single token.
          ("..." (values :ellipsis nil))

          ;; Otherwise, a character is a punctuation token all by itself.
          (:any (values it nil))

          ;; End of file is reported as such.
          (:eof (values :eof nil))

          ;; Report the error and have another go.  Both `:any' and `:eof'
          ;; have been tried above, so reaching this point means that some
          ;; input must have been consumed, and therefore each call makes
          ;; progress.
          (t
           (assert consumed)
           (lexer-error char-scanner wanted)
           (scanner-token scanner)))))))
dea4d055 MW |
205 | |
206 | ;;;----- That's all, folks -------------------------------------------------- |