3 ;;; Lexical analysis for input parser
5 ;;; (c) 2009 Straylight/Edgeware
8 ;;;----- Licensing notice ---------------------------------------------------
10 ;;; This file is part of the Sensible Object Design, an object system for C.
12 ;;; SOD is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
17 ;;; SOD is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with SOD; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
28 ;;;--------------------------------------------------------------------------
(export 'sod-token-scanner)
(defclass sod-token-scanner (token-scanner)
  ;; CHAR-SCANNER is supplied through the `:char-scanner' initarg and read
  ;; back with `token-scanner-char-scanner'.
  ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner))
  ;; The documentation string must be wrapped in a `:documentation' class
  ;; option: a bare string is not a valid option in this position.
  (:documentation
   "A token scanner for SOD input files.

Not a lot here, apart from a character scanner to read from and the
standard token scanner infrastructure."))
40 ;;;--------------------------------------------------------------------------
(defun show-char (stream char &optional colonp atsignp)
  "Write a human-readable rendering of CHAR to STREAM.

A null CHAR is shown as `<eof>'.  A graphic character other than space is
shown between quotes; anything else is shown by its lower-cased name
between angle brackets.

The argument list matches what `format' passes to a ~/.../ directive;
COLONP and ATSIGNP are accepted and ignored."
  (declare (ignore colonp atsignp))
  (if (null char)
      (write-string "<eof>" stream)
      (let ((plainp (and (graphic-char-p char)
                         (char/= char #\space))))
        ;; Space is graphic, but quoting it would be unreadable, so it is
        ;; spelled out by name along with the non-graphic characters.
        (format stream (if plainp "`~C'" "<~(~:C~)>") char))))
;; scan-comment: recognise a C-style comment on the character scanner SCANNER.
;;
;; What is visible here:
;;   * the work is done inside a parser context built by
;;     `character-scanner-context' over SCANNER;
;;   * one branch skips any number of non-`*' characters and then requires
;;     at least one `*' (wrapped in a `label' form naming the closing
;;     delimiter -- presumably so that errors mention it; confirm against
;;     the parser macros);
;;   * another branch skips everything up to, but not including, a newline.
;;
;; NOTE(review): the embedded line numbers jump (57 -> 60, 61 -> 65) and the
;; parentheses below do not balance, so several lines of this definition --
;; including the forms that match the comment openers and the end of the
;; function -- appear to be missing from this copy.  Restore them from the
;; upstream file before changing any code here.
53 (defun scan-comment (scanner)
54 "Scan a comment (either `/* ... */' or `// ...') from SCANNER.
56 The result isn't interesting."
57 (with-parser-context (character-scanner-context :scanner scanner)
60 (and (skip-many () (not #\*))
61 (label "*/" (skip-many (:min 1) #\*)))
65 (skip-many () (not #\newline))
68 ;;;--------------------------------------------------------------------------
(export 'syntax-error)
(defun syntax-error (scanner expected &key (continuep t))
  "Signal a (maybe) continuable syntax error.

SCANNER is the token scanner whose current token was rejected; its
`token-type' and `token-value' are used to describe what was found.
EXPECTED is a list of descriptions of the tokens which would have been
acceptable.  If CONTINUEP is true (the default) the error is signalled
with `cerror*'; otherwise it is signalled with `error'."
  (labels ((show-token (type value)
             ;; Describe a token of the given TYPE and VALUE.  Punctuation
             ;; tokens use the character itself as their type, so they are
             ;; handed to `show-char'.
             (if (characterp type)
                 (format nil "~/sod::show-char/" type)
                 (case type
                   (:id (format nil "<identifier~@[ `~A'~]>" value))
                   (:string "<string-literal>")
                   (:char "<character-literal>")
                   (:eof "<end-of-file>")
                   ;; The lexer also produces `:ellipsis' tokens for `...'.
                   (:ellipsis "`...'")
                   (t (format nil "<? ~S~@[ ~S~]>" type value)))))
           (show-expected (thing)
             ;; Describe one element of EXPECTED.  Atoms are bare token
             ;; types; a list is shown by quoting its second element.
             ;; NOTE(review): the list case is assumed to have the shape
             ;; (:id NAME) -- confirm against the callers.
             (cond ((atom thing) (show-token thing nil))
                   ((eq (car thing) :id)
                    (format nil "`~A'" (cadr thing)))
                   (t (format nil "<? ~S>" thing)))))
    ;; The iteration picks a clause by the number of items still to print
    ;; (`~#['): none is a bug, one prints the item, two prints `A or B',
    ;; and more than two prints `A, ' and goes round again (`~:;' marks
    ;; that default clause).
    (funcall (if continuep #'cerror* #'error)
             "Syntax error: ~
              expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
              but found ~A"
             (mapcar #'show-expected expected)
             (show-token (token-type scanner) (token-value scanner)))))
96 ;;;--------------------------------------------------------------------------
97 ;;; Token scanner protocol implementation.
;; scanner-token method for `sod-token-scanner': read the next token from the
;; underlying character scanner and return its type and value as two values.
;;
;; Outline of what is visible below:
;;   * `with-slots' binds the char-scanner, line and column slots of SCANNER,
;;     and parsing runs in a `character-scanner-context' over char-scanner.
;;   * Local function scan-digits accumulates digits in RADIX, starting from
;;     INIT and computing acc * radix + digit, requiring at least MIN of
;;     them; failures are labelled with the list (:digit RADIX).
;;   * Local function lexer-error reports through `cerror*'.  It formats the
;;     list of expected items, passes the current character (or nil at end
;;     of file) as the thing found, and passes the file location only when
;;     CONSUMEDP is true.
;;   * Before each attempt to skip whitespace or a comment, the scanner's
;;     line and column are copied from the character scanner.  The bare
;;     `(return)' implies an enclosing loop or block that is not visible in
;;     this copy.
;;   * The main `cond-parse' yields: `:id' with the identifier text; a quoted
;;     literal; an integer introduced by zero with b/B, o/O or x/X selecting
;;     radix 2, 8 or 16 (otherwise radix 8 with no minimum digit count); a
;;     radix-10 integer for any other leading digit; `:ellipsis' for `...';
;;     the character itself as the token type for any other character; and
;;     `:eof' at end of file.  The final clause asserts that input was
;;     consumed, reports the error, and calls `scanner-token' again.
;;
;; NOTE(review): in the lexer-error control string the sequence `~;:' looks
;; like a transposition of `~:;' (the default-clause separator of `~[').  As
;; written, exactly three remaining items print a stray colon, and more than
;; three select no clause at all.  Confirm and correct when this form is
;; next edited.
;;
;; NOTE(review): the embedded line numbers have gaps (for example 106 -> 108,
;; 114 -> 116, 156 -> 160, 165 -> 170), so a number of source lines appear to
;; be missing from this copy.  Restore them from the upstream file before
;; changing any code here.
99 (defmethod scanner-token ((scanner sod-token-scanner))
100 (with-slots (char-scanner line column) scanner
101 (with-parser-context (character-scanner-context :scanner char-scanner)
103 (flet ((scan-digits (&key (radix 10) (min 1) (init 0))
104 ;; Scan and return a sequence of digits.
105 (parse (many (acc init (+ (* acc radix) it) :min min)
106 (label (list :digit radix)
108 (digit-char-p ch radix)))))))
110 (lexer-error (expected consumedp)
111 ;; Report a lexical error.
112 (cerror* "Lexical error: ~
113 expected ~{~#[<bug>~;~A~;~A or ~A~;:~A, ~]~} ~
114 but found ~/sod::show-char/~
116 (mapcar (lambda (exp)
119 (format nil "~/sod::show-char/" exp))
120 (string (format nil "`~A'" exp))
121 ((cons (eql :digit) *)
122 (format nil "<radix-~A digit>"
124 ((eql :eof) "<end-of-file>")
125 ((eql :any) "<character>")
126 (t (format nil "<? ~S>" exp))))
128 (and (not (scanner-at-eof-p char-scanner))
129 (scanner-current-char char-scanner))
130 (and consumedp (file-location char-scanner)))))
132 ;; Skip initial junk, and remember the place.
134 (setf (scanner-line scanner) (scanner-line char-scanner)
135 (scanner-column scanner) (scanner-column char-scanner))
136 (cond-parse (:consumedp cp :expected exp)
137 ((satisfies whitespace-char-p) (parse :whitespace))
138 ((scan-comment char-scanner))
139 (t (if cp (lexer-error exp cp) (return)))))
141 ;; Now parse something.
142 (cond-parse (:consumedp cp :expected exp)
144 ;; Alphanumerics mean we read an identifier.
145 ((or #\_ (satisfies alpha-char-p))
146 (values :id (with-output-to-string (out)
148 (parse (many (nil nil (write-char it out))
149 (or #\_ (satisfies alphanumericp)))))))
151 ;; Quotes introduce a literal.
152 ((seq ((quote (or #\" #\'))
153 (contents (many (out (make-string-output-stream)
154 (progn (write-char it out) out)
155 :final (get-output-stream-string out))
156 (or (and #\\ :any) (not quote))))
160 (#\' (case (length contents)
161 (1 (char contents 0))
162 (0 (cerror* "Empty character literal") #\?)
163 (t (cerror* "Too many characters in literal")
164 (char contents 0))))))
165 (values (etypecase it
170 ;; Zero introduces a chosen-radix integer.
172 (or (and (or #\b #\B) (scan-digits :radix 2))
173 (and (or #\o #\O) (scan-digits :radix 8))
174 (and (or #\x #\X) (scan-digits :radix 16))
175 (scan-digits :radix 8 :min 0)))
178 ;; Any other digit forces radix-10.
179 ((seq ((d (filter digit-char-p))
180 (i (scan-digits :radix 10 :min 0 :init d)))
184 ;; Some special punctuation sequences are single tokens.
185 ("..." (values :ellipsis nil))
187 ;; Any other character is punctuation.
188 (:any (values it nil))
190 ;; End of file means precisely that.
191 (:eof (values :eof nil))
193 ;; Report errors and try again. Because we must have consumed some
194 ;; input in order to get here (we've matched both :any and :eof) we
195 ;; must make progress on every call.
196 (t (assert cp) (lexer-error exp cp) (scanner-token scanner)))))))
198 ;;;----- That's all, folks --------------------------------------------------