chiark / gitweb /
src/lexer-proto.lisp: New parser `must'.
[sod] / src / lexer-proto.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Protocol for lexical analysis
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
e0808c47 10;;; This file is part of the Sensible Object Design, an object system for C.
dea4d055
MW
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
239fa5bd 29;;; Class definition.
dea4d055 30
239fa5bd
MW
31(export 'sod-token-scanner)
32(defclass sod-token-scanner (token-scanner)
33 ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner))
dea4d055 34 (:documentation
239fa5bd 35 "A token scanner for SOD input files.
dea4d055 36
239fa5bd
MW
37 Not a lot here, apart from a character scanner to read from and the
38 standard token scanner infrastructure."))
dea4d055 39
dea4d055 40;;;--------------------------------------------------------------------------
239fa5bd
MW
41;;; Indicators and error messages.
42
1d8cc67a
MW
43(defvar *indicator-map* (make-hash-table)
44 "Hash table mapping indicator objects to human-readable descriptions.")
45
239fa5bd
MW
46(export 'define-indicator)
47(defun define-indicator (indicator description)
48 "Associate an INDICATOR with its textual DESCRIPTION.
49
50 Updates the the `*indicator-map*'."
51 (setf (gethash indicator *indicator-map*) description)
52 indicator)
53
54(export 'syntax-error)
d63df20a 55(defun syntax-error (scanner expected &key (continuep t) location)
239fa5bd
MW
56 "Signal a (maybe) continuable syntax error."
57 (labels ((show-token (type value)
58 (if (characterp type)
59 (format nil "~/sod::show-char/" type)
60 (case type
61 (:id (format nil "<identifier~@[ `~A'~]>" value))
048d0b2d 62 (:int "<integer-literal>")
239fa5bd
MW
63 (:string "<string-literal>")
64 (:char "<character-literal>")
65 (:eof "<end-of-file>")
66 (:ellipsis "`...'")
67 (t (format nil "<? ~S~@[ ~S~]>" type value)))))
68 (show-expected (thing)
69 (acond ((gethash thing *indicator-map*) it)
70 ((atom thing) (show-token thing nil))
71 ((eq (car thing) :id)
72 (format nil "`~A'" (cadr thing)))
73 (t (format nil "<? ~S>" thing)))))
d63df20a
MW
74 (funcall (if continuep #'cerror*-with-location #'error-with-location)
75 (or location scanner)
239fa5bd
MW
76 "Syntax error: ~
77 expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
78 but found ~A"
79 (mapcar #'show-expected expected)
80 (show-token (token-type scanner) (token-value scanner)))))
81
82(export 'lexer-error)
d63df20a 83(defun lexer-error (char-scanner expected &key location)
239fa5bd 84 "Signal a continuable lexical error."
d63df20a
MW
85 (cerror*-with-location (or location char-scanner)
86 "Lexical error: ~
7524b4b2 87 ~:[unexpected~;~
3c46cb3a
MW
88 expected ~:*~{~#[~;~A~;~A or ~A~:;~
89 ~@{~A, ~#[~;or ~A~]~}~]~} ~
7524b4b2
MW
90 but found~] ~
91 ~/sod::show-char/"
239fa5bd
MW
92 (mapcar (lambda (exp)
93 (typecase exp
94 (character (format nil "~/sod::show-char/" exp))
95 (string (format nil "`~A'" exp))
96 ((cons (eql :digit) *) (format nil "<radix-~A digit>"
97 (cadr exp)))
98 ((eql :eof) "<end-of-file>")
99 ((eql :any) "<character>")
100 (t (format nil "<? ~S>" exp))))
101 expected)
102 (and (not (scanner-at-eof-p char-scanner))
26c5ecfe 103 (scanner-current-char char-scanner))))
dea4d055 104
33b5686f 105(export 'skip-until)
048d0b2d
MW
106(defparse skip-until (:context (context token-scanner-context)
107 (&key (keep-end nil keep-end-p))
108 &rest token-types)
109 "Discard tokens until we find one listed in TOKEN-TYPES.
110
111 If KEEP-END is true then retain the found token for later; otherwise
112 discard it. KEEP-END defaults to true if multiple TOKEN-TYPES are given;
113 otherwise false. If end-of-file is encountered then the indicator list is
114 simply the list of TOKEN-TYPES; otherwise the result is `nil'."
115 `(skip-until ,(parser-scanner context)
116 (list ,@token-types)
117 :keep-end ,(if keep-end-p keep-end
118 (> (length token-types) 1))))
119
33b5686f 120(export 'error)
048d0b2d 121(defparse error (:context (context token-scanner-context)
450a4be6 122 (&key ignore-unconsumed force-progress)
012554e1 123 sub &optional (recover t))
048d0b2d
MW
124 "Try to parse SUB; if it fails then report an error, and parse RECOVER.
125
126 This is the main way to recover from errors and continue parsing. Even
127 then, it's not especially brilliant.
128
129 If the SUB parser succeeds then just propagate its result: it's like we
130 were never here. Otherwise, try to recover in a sensible way so we can
131 continue parsing. The details of this recovery are subject to change, but
132 the final action is generally to invoke the RECOVER parser and return its
012554e1
MW
133 result.
134
135 If IGNORE-UNCONSUMED evaluates non-nil, then just propagate a failure of
136 SUB if it didn't consume input. (This makes it suitable for use where the
137 parser containing `error' might be optional.)"
048d0b2d
MW
138 `(parse-error-recover ,(parser-scanner context)
139 (parser () ,sub)
012554e1 140 (parser () ,recover)
450a4be6
MW
141 :ignore-unconsumed ,ignore-unconsumed
142 :force-progress ,force-progress))
048d0b2d 143
ae7a3c8f
MW
144(export 'must)
145(defparse must (:context (context token-scanner-context)
146 sub &optional default)
147 "Try to parse SUB; if it fails, report an error, and return DEFAULT.
148
149 This parser can't actually fail."
150 `(parse (error () ,sub (t ,default))))
151
dea4d055 152;;;--------------------------------------------------------------------------
239fa5bd
MW
153;;; Lexical analysis utilities.
154
33b5686f 155(export 'scan-comment)
239fa5bd
MW
156(defun scan-comment (char-scanner)
157 "Scan a comment (either `/* ... */' or `// ...') from CHAR-SCANNER.
158
159 The result isn't interesting."
160 (with-parser-context (character-scanner-context :scanner char-scanner)
e046c3f6
MW
161 (let ((start (file-location char-scanner)))
162 (parse (or (and "/*"
163 (lisp (let ((state nil))
164 (loop (cond ((scanner-at-eof-p char-scanner)
165 (lexer-error char-scanner
166 (list "*/"))
167 (info-with-location
168 start "Comment started here")
169 (return (values nil t t)))
170 ((char= (scanner-current-char
171 char-scanner)
172 #\*)
173 (setf state '*)
174 (scanner-step char-scanner))
175 ((and (eq state '*)
176 (char= (scanner-current-char
177 char-scanner)
178 #\/))
179 (scanner-step char-scanner)
180 (return (values nil t t)))
181 (t
182 (setf state nil)
183 (scanner-step char-scanner)))))))
184 (and "//"
185 (skip-many () (not #\newline))
186 (? #\newline)))))))
dea4d055
MW
187
188;;;----- That's all, folks --------------------------------------------------