5 ;;; (c) 2009 Straylight/Edgeware
8 ;;;----- Licensing notice ---------------------------------------------------
10 ;;; This file is part of the Simple Object Definition system.
12 ;;; SOD is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
17 ;;; SOD is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with SOD; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
28 ;;;--------------------------------------------------------------------------
29 ;;; Declaration specifiers.
31 ;;; This is a little messy. The C rules, which we're largely following,
32 ;;; allow declaration specifiers to be written in any oreder, and allows an
33 ;;; arbitrary number of the things. This is mainly an exercise in
34 ;;; book-keeping, but we make an effort to categorize the various kinds of
35 ;;; specifiers rather better than the C standard.
37 ;;; We consider four kinds of declaration specifiers:
39 ;;; * Type qualifiers: `const', `restrict', and `volatile'.
40 ;;; * Sign specifiers: `signed' and `unsigned'.
41 ;;; * Size specifiers: `short' and `long'.
42 ;;; * Type specifiers: `void', `char', `int', `float', and `double',
44 ;;; The C standard acknowledges the category of type qualifiers (6.7.3), but
45 ;;; groups the other three kinds together and calls them all `type
46 ;;; specifiers' (6.7.2).
48 ;; Let's not repeat ourselves.
49 (macrolet ((define-declaration-specifiers (&rest defs)
52 (hashvar (gensym "HASH"))
53 (keyvar (gensym "KEY"))
54 (valvar (gensym "VAL")))
56 (destructuring-bind (kind &rest clauses) def
57 (let ((maps (mapcar (lambda (clause)
60 (cons (string-downcase clause)
63 (push `(deftype ,(symbolicate 'decl- kind) ()
64 '(member ,@(mapcar #'cdr maps)))
66 (setf mappings (nconc (remove-if-not #'car maps)
70 (defparameter *declspec-map*
71 (let ((,hashvar (make-hash-table :test #'equal)))
72 (mapc (lambda (,keyvar ,valvar)
73 (setf (gethash ,keyvar ,hashvar) ,valvar))
74 ',(mapcar #'car mappings)
75 ',(mapcar #'cdr mappings))
77 (define-declaration-specifiers
78 (type :char :int :float :double :void)
79 (size :short :long (nil . :long-long))
80 (sign :signed :unsigned)
81 (qualifier :const :restrict :volatile)
82 (tagged :enum :struct :union)))
85 (:predicate declspecp))
86 "Represents a declaration specifier being built."
87 (qualifiers nil :type list)
88 (sign nil :type (or decl-sign null))
89 (size nil :type (or decl-size null))
90 (type nil :type (or decl-type c-type null)))
92 (defun check-declspec (spec)
93 "Check that the declaration specifiers in SPEC are a valid combination.
95 This is surprisingly hairy.
97 It could be even worse: at least validity is monotonic. Consider an
98 alternate language where `double' is a size specifier like `long' rather
99 than being a primary type specifier like `float' (so you'd be able to say
100 things like `long double float'). Then `long float' would be invalid, but
101 `long float double' would be OK. We'd therefore need an additional
102 argument to know whether we were preparing a final set of specifiers (in
103 which case we'd have to reject `long float') or whether this is an
104 intermediate step (in which case we'd have to tentatively allow it in the
105 hope that the user added the necessary `double' later)."
107 (let ((sign (declspec-sign spec))
108 (size (declspec-size spec))
109 (type (declspec-type spec)))
111 (and (loop for (good-type good-signs good-sizes) in
113 ;; The entries in this table have the form (GOOD-TYPE
114 ;; GOOD-SIGNS GOOD-SIZES). The GOOD-TYPE is either a keyword
115 ;; or T (matches anything); the GOOD-SIZES and GOOD-SIGNS are
116 ;; lists. The SPEC must match at least one entry, as follows:
117 ;; the type must be NIL or match GOOD-TYPE; and the size and
118 ;; sign must match one of the elements of the corresponding
120 '((:int (nil :signed :unsigned) (nil :short :long :long-long))
121 (:char (nil :signed :unsigned) (nil))
122 (:double (nil) (nil :long))
125 thereis (and (or (eq type nil)
128 (member sign good-signs)
129 (member size good-sizes)))
132 (defun update-declspec-qualifiers (spec qual)
133 "Update the qualifiers in SPEC by adding QUAL.
135 The new declspec is returned if it's valid; otherwise NIL. SPEC is not
138 (let ((new (copy-declspec spec)))
139 (pushnew qual (declspec-qualifiers new))
140 (check-declspec new)))
142 (defun update-declspec-sign (spec sign)
143 "Update the signedness in SPEC to be SIGN.
145 The new declspec is returned if it's valid; otherwise NIL. SPEC is not
148 (and (null (declspec-sign spec))
149 (let ((new (copy-declspec spec)))
150 (setf (declspec-sign new) sign)
151 (check-declspec new))))
153 (defun update-declspec-size (spec size)
154 "Update the size in SPEC according to SIZE.
156 The new declspec is returned if it's valid; otherwise NIL. (This is a
157 little subtle because :LONG in particular can modify an existing size
158 entry.) SPEC is not modified."
160 (let ((new-size (case (declspec-size spec)
162 (:long (if (eq size :long) :long-long nil)))))
164 (let ((new (copy-declspec spec)))
165 (setf (declspec-size new) new-size)
166 (check-declspec new)))))
168 (defun update-declspec-type (spec type)
169 "Update the type in SPEC to be TYPE.
171 The new declspec is returned if it's valid; otherwise NIL. SPEC is not
174 (and (null (declspec-type spec))
175 (let ((new (copy-declspec spec)))
176 (setf (declspec-type new) type)
177 (check-declspec new))))
179 (defun canonify-declspec (spec)
180 "Transform the declaration specifiers SPEC into a canonical form.
182 The idea is that, however grim the SPEC, we can turn it into something
183 vaguely idiomatic, and pick precisely one of the possible synonyms.
185 The rules are that we suppress `signed' when it's redundant, and suppress
186 `int' if a size or signedness specifier is present. (Note that `signed
187 char' is not the same as `char', so stripping `signed' is only correct
188 when the type is `int'.)
190 The qualifiers are sorted and uniquified here; the relative ordering of
191 the sign/size/type specifiers will be determined by DECLSPEC-KEYWORDS."
193 (let ((quals (declspec-qualifiers spec))
194 (sign (declspec-sign spec))
195 (size (declspec-size spec))
196 (type (declspec-type spec)))
197 (cond ((eq type :int)
198 (when (eq sign :signed)
199 (setf (declspec-sign spec) nil))
201 (setf (declspec-type spec) nil)))
202 ((not (or sign size type))
203 (setf (declspec-type spec) :int)))
204 (setf (declspec-qualifiers spec)
205 (delete-duplicates (sort (copy-list quals) #'string<)))
208 (defun declspec-keywords (spec &optional qualsp)
209 "Return a list of strings for the declaration specifiers SPEC.
211 If QUALSP then return the type qualifiers as well."
213 (let ((quals (declspec-qualifiers spec))
214 (sign (declspec-sign spec))
215 (size (declspec-size spec))
216 (type (declspec-type spec)))
217 (nconc (and qualsp (mapcar #'string-downcase quals))
218 (and sign (list (string-downcase sign)))
221 (:long-long (list "long long"))
222 (t (list (string-downcase size))))
225 (keyword (list (string-downcase type)))
226 (simple-c-type (list (c-type-name type)))
227 (tagged-c-type (list (string-downcase (c-tagged-type-kind type))
228 (c-type-tag type)))))))
230 (defun declspec-c-type (spec)
231 "Return a C-TYPE object corresponding to SPEC."
232 (canonify-declspec spec)
233 (let* ((type (declspec-type spec))
234 (base (etypecase type
235 (symbol (make-simple-type
236 (format nil "~{~A~^ ~}"
237 (declspec-keywords spec))))
239 (qualify-type base (declspec-qualifiers spec))))
241 (defun declaration-specifier-p (lexer)
242 "Answer whether the current token might be a declaration specifier."
243 (and (eq (token-type lexer) :id)
244 (let ((id (token-value lexer)))
245 (or (gethash id *declspec-map*)
246 (gethash id *type-map*)))))
248 (defun parse-c-type (lexer)
249 "Parse declaration specifiers from LEXER and return a C-TYPE."
251 (let ((spec (make-declspec))
254 (flet ((token (&optional (ty (next-token lexer)))
257 (gethash (token-value lexer) *declspec-map*))
260 (let ((new (funcall func spec value)))
261 (cond (new (setf spec new))
262 (t (cerror* "Invalid declaration specifier ~(~A~) ~
263 following `~{~A~^ ~}' (ignored)"
264 (format-token tok (token-value lexer))
265 (declspec-keywords spec t))
267 (token (token-type lexer))
270 (decl-qualifier (update #'update-declspec-qualifiers tok))
271 (decl-sign (when (update #'update-declspec-sign tok)
273 (decl-size (when (update #'update-declspec-size tok)
275 (decl-type (when (update #'update-declspec-type tok)
277 (decl-tagged (let ((class (ecase tok
279 (:struct 'c-struct-type)
280 (:union 'c-union-type))))
281 (let ((tag (require-token lexer :id)))
283 (update #'update-declspec-type
284 (make-instance class :tag tag))))))
285 ((eql :id) (let ((ty (gethash (token-value lexer) *type-map*)))
286 (when (or found-any (not ty))
288 (when (update #'update-declspec-type ty)
289 (setf found-any t))))
293 (cerror* "Missing type name (guessing at `int')"))
294 (declspec-c-type spec))))
296 ;;;--------------------------------------------------------------------------
297 ;;; Parsing declarators.
299 ;;; This is a whole different ball game. The syntax is simple enough, but
300 ;;; the semantics is inside-out in a particularly unpleasant way.
302 ;;; The basic idea is that declarator operators closer to the identifier (or
303 ;;; where the identifier would be) should be applied last (with postfix
304 ;;; operators being considered `closer' than prefix).
306 ;;; One might thing that we can process prefix operators immediately. For
307 ;;; outer prefix operators, this is indeed correct, but in `int (*id)[]', for
308 ;;; example, we must wait to process the array before applying the pointer.
310 ;;; We can translate each declarator operator into a function which, given a
311 ;;; type, returns the appropriate derived type. If we can arrange these
312 ;;; functions in the right order during the parse, we have only to compose
313 ;;; them together and apply them to the base type in order to finish the job.
315 ;;; Consider the following skeletal declarator, with <> as a parenthesized
316 ;;; subdeclarator within.
318 ;;; * * <> [] [] ---> a b d c z
321 ;;; The algorithm is therefore as follows. We first read the prefix
322 ;;; operators, translate them into closures, and push them onto a list. Each
323 ;;; parenthesized subdeclarator gets its own list, and we push those into a
324 ;;; stack each time we encounter a `('. We then parse the middle bit, which
325 ;;; is a little messy (see the comment there), and start an empty final list
326 ;;; of operators. Finally, we scan postfix operators; these get pushed onto
327 ;;; the front of the operator list as we find them. Each time we find a `)',
328 ;;; we reverse the current prefix-operators list, and attach it to the front
329 ;;; of the operator list, and pop a new prefix list off the stack: at this
330 ;;; point, the operator list reflects the type of the subdeclarator we've
331 ;;; just finished. Eventually we should reach the end with an empty stack
332 ;;; and a prefix list, which again we reverse and attach to the front of the
335 ;;; Finally, we apply the operator functions in order.
337 (defun parse-c-declarator (lexer type &key abstractp dottedp)
338 "Parse a declarator. Return two values: the complete type, and the name.
340 Parse a declarator from LEXER. The base type is given by TYPE. If
341 ABSTRACTP is NIL, then require a name; if T then forbid a name; if :MAYBE
342 then don't care either way. If no name is given, return NIL.
344 If DOTTEDP then the name may be a dotted item name `NICK.NAME', returned
345 as a cons (NICK . NAME)."
352 ;; Scan prefix operators.
354 (case (token-type lexer)
356 ;; Star: a pointer type.
357 (#\* (let ((quals nil)
358 (tok (next-token lexer)))
360 ;; Gather following qualifiers.
363 ((:const :volatile :restrict)
368 ;; And stash the item.
369 (setf quals (sort quals #'string<))
371 (make-instance 'c-pointer-type
376 ;; An open-paren: start a new level of nesting. Maybe. There's an
377 ;; unpleasant ambiguity (DR9, DR249) between a parenthesized
378 ;; subdeclarator and a postfix function argument list following an
379 ;; omitted name. If the next thing looks like it might appear as a
380 ;; declaration specifier then assume it is one, push the paren back,
381 ;; and leave; do the same if the parens are empty, because that's not
382 ;; allowed otherwise.
383 (#\( (let ((tok (next-token lexer)))
386 (declaration-specifier-p lexer)))
387 (pushback-token lexer #\()
392 ;; Anything else: we're done.
395 ;; We're now at the middle of the declarator. If there's an item name
396 ;; here, we want to snarf it.
397 (when (and (not (eq abstractp t))
398 (eq (token-type lexer) :id))
399 (let ((name (token-value lexer)))
402 (eq (token-type lexer) #\.))
403 (let ((sub (require-token :id :default (gensym))))
404 (setf item (cons name sub))))
408 ;; If we were meant to have a name, but weren't given one, make one up.
409 (when (and (null item)
411 (cerror* "Missing name; inventing one")
412 (setf item (gensym)))
414 ;; Finally scan the postfix operators.
416 (case (token-type lexer)
418 ;; Open-bracket: an array. The dimensions are probably some
419 ;; gods-awful C expressions which we'll just tuck away rather than
420 ;; thinking about too carefully. Our representation of C types is
421 ;; capable of thinking about multidimensional arrays, so we slurp up
422 ;; as many dimensions as we can.
423 (#\[ (let ((dims nil))
425 (let* ((frag (scan-c-fragment lexer '(#\])))
426 (dim (c-fragment-text frag)))
427 (push (if (plusp (length dim)) dim nil) dims))
429 (unless (eq (next-token lexer) #\[)
431 (setf dims (nreverse dims))
433 (when (typep ty 'c-function-type)
434 (error "Array element type cannot be ~
436 (make-instance 'c-array-type
441 ;; Open-paren: a function with arguments.
442 (#\( (let ((args nil))
443 (unless (eql (next-token lexer) #\))
446 ;; Grab an argument and stash it.
447 (cond ((eql (token-type lexer) :ellipsis)
448 (push :ellipsis args))
450 (let ((base-type (parse-c-type lexer)))
451 (multiple-value-bind (type name)
452 (parse-c-declarator lexer base-type
454 (push (make-argument name type) args)))))
456 ;; Decide whether to take another one.
457 (case (token-type lexer)
459 (#\, (next-token lexer))
460 (t (cerror* "Missing `)' inserted before ~A"
461 (format-token lexer))
465 ;; Catch: if the only thing in the list is `void' (with no
466 ;; identifier) then kill the whole thing.
470 (eq (argument-type (car args)) (c-type void))
471 (not (argument-name (car args))))
475 ;; Stash the operator.
477 (when (typep ty '(or c-function-type c-array-type))
478 (error "Function return type cannot be ~
479 a function or array type"))
480 (make-instance 'c-function-type
485 ;; Close-paren: exit a level of nesting. Prepend the current prefix
486 ;; list and pop a new level. If there isn't one, this isn't our
487 ;; paren, so we're done.
490 (setf ops (nreconc prefix ops)
494 ;; Anything else means we've finished.
497 ;; If we still have operators stacked then something went wrong.
498 (setf ops (nreconc prefix ops))
500 (cerror* "Missing `)'(s) inserted before ~A"
501 (format-token lexer))
502 (dolist (prefix stack)
503 (setf ops (nreconc prefix ops))))
505 ;; Finally, grind through the list of operations.
506 (do ((ops ops (cdr ops))
507 (type type (funcall (car ops) type)))
508 ((endp ops) (values type item)))))
510 ;;;--------------------------------------------------------------------------
514 (with-input-from-string (in "
515 // int stat(struct stat *st)
517 int vsnprintf(size_t n, char *buf, va_list ap)
519 // int (*signal(int sig, int (*handler)(int s)))(int t)
521 (let* ((stream (make-instance 'position-aware-input-stream
524 (lex (make-instance 'sod-lexer :stream stream)))
527 (let ((ty (parse-c-type lex)))
528 (multiple-value-bind (type name) (parse-c-declarator lex ty)
531 (with-output-to-string (out)
532 (pprint-c-type type out name)
533 (format-token lex)))))))
535 ;;;----- That's all, folks --------------------------------------------------