;;; -*-lisp-*- ;;; ;;; Parser for C types ;;; ;;; (c) 2009 Straylight/Edgeware ;;; ;;;----- Licensing notice --------------------------------------------------- ;;; ;;; This file is part of the Sensible Object Design, an object system for C. ;;; ;;; SOD is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 2 of the License, or ;;; (at your option) any later version. ;;; ;;; SOD is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with SOD; if not, write to the Free Software Foundation, ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. (cl:in-package #:sod) ;;;-------------------------------------------------------------------------- ;;; Declaration specifiers. ;;; ;;; This stuff is distressingly complicated. ;;; ;;; Parsing a (single) declaration specifier is quite easy, and a declaration ;;; is just a sequence of these things. Except that there are a stack of ;;; rules about which ones are allowed to go together, and the language ;;; doesn't require them to appear in any particular order. ;;; ;;; A collection of declaration specifiers is carried about in a purpose-made ;;; object with a number of handy operations defined on it, and then I build ;;; some parsers in terms of them. The basic strategy is to parse ;;; declaration specifiers while they're valid, and keep track of what we've ;;; read. When I've reached the end, we'll convert what we've got into a ;;; `canonical form', and then convert that into a C type object of the ;;; appropriate kind. The whole business is rather more complicated than it ;;; really ought to be. ;; Firstly, a table of interesting things about the various declaration ;; specifiers that I might encounter. I categorize declaration specifiers ;; into four kinds. ;; ;; * `Type specifiers' describe the actual type, whether that's integer, ;; character, floating point, or some tagged or user-named type. ;; ;; * `Size specifiers' distinguish different sizes of the same basic type. ;; This is how we tell the difference between `int' and `long'. ;; ;; * `Sign specifiers' distinguish different signednesses. This is how we ;; tell the difference between `int' and `unsigned'. ;; ;; * `Qualifiers' are our old friends `const', `restrict' and `volatile'. ;; ;; These groupings are for my benefit here, in determining whether a ;; particular declaration specifier is valid in the current context. I don't ;; accept `function specifiers' (of which the only current example is ;; `inline') since it's meaningless to me. (defclass declspec () ;; Despite the fact that it looks pretty trivial, this can't be done with ;; `defstruct' for the simple reason that we add more methods to the ;; accessor functions later. ((label :type keyword :initarg :label :reader ds-label) (name :type string :initarg :name :reader ds-name) (kind :type (member type complexity sign size qualifier) :initarg :kind :reader ds-kind) (taggedp :type boolean :initarg :taggedp :initform nil :reader ds-taggedp)) (:documentation "Represents the important components of a declaration specifier. The only interesting instances of this class are in the table `*declspec-map*'.")) (defmethod shared-initialize :after ((ds declspec) slot-names &key) "If no name is provided then derive one from the label. Most declaration specifiers have simple names for which this works well." (default-slot (ds 'name slot-names) (string-downcase (ds-label ds)))) (defparameter *declspec-map* (let ((map (make-hash-table :test #'equal))) (dolist (item '((type :void :char :int :float :double (:bool :compat "_Bool")) (complexity (:complex :compat "_Complex") (:imaginary :compat "_Imaginary")) ((type :taggedp t) :enum :struct :union) (size :short :long (:long-long :name "long long")) (sign :signed :unsigned) (qualifier :const :restrict :volatile))) (destructuring-bind (kind &key (taggedp nil)) (let ((spec (car item))) (if (consp spec) spec (list spec))) (dolist (spec (cdr item)) (destructuring-bind (label &key (name (string-downcase label)) compat (taggedp taggedp)) (if (consp spec) spec (list spec)) (let ((ds (make-instance 'declspec :label label :name (or compat name) :kind kind :taggedp taggedp))) (setf (gethash name map) ds (gethash label map) ds) (when compat (setf (gethash compat map) ds))))))) map) "Maps symbolic labels and textual names to `declspec' instances.") ;; A collection of declaration specifiers, and how to merge them together. (defclass declspecs () ;; This could have been done with `defstruct' just as well, but a ;; `defclass' can be tweaked interactively, which is a win at the moment. ((type :initform nil :initarg :type :reader ds-type) (complexity :initform nil :initarg :complexity :reader ds-complexity) (sign :initform nil :initarg :sign :reader ds-sign) (size :initform nil :initarg :size :reader ds-size) (qualifier :initform nil :initarg :qualifiers :reader ds-qualifiers)) (:documentation "Represents a collection of declaration specifiers. This is used during type parsing to represent the type under construction. Instances are immutable: we build new ones rather than modifying existing ones. This leads to a certain amount of churn, but we'll just have to live with that. (Why are instances immutable? Because it's much easier to merge a new specifier into an existing collection and then check that the resulting thing is valid, rather than having to deal with all of the possible special cases of what the new thing might be. And if the merged collection isn't good, I must roll back to the previous version. So I don't get to take advantage of a mutable structure.)")) (defmethod ds-label ((ty c-type)) :c-type) (defmethod ds-name ((ty c-type)) (princ-to-string ty)) (defmethod ds-kind ((ty c-type)) 'type) (defparameter *good-declspecs* '(((:int) (:signed :unsigned) (:short :long :long-long) ()) ((:char) (:signed :unsigned) () ()) ((:double) () (:long) (:complex :imaginary)) (t () () ())) "List of good collections of declaration specifiers. Each item is a list of the form (TYPES SIGNS SIZES COMPLEXITIES). Each of TYPES, SIGNS, SIZES, and COMPLEXITIES, is either a list of acceptable specifiers of the appropriate kind, or T, which matches any specifier.") (defun good-declspecs-p (specs) "Are SPECS a good collection of declaration specifiers?" (let ((speclist (list (ds-type specs) (ds-sign specs) (ds-size specs) (ds-complexity specs)))) (some (lambda (it) (every (lambda (spec pat) (or (eq pat t) (null spec) (member (ds-label spec) pat))) speclist it)) *good-declspecs*))) (defun combine-declspec (specs ds) "Combine the declspec DS with the existing SPECS. Returns new DECLSPECS if they're OK, or `nil' if not. The old SPECS are not modified." (let* ((kind (ds-kind ds)) (old (slot-value specs kind))) (multiple-value-bind (ok new) (case kind (qualifier (values t (adjoin ds old))) (size (cond ((not old) (values t ds)) ((and (eq (ds-label old) :long) (eq ds old)) (values t (gethash :long-long *declspec-map*))) (t (values nil nil)))) (t (values (not old) ds))) (if ok (let ((copy (copy-instance specs))) (setf (slot-value copy kind) new) (and (good-declspecs-p copy) copy)) nil)))) (defun declspecs-type (specs) "Convert `declspecs' SPECS into a standalone C type object." (let ((type (ds-type specs)) (size (ds-size specs)) (sign (ds-sign specs)) (cplx (ds-complexity specs)) (quals (mapcar #'ds-label (ds-qualifiers specs)))) (cond ((typep type 'c-type) (qualify-c-type type quals)) ((or type size sign cplx) (when (and sign (eq (ds-label sign) :signed) (eq (ds-label type) :int)) (setf sign nil)) (cond ((and (or (null type) (eq (ds-label type) :int)) (or size sign)) (setf type nil)) ((null type) (setf type (gethash :int *declspec-map*)))) (make-simple-type (format nil "~{~@[~A~^ ~]~}" (mapcar #'ds-name (remove nil (list sign cplx size type)))) quals)) (t nil)))) ;; Parsing declaration specifiers. (define-indicator :declspec "") (defun scan-declspec (scanner &key (predicate (constantly t)) (indicator :declspec)) "Scan a `declspec' from SCANNER. If PREDICATE is provided then only succeed if (funcall PREDICATE DECLSPEC) is true, where DECLSPEC is the raw declaration specifier or C-type object, so we won't have fetched the tag for a tagged type yet. If the PREDICATE returns false then the scan fails without consuming input. If we couldn't find an acceptable declaration specifier then issue INDICATOR as the failure indicator. Value on success is either a `declspec' object or a `c-type' object." ;; Turns out to be easier to do this by hand. (let ((ds (and (eq (token-type scanner) :id) (let ((kw (token-value scanner))) (or (and (boundp '*module-type-map*) (gethash kw *module-type-map*)) (gethash kw *declspec-map*)))))) (cond ((or (not ds) (and predicate (not (funcall predicate ds)))) (values (list indicator) nil nil)) ((and (typep ds 'declspec) (ds-taggedp ds)) (scanner-step scanner) (if (eq (token-type scanner) :id) (let ((ty (make-c-tagged-type (ds-label ds) (token-value scanner)))) (scanner-step scanner) (values ty t t)) (values :tag nil t))) (t (scanner-step scanner) (values ds t t))))) (defun scan-and-merge-declspec (scanner specs) "Scan a declaration specifier and merge it with SPECS. This is a parser function. If it succeeds, it returns the merged `declspecs' object. It can fail either if no valid declaration specifier is found or it cannot merge the declaration specifier with the existing SPECS." (with-parser-context (token-scanner-context :scanner scanner) (if-parse (:consumedp consumedp) (scan-declspec scanner) (aif (combine-declspec specs it) (values it t consumedp) (values (list :declspec) nil consumedp))))) (export 'parse-c-type) (defun parse-c-type (scanner) "Parse a C type from declaration specifiers. This is a parser function. If it succeeds then the result is a `c-type' object representing the type it found. Note that this function won't try to parse a C declarator." (with-parser-context (token-scanner-context :scanner scanner) (if-parse (:result specs :consumedp cp) (many (specs (make-instance 'declspecs) it :min 1) (peek (scan-and-merge-declspec scanner specs))) (let ((type (declspecs-type specs))) (if type (values type t cp) (values (list :declspec) nil cp)))))) ;;;-------------------------------------------------------------------------- ;;; Parsing declarators. ;;; ;;; The syntax of declaration specifiers was horrific. Declarators are a ;;; very simple expression syntax, but this time the semantics are awful. In ;;; particular, they're inside-out. If <> denotes mumble of foo, then op <> ;;; is something like mumble of op of foo. Unfortunately, the expression ;;; parser engine wants to apply op of mumble of foo, so I'll have to do some ;;; work to fix the impedance mismatch. ;;; ;;; The currency we'll use is a pair (FUNC . NAME), with the semantics that ;;; (funcall FUNC TYPE) returns the derived type. The result of ;;; `parse-declarator' will be of this form. (export 'parse-declarator) (defun parse-declarator (scanner base-type &key kernel abstractp) "Parse a C declarator, returning a pair (C-TYPE . NAME). The SCANNER is a token scanner to read from. The BASE-TYPE is the type extracted from the preceding declaration specifiers, as parsed by `parse-c-type'. The result contains both the resulting constructed C-TYPE (with any qualifiers etc. as necessary), and the name from the middle of the declarator. The name is parsed using the KERNEL parser provided, and defaults to matching a simple identifier `:id'. This might, e.g., be (? :id) to parse an `abstract declarator' which has optional names. There's an annoying ambiguity in the syntax, if an empty KERNEL is permitted. In this case, you must ensure that ABSTRACTP is true so that the appropriate heuristic can be applied. As a convenience, if ABSTRACTP is true then `(? :id)' is used as the default KERNEL." (with-parser-context (token-scanner-context :scanner scanner) (let ((kernel-parser (cond (kernel kernel) (abstractp (parser () (? :id))) (t (parser () :id))))) (labels ((qualifiers () ;; qualifier* (parse (seq ((quals (list () (scan-declspec scanner :indicator :qualifier :predicate (lambda (ds) (and (typep ds 'declspec) (eq (ds-kind ds) 'qualifier))))))) (mapcar #'ds-label quals)))) (star () ;; Prefix: `*' qualifiers (parse (seq (#\* (quals (qualifiers))) (preop "*" (state 9) (cons (lambda (type) (funcall (car state) (make-pointer-type type quals))) (cdr state)))))) (predict-argument-list-p () ;; See `prefix-lparen'. Predict an argument list rather ;; than a nested declarator if (a) abstract declarators are ;; permitted and (b) the next token is a declaration ;; specifier or ellipsis. (let ((type (token-type scanner)) (value (token-value scanner))) (and abstractp (or (eq type :ellipsis) (and (eq type :id) (or (gethash value *module-type-map*) (gethash value *declspec-map*))))))) (prefix-lparen () ;; Prefix: `(' ;; ;; Opening parentheses are treated as prefix operators by ;; the expression parsing engine. There's an annoying ;; ambiguity in the syntax if abstract declarators are ;; permitted: a `(' might be either the start of a nested ;; subdeclarator or the start of a postfix function argument ;; list. The two are disambiguated by stating that if the ;; token following the `(' is a `)' or a declaration ;; specifier, then we have a postfix argument list. (parse (peek (seq (#\( (nil (if (predict-argument-list-p) (values nil nil nil) (values t t nil)))) (lparen #\)))))) (kernel () (parse (seq ((name (funcall kernel-parser))) (cons #'identity name)))) (argument-list () ;; [argument [`,' argument]* [`,' `...']] | `...' ;; ;; The possibility of a trailing `,' `...' means that we ;; can't use the standard `list' parser. Note that, unlike ;; `real' C, we allow an ellipsis even if there are no ;; explicit arguments. (let ((args nil)) (loop (when (eq (token-type scanner) :ellipsis) (push :ellipsis args) (scanner-step scanner) (return)) (multiple-value-bind (arg winp consumedp) (parse (seq ((base-type (parse-c-type scanner)) (dtor (parse-declarator scanner base-type :abstractp t))) (make-argument (cdr dtor) (car dtor)))) (unless winp (if (or consumedp args) (return-from argument-list (values arg nil t)) (return))) (push arg args)) (unless (eq (token-type scanner) #\,) (return)) (scanner-step scanner)) (values (nreverse args) t args))) (postfix-lparen () ;; Postfix: `(' argument-list `)' (parse (seq (#\( (args (argument-list)) #\)) (postop "()" (state 10) (cons (lambda (type) (funcall (car state) (make-function-type type args))) (cdr state)))))) (dimension () ;; `[' c-fragment ']' (parse (seq ((frag (parse-delimited-fragment scanner #\[ #\]))) (c-fragment-text frag)))) (lbracket () ;; Postfix: dimension+ (parse (seq ((dims (list (:min 1) (dimension)))) (postop "[]" (state 10) (cons (lambda (type) (funcall (car state) (make-array-type type dims))) (cdr state))))))) ;; And now we actually do the declarator parsing. (parse (seq ((value (expr (:nestedp nestedp) ;; An actual operand. (kernel) ;; Binary operators. There aren't any. nil ;; Prefix operators. (or (star) (prefix-lparen)) ;; Postfix operators. (or (postfix-lparen) (lbracket) (when nestedp (seq (#\)) (rparen #\)))))))) (cons (funcall (car value) base-type) (cdr value)))))))) ;;;----- That's all, folks --------------------------------------------------