From 239fa5bd3dff0b38b0cebdd3438311f21c24ba4f Mon Sep 17 00:00:00 2001 Message-Id: <239fa5bd3dff0b38b0cebdd3438311f21c24ba4f.1716890487.git.mdw@distorted.org.uk> From: Mark Wooding Date: Sun, 14 Jul 2013 16:09:22 +0100 Subject: [PATCH] Work in progress. Mostly bug fixing. Organization: Straylight/Edgeware From: Mark Wooding --- doc/sod.tex | 102 +++---- emacs-hacks.el | 1 + src/c-types-parse.lisp | 261 +++++++++--------- src/c-types-test.lisp | 55 +++- src/lexer-impl.lisp | 359 +++++++------------------ src/lexer-proto.lisp | 245 ++++++----------- src/lexical-parse.lisp | 216 --------------- src/module-impl.lisp | 13 + src/module-parse.lisp | 2 + src/module-proto.lisp | 5 + src/parser/parser-test.lisp | 6 +- src/pset-parse.lisp | 156 ++++++----- src/pset-proto.lisp | 2 +- src/sod.asd | 15 +- src/{base-test.lisp => test-base.lisp} | 0 15 files changed, 555 insertions(+), 883 deletions(-) delete mode 100644 src/lexical-parse.lisp rename src/{base-test.lisp => test-base.lisp} (100%) diff --git a/doc/sod.tex b/doc/sod.tex index 50f6121..ba2aaa5 100644 --- a/doc/sod.tex +++ b/doc/sod.tex @@ -50,6 +50,8 @@ \def\@scripts{\futurelet\@ch\@scripts@i} \atdef ;#1\\{\normalfont\itshape;#1\\} +\let\@@grammar\grammar +\def\grammar{\def\textbar{\hbox{$|$}}\@@grammar} \begingroup\lccode`\~=`\_\lowercase{\endgroup \def\@scripts@i{\if1\ifx\@ch~1\else\ifx\@ch^1\else0\fi\fi% @@ -165,17 +167,17 @@ unusual notation in order to make the presentation easier to read. \begin{quote} $\epsilon$ ::= \end{quote} -\item $[$@$]$ means an optional @: +\item @[@@] means an optional @: \begin{quote} - \syntax{$[$$]$ ::= $\epsilon$ | } + \syntax{@[@] ::= $\epsilon$ | } \end{quote} -\item @$^*$ means a sequence of zero or more @s: +\item @^* means a sequence of zero or more @s: \begin{quote} - \syntax{$^*$ ::= $\epsilon$ | $^*$ } + \syntax{@^* ::= $\epsilon$ | @^* } \end{quote} -\item @$^+$ means a sequence of one or more @s: +\item @^+ means a sequence of one or more @s: \begin{quote} - \syntax{$^+$ ::= $^*$} + \syntax{@^+ ::= @^*} \end{quote} \item @ means a sequence of one or more @s separated by commas: @@ -212,19 +214,19 @@ disambiguate: \subsubsection{Identifiers} \label{sec:syntax.lex.id} \begin{grammar} - ::= $^*$ + ::= @^* - ::= $|$ "_" + ::= | "_" - ::= $|$ + ::= @! - ::= "A" $|$ "B" $|$ \dots\ $|$ "Z" -\alt "a" $|$ "b" $|$ \dots\ $|$ "z" + ::= "A" | "B" | \dots\ | "Z" +\alt "a" | "b" | \dots\ | "z" \alt - ::= "0" $|$ + ::= "0" | - ::= "1" $|$ "2" $| \cdots |$ "9" + ::= "1" | "2" $| \cdots |$ "9" \end{grammar} The precise definition of @ is left to the function @@ -235,10 +237,10 @@ programmers are encouraged to limit themselves to the standard ASCII letters. \begin{grammar} ::= -"char" $|$ "class" $|$ "code" $|$ "const" $|$ "double" $|$ "enum" $|$ -"extern" $|$ "float" $|$ "import" $|$ "int" $|$ "lisp" $|$ "load" $|$ "long" -$|$ "restrict" $|$ "short" $|$ "signed" $|$ "struct" $|$ "typename" $|$ -"union" $|$ "unsigned" $|$ "void" $|$ "volatile" +"char" | "class" | "code" | "const" | "double" | "enum" | +"extern" | "float" | "import" | "int" | "lisp" | "load" | "long" +| "restrict" | "short" | "signed" | "struct" | "typename" | +"union" | "unsigned" | "void" | "volatile" \end{grammar} Many of these are borrowed from~C; however, some (e.g., @"import" and @@ -248,7 +250,7 @@ Many of these are borrowed from~C; however, some (e.g., @"import" and \subsubsection{String and character literals} \label{sec:syntax.lex.string} \begin{grammar} - ::= "\"" $^*$ "\"" + ::= "\"" @^* "\"" ::= "'" "'" @@ -276,21 +278,21 @@ discouraged. \alt \alt - ::= $^*$ + ::= @^* - ::= "0" $($"b"$|$"B"$)$ $^+$ + ::= "0" @("b"|"B"@) @^+ - ::= "0" $|$ "1" + ::= "0" | "1" - ::= "0" $[$"o"$|$"O"$]$ $^+$ + ::= "0" @["o"|"O"@] @^+ - ::= "0" $|$ "1" $| \cdots |$ "7" + ::= "0" | "1" $| \cdots |$ "7" - ::= "0" $($"x"$|$"X"$)$ $^+$ + ::= "0" @("x"|"X"@) @^+ ::= -\alt "A" $|$ "B" $|$ "C" $|$ "D" $|$ "E" $|$ "F" -\alt "a" $|$ "b" $|$ "c" $|$ "d" $|$ "e" $|$ "f" +\alt "A" | "B" | "C" | "D" | "E" | "F" +\alt "a" | "b" | "c" | "d" | "e" | "f" \end{grammar} Sod understands only integers, not floating-point numbers; its integer syntax @@ -314,8 +316,8 @@ alphanumeric. ::= "/*" - $^*$ $($$^+$ $^*)^*$ - $^*$ + @^* @(@^+ @^*@)^* + @^* "*/" ::= "*" @@ -324,7 +326,7 @@ alphanumeric. ::= any character other than "*" or "/" - ::= "//" $^*$ + ::= "//" @^* ::= a newline character @@ -371,7 +373,7 @@ brackets, braces or parenthesis ends the fragment. \subsection{Module syntax} \label{sec:syntax-module} \begin{grammar} - ::= $^*$ + ::= @^* ::= \alt @@ -484,12 +486,12 @@ declarations instead. \begin{grammar} ::= - "code" ":" $[$$]$ + "code" ":" @[@] "{" "}" ::= "[" "]" - ::= $^+$ + ::= @^+ \end{grammar} The @ will be output unchanged to one of the output files. @@ -592,7 +594,7 @@ combinations are permitted. A declaration specifier must consist of zero or more @, and one of the following, up to reordering. \begin{itemize} \item @ -\item @"struct" , @"union" , @"enum" +\item @"struct" @, @"union" @, @"enum" @ \item @"void" \item @"char", @"unsigned char", @"signed char" \item @"short", @"unsigned short", @"signed short" @@ -610,30 +612,30 @@ All of these have their usual C meanings. \begin{grammar} ::= - $^*$ $^*$ + @^* @^* ::= | \alt "(" ")" ::= "." - ::= "*" $^*$ + ::= "*" @^* ::= "[" "]" \alt "(" ")" ::= | "..." -\alt $[$"," "..."$]$ +\alt @["," "..."@] - ::= $^+$ + ::= @^+ - ::= | $[$$]$ + ::= | @[@] ::= - $^+$ | $^*$ + @^+ | @^* ::= "(" ")" -\alt $[$$]$ $^+$ +\alt @[@] @^+ \end{grammar} The declarator syntax is taken from C, but with some differences. @@ -680,9 +682,9 @@ class Sub : Super { \begin{grammar} ::= - $[$$]$ + @[@] "class" ":" - "{" $^*$ "}" + "{" @^* "}" ::= ";" \alt @@ -727,10 +729,10 @@ These items are discussed on the following sections. \begin{grammar} ::= - $[$$]$ - $^+$ + @[@] + @^+ - ::= $[$"=" $]$ + ::= @["=" @] \end{grammar} A @ defines one or more slots. All instances of the class and any @@ -763,7 +765,7 @@ class Example : Super { \subsubsection{Initializer items} \label{sec:syntax.class.init} \begin{grammar} - ::= $[$"class"$]$ + ::= @["class"@] ::= "=" @@ -792,16 +794,16 @@ The initializer has one of two forms. \begin{grammar} ::= - $[$$]$ - $^+$ $[$$]$ + @[@] + @^+ @[@] \end{grammar} \subsubsection{Method items} \label{sec:syntax.class.method} \begin{grammar} ::= - $[$$]$ - $^+$ + @[@] + @^+ ::= "{" "}" | "extern" ";" \end{grammar} diff --git a/emacs-hacks.el b/emacs-hacks.el index c807c28..9127080 100644 --- a/emacs-hacks.el +++ b/emacs-hacks.el @@ -6,6 +6,7 @@ (dolist (entry '((parse 0) (if-parse 2) (if-char 2) (expr 1) + (label 1) (acond . cond) (define-class-slot 3))) (put (car entry) 'common-lisp-indent-function diff --git a/src/c-types-parse.lisp b/src/c-types-parse.lisp index a3ecae4..e3ac625 100644 --- a/src/c-types-parse.lisp +++ b/src/c-types-parse.lisp @@ -65,8 +65,9 @@ (cl:in-package #:sod) ;; `inline') since it's meaningless to me. (defclass declspec () - ;; This could have been done with DEFSTRUCT just as well, but a DEFCLASS - ;; can be tweaked interactively, which is a win at the moment. + ;; Despite the fact that it looks pretty trivial, this can't be done with + ;; `defstruct' for the simple reason that we add more methods to the + ;; accessor functions later. ((label :type keyword :initarg :label :reader ds-label) (name :type string :initarg :name :reader ds-name) (kind :type (member type sign size qualifier) @@ -115,9 +116,8 @@ (defparameter *declspec-map* ;; A collection of declaration specifiers, and how to merge them together. (defclass declspecs () - ;; Despite the fact that it looks pretty trivial, this can't be done with - ;; DEFCLASS for the simple reason that we add more methods to the accessor - ;; functions later. + ;; This could have been done with `defstruct' just as well, but a + ;; `defclass' can be tweaked interactively, which is a win at the moment. ((type :initform nil :initarg :type :reader ds-type) (sign :initform nil :initarg :sign :reader ds-sign) (size :initform nil :initarg :size :reader ds-size) @@ -202,7 +202,7 @@ (defun declspecs-type (specs) ((null type) (setf type (gethash :int *declspec-map*)))) (make-simple-type (format nil "~{~@[~A~^ ~]~}" - (mapcar #'ds-label + (mapcar #'ds-name (remove nil (list sign size type)))) quals)) @@ -259,6 +259,7 @@ (defun scan-and-merge-declspec (scanner specs) (values it t consumedp) (values (list :declspec) nil consumedp))))) +(export 'parse-c-type) (defun parse-c-type (scanner) "Parse a C type from declaration specifiers. @@ -288,122 +289,138 @@ (defun parse-c-type (scanner) ;;; (funcall FUNC TYPE) returns the derived type. The result of ;;; `parse-declarator' will be of this form. -(defun parse-declarator (scanner base-type &key abstractp) - (with-parser-context (token-scanner-context :scanner scanner) +(export 'parse-declarator) +(defun parse-declarator (scanner base-type &key centre abstractp) + "Parse a C declarator, returning a pair (C-TYPE . NAME). - (labels ((qualifiers () - ;; QUALIFIER* - - (parse - (seq ((quals (list () - (scan-declspec - scanner - :indicator :qualifier - :predicate (lambda (ds) - (and (typep ds 'declspec) - (eq (ds-kind ds) - 'qualifier))))))) - (mapcar #'ds-label quals)))) - - (star () - ;; Prefix: `*' QUALIFIERS - - (parse (seq (#\* (quals (qualifiers))) - (preop "*" (state 9) - (cons (lambda (type) - (funcall (car state) - (make-pointer-type type quals))) - (cdr state)))))) - - (prefix-lparen () - ;; Prefix: `(' - ;; - ;; Opening parentheses are treated as prefix operators by the - ;; expression parsing engine. There's an annoying ambiguity - ;; in the syntax if abstract declarators are permitted: a `(' - ;; might be either the start of a nested subdeclarator or the - ;; start of a postfix function argument list. The two are - ;; disambiguated by stating that if the token following the - ;; `(' is a `)' or a declaration specifier, then we have a - ;; postfix argument list. - - (parse - (peek (seq (#\( - (nil (if (and abstractp - (eq (token-type scanner) :id) - (let ((id (token-value scanner))) - (or (gethash id - *module-type-map*) - (gethash id - *declspec-map*)))) - (values nil nil nil) - (values t t nil)))) - (lparen #\)))))) - - (centre () - ;; ID | empty - ;; - ;; The centre might be empty or contain an identifier, - ;; depending on the setting of ABSTRACTP. - - (parse (or (when (not (eq abstractp t)) - (seq ((id :id)) (cons #'identity id))) - (when abstractp - (t (cons #'identity nil)))))) - - (argument-list () - ;; [ ARGUMENT [ `,' ARGUMENT ]* ] - - (parse (list () - (seq ((base-type (parse-c-type scanner)) - (dtor (parse-declarator scanner - base-type - :abstractp :maybe))) - (make-argument (cdr dtor) (car dtor))) - #\,))) - - (postfix-lparen () - ;; Postfix: `(' ARGUMENT-LIST `)' - - (parse (seq (#\( (args (argument-list)) #\)) - (postop "()" (state 9) - (cons (lambda (type) - (funcall (car state) - (make-function-type type args))) - (cdr state)))))) - - (dimension () - ;; `[' C-FRAGMENT ']' - - (parse-delimited-fragment scanner #\[ #\])) - - (lbracket () - ;; Postfix: DIMENSION+ - - (parse (seq ((dims (list (:min 1) (dimension)))) - (postop "[]" (state 10) - (cons (lambda (type) - (funcall (car state) - (make-array-type type dims))) - (cdr state))))))) - - ;; And now we actually do the declarator parsing. - (parse (seq ((value (expr (:nestedp nestedp) - - ;; An actual operand. - (centre) - - ;; Binary operators. There aren't any. - nil - - ;; Prefix operators. - (or (star) - (prefix-lparen)) - - ;; Postfix operators. - (or (postfix-lparen) - (lbracket) - (when nestedp (seq (#\)) (rparen #\)))))))) - (cons (funcall (car value) base-type) (cdr value))))))) + The SCANNER is a token scanner to read from. The BASE-TYPE is the type + extracted from the preceding declaration specifiers, as parsed by + `parse-c-type'. + + The result contains both the resulting constructed C-TYPE (with any + qualifiers etc. as necessary), and the name from the middle of the + declarator. The name is parsed using the CENTRE parser provided, and + defaults to matching a simple identifier `:id'. This might, e.g., be + (? :id) to parse an `abstract declarator' which has optional names. + + There's an annoying ambiguity in the syntax, if an empty CENTRE is + permitted. In this case, you must ensure that ABSTRACTP is true so that + the appropriate heuristic can be applied. As a convenience, if ABSTRACTP + is true then `(? :id)' is used as the default CENTRE." + (with-parser-context (token-scanner-context :scanner scanner) + (let ((centre-parser (cond (centre centre) + (abstractp (parser () (? :id))) + (t (parser () :id))))) + + (labels ((qualifiers () + ;; qualifier* + + (parse + (seq ((quals (list () + (scan-declspec + scanner + :indicator :qualifier + :predicate (lambda (ds) + (and (typep ds 'declspec) + (eq (ds-kind ds) + 'qualifier))))))) + (mapcar #'ds-label quals)))) + + (star () + ;; Prefix: `*' qualifiers + + (parse (seq (#\* (quals (qualifiers))) + (preop "*" (state 9) + (cons (lambda (type) + (funcall (car state) + (make-pointer-type type quals))) + (cdr state)))))) + + (next-declspec-p () + ;; Ansert whether the next token is a valid declaration + ;; specifier, without consuming it. + (and (eq (token-type scanner) :id) + (let ((id (token-value scanner))) + (or (gethash id *module-type-map*) + (gethash id *declspec-map*))))) + + (prefix-lparen () + ;; Prefix: `(' + ;; + ;; Opening parentheses are treated as prefix operators by + ;; the expression parsing engine. There's an annoying + ;; ambiguity in the syntax if abstract declarators are + ;; permitted: a `(' might be either the start of a nested + ;; subdeclarator or the start of a postfix function argument + ;; list. The two are disambiguated by stating that if the + ;; token following the `(' is a `)' or a declaration + ;; specifier, then we have a postfix argument list. + (parse + (peek (seq (#\( + (nil (if (and abstractp (next-declspec-p)) + (values nil nil nil) + (values t t nil)))) + (lparen #\)))))) + + (centre () + (parse (seq ((name (funcall centre-parser))) + (cons #'identity name)))) + + (argument-list () + ;; [ argument [ `,' argument ]* ] + + (parse (list () + (seq ((base-type (parse-c-type scanner)) + (dtor (parse-declarator scanner + base-type + :abstractp t))) + (make-argument (cdr dtor) (car dtor))) + #\,))) + + (postfix-lparen () + ;; Postfix: `(' argument-list `)' + + (parse (seq (#\( (args (argument-list)) #\)) + (postop "()" (state 10) + (cons (lambda (type) + (funcall (car state) + (make-function-type type args))) + (cdr state)))))) + + (dimension () + ;; `[' c-fragment ']' + + (parse (seq ((frag (parse-delimited-fragment + scanner #\[ #\]))) + (c-fragment-text frag)))) + + (lbracket () + ;; Postfix: dimension+ + + (parse (seq ((dims (list (:min 1) (dimension)))) + (postop "[]" (state 10) + (cons (lambda (type) + (funcall (car state) + (make-array-type type dims))) + (cdr state))))))) + + ;; And now we actually do the declarator parsing. + (parse (seq ((value (expr (:nestedp nestedp) + + ;; An actual operand. + (centre) + + ;; Binary operators. There aren't any. + nil + + ;; Prefix operators. + (or (star) + (prefix-lparen)) + + ;; Postfix operators. + (or (postfix-lparen) + (lbracket) + (when nestedp (seq (#\)) (rparen #\)))))))) + (cons (funcall (car value) base-type) (cdr value)))))))) ;;;----- That's all, folks -------------------------------------------------- diff --git a/src/c-types-test.lisp b/src/c-types-test.lisp index f1e4324..16e41ce 100644 --- a/src/c-types-test.lisp +++ b/src/c-types-test.lisp @@ -42,11 +42,25 @@ (defun assert-not-cteqp (a b) (when (c-type-equal-p a b) (failure "Assert unequal C types: ~A ~_and ~A" a b))) +(defun expand-tabs (string) + (with-output-to-string (out) + (do ((i 0 (1+ i)) + (char (char string 0) (char string i)) + (pos 0)) + ((>= i (length string))) + (case char + (#\newline (write-char char out) + (setf pos 0)) + (#\tab (write-string " " out :end (- 8 (mod pos 8))) + (setf pos (logandc2 (+ pos 8) 7))) + (t (write-char char out) + (incf pos)))))) + (defun assert-pp-ctype (type kernel string) (let* ((*print-right-margin* 77) (print (with-output-to-string (out) (pprint-c-type type out kernel)))) - (assert-equal print string + (assert-equal (expand-tabs print) (expand-tabs string) (format nil "Type ~S with kernel ~S ~_prints as `~A' ~_~ rather than `~A'." type kernel print string)))) @@ -232,4 +246,43 @@ (def-test-method commentify-non-recursive ((test c-types-test) :run nil) int typeflag), int /*nopenfd*/)"))) +;;;-------------------------------------------------------------------------- +;;; Parsing. + +(def-test-method parse-c-type ((test c-types-test) :run nil) + (flet ((check (string c-type name) + (let* ((char-scanner (make-string-scanner string)) + (scanner (make-instance 'sod-token-scanner + :char-scanner char-scanner + :filename ""))) + (with-parser-context (token-scanner-context :scanner scanner) + (define-module ("" :truename nil :location scanner) + (multiple-value-bind (result winp consumedp) + (parse (seq ((ds (parse-c-type scanner)) + (dc (parse-declarator scanner ds)) + :eof) + dc)) + (declare (ignore consumedp)) + (cond ((null c-type) + (assert-false winp)) + (t + (assert-true winp) + (unless (eq c-type t) + (assert-cteqp (car result) c-type)) + (unless (eq name t) + (assert-equal (cdr result) name)))))))))) + + (check "int x" (c-type int) "x") + (check "int long unsigned long y" (c-type unsigned-long-long) "y") + (check "int long int x" nil nil) + (check "float v[69][42]" (c-type ([] float "69" "42")) "v") + (check "const char *const tab[]" + (c-type ([] (* (char :const) :const) "")) + "tab") + (check "void (*signal(int, void (*)(int)))(int)" + (c-type (func (* (func void (nil int))) + (nil int) + (nil (* (func void (nil int)))))) + "signal"))) + ;;;----- That's all, folks -------------------------------------------------- diff --git a/src/lexer-impl.lisp b/src/lexer-impl.lisp index 03a6bcc..f474590 100644 --- a/src/lexer-impl.lisp +++ b/src/lexer-impl.lisp @@ -26,272 +26,113 @@ (cl:in-package #:sod) ;;;-------------------------------------------------------------------------- -;;; Basic lexical analyser. +;;; Class implementation. -(defstruct (pushed-token - (:constructor make-pushed-token (type value location))) - "A token that has been pushed back into a lexer for later processing." - type value location) +(defmethod shared-initialize :after + ((scanner sod-token-scanner) slot-names &key) + (default-slot (scanner 'sod-parser::filename slot-names) + (scanner-filename (token-scanner-char-scanner scanner)))) -;;; Class definition. - -(export 'basic-lexer) -(defclass basic-lexer () - ((stream :initarg :stream :type stream :reader lexer-stream) - (char :initform nil :type (or character null) :reader lexer-char) - (pushback-chars :initform nil :type list) - (token-type :initform nil :accessor token-type) - (token-value :initform nil :accessor token-value) - (location :initform nil :reader file-location) - (pushback-tokens :initform nil :type list)) - (:documentation - "Base class for lexical analysers. - - The lexer reads characters from STREAM, which, for best results, wants to - be a `position-aware-input-stream'. - - The lexer provides one-character lookahead by default: the current - lookahead character is available to subclasses in the slot CHAR. Before - beginning lexical analysis, the lookahead character needs to be - established with `next-char'. If one-character lookahead is insufficient, - the analyser can push back an arbitrary number of characters using - `pushback-char'. - - The `next-token' function scans and returns the next token from the - STREAM, and makes it available as TOKEN-TYPE and TOKEN-VALUE, providing - one-token lookahead. A parser using the lexical analyser can push back - tokens using `pushback-tokens'. - - For convenience, the lexer implements a `file-location' method (delegated - to the underlying stream).")) - -;;; Reading and pushing back characters. - -(defmethod next-char ((lexer basic-lexer)) - (with-slots (stream char pushback-chars) lexer - (setf char (if pushback-chars - (pop pushback-chars) - (read-char stream nil))))) - -(defmethod pushback-char ((lexer basic-lexer) new-char) - (with-slots (char pushback-chars) lexer - (push char pushback-chars) - (setf char new-char))) - -(defmethod fixup-stream* ((lexer basic-lexer) thunk) - (with-slots (stream char pushback-chars) lexer - (when pushback-chars - (error "Lexer has pushed-back characters.")) - (when (slot-boundp lexer 'char) - (unread-char char stream)) - (unwind-protect - (funcall thunk stream) - (setf char (read-char stream nil))))) - -;;; Reading and pushing back tokens. - -(defmethod next-token :around ((lexer basic-lexer)) - (unless (slot-boundp lexer 'char) - (next-char lexer))) - -(defmethod next-token ((lexer basic-lexer)) - (with-slots (pushback-tokens token-type token-value location) lexer - (setf (values token-type token-value) - (if pushback-tokens - (let ((pushback (pop pushback-tokens))) - (setf location (pushed-token-location pushback)) - (values (pushed-token-type pushback) - (pushed-token-value pushback))) - (scan-token lexer))))) - -(defmethod scan-token :around ((lexer basic-lexer)) - (with-default-error-location (lexer) - (call-next-method))) - -(defmethod pushback-token ((lexer basic-lexer) new-token-type - &optional new-token-value new-location) - (with-slots (pushback-tokens token-type token-value location) lexer - (push (make-pushed-token token-type token-value location) - pushback-tokens) - (when new-location (setf location new-location)) - (setf token-type new-token-type - token-value new-token-value))) - -;;; Utilities. - -(defmethod skip-spaces ((lexer basic-lexer)) - (do ((ch (lexer-char lexer) (next-char lexer))) - ((not (whitespace-char-p ch)) ch))) +(defmethod make-scanner-stream ((scanner sod-token-scanner)) + (make-scanner-stream (token-scanner-char-scanner scanner))) ;;;-------------------------------------------------------------------------- -;;; Our main lexer. - -(export 'sod-lexer) -(defclass sod-lexer (basic-lexer) - () - (:documentation - "Lexical analyser for the SOD lanuage. - - See the `lexer' class for the gory details about the lexer protocol.")) +;;; Indicators and error messages. -(defmethod scan-token ((lexer sod-lexer)) - (with-slots (stream char keywords location) lexer - (prog (ch) +(defvar *indicator-map* (make-hash-table) + "Hash table mapping indicator objects to human-readable descriptions.") - consider +(defun show-char (stream char &optional colonp atsignp) + "Format CHAR to STREAM in a readable way. - ;; Stash the position of this token so that we can report it later. - (setf ch (skip-spaces lexer) - location (file-location stream)) + Usable in `format''s ~/.../ command." + (declare (ignore colonp atsignp)) + (cond ((null char) (write-string "" stream)) + ((and (graphic-char-p char) (char/= char #\space)) + (format stream "`~C'" char)) + (t (format stream "<~(~:C~)>" char)))) - ;; Now work out what it is that we're dealing with. - (cond - - ;; End-of-file brings its own peculiar joy. - ((null ch) (return (values :eof t))) - - ;; Strings. - ((or (char= ch #\") (char= ch #\')) - (let* ((quote ch) - (string - (with-output-to-string (out) - (loop - (flet ((getch () - (setf ch (next-char lexer)) - (when (null ch) - (cerror* "Unexpected end of file in ~ - ~:[string~;character~] constant" - (char= quote #\')) - (return)))) - (getch) - (cond ((char= ch quote) (return)) - ((char= ch #\\) (getch))) - (write-char ch out)))))) - (setf ch (next-char lexer)) - (ecase quote - (#\" (return (values :string string))) - (#\' (case (length string) - (0 (cerror* "Empty character constant") - (return (values :char #\?))) - (1 (return (values :char (char string 0)))) - (t (cerror* "Multiple characters in character constant") - (return (values :char (char string 0))))))))) - - ;; Pick out identifiers and keywords. - ((or (alpha-char-p ch) (char= ch #\_)) - - ;; Scan a sequence of alphanumerics and underscores. We could - ;; allow more interesting identifiers, but it would damage our C - ;; lexical compatibility. - (let ((id (with-output-to-string (out) - (loop - (write-char ch out) - (setf ch (next-char lexer)) - (when (or (null ch) - (not (or (alphanumericp ch) - (char= ch #\_)))) - (return)))))) - - ;; Done. - (return (values :id id)))) - - ;; Pick out numbers. Currently only integers, but we support - ;; multiple bases. - ((digit-char-p ch) - - ;; Sort out the prefix. If we're looking at `0b', `0o' or `0x' - ;; (maybe uppercase) then we've got a funny radix to deal with. - ;; Otherwise, a leading zero signifies octal (daft, I know), else - ;; we're left with decimal. - (multiple-value-bind (radix skip-char) - (if (char/= ch #\0) - (values 10 nil) - (case (and (setf ch (next-char lexer)) - (char-downcase ch)) - (#\b (values 2 t)) - (#\o (values 8 t)) - (#\x (values 16 t)) - (t (values 8 nil)))) - - ;; If we last munched an interesting letter, we need to skip over - ;; it. That's what the SKIP-CHAR flag is for. - ;; - ;; Danger, Will Robinson! If we're just about to eat a radix - ;; letter, then the next thing must be a digit. For example, - ;; `0xfatenning' parses as a hex number followed by an identifier - ;; `0xfa ttening', but `0xturning' is an octal number followed by - ;; an identifier `0 xturning'. - (when skip-char - (let ((peek (next-char lexer))) - (unless (digit-char-p peek radix) - (pushback-char lexer ch) - (return-from scan-token (values :integer 0))) - (setf ch peek))) - - ;; Scan an integer. While there are digits, feed them into the - ;; accumulator. - (do ((accum 0 (+ (* accum radix) digit)) - (digit (and ch (digit-char-p ch radix)) - (and ch (digit-char-p ch radix)))) - ((null digit) (return-from scan-token - (values :integer accum))) - (setf ch (next-char lexer))))) - - ;; A slash might be the start of a comment. - ((char= ch #\/) - (setf ch (next-char lexer)) - (case ch - - ;; Comment up to the end of the line. - (#\/ - (loop - (setf ch (next-char lexer)) - (when (or (null ch) (char= ch #\newline)) - (go scan)))) - - ;; Comment up to the next `*/'. - (#\* - (tagbody - top - (case (setf ch (next-char lexer)) - (#\* (go star)) - ((nil) (go done)) - (t (go top))) - star - (case (setf ch (next-char lexer)) - (#\* (go star)) - (#\/ (setf ch (next-char lexer)) - (go done)) - ((nil) (go done)) - (t (go top))) - done) - (go consider)) - - ;; False alarm. (The next character is already set up.) - (t - (return (values #\/ t))))) - - ;; A dot: might be `...'. Tread carefully! We need more lookahead - ;; than is good for us. - ((char= ch #\.) - (setf ch (next-char lexer)) - (cond ((eql ch #\.) - (setf ch (next-char lexer)) - (cond ((eql ch #\.) (return (values :ellipsis nil))) - (t (pushback-char lexer #\.) - (return (values #\. t))))) - (t - (return (values #\. t))))) - - ;; Anything else is a lone delimiter. - (t - (return (multiple-value-prog1 - (values ch t) - (next-char lexer))))) - - scan - ;; Scan a new character and try again. - (setf ch (next-char lexer)) - (go consider)))) +;;;-------------------------------------------------------------------------- +;;; Token scanning. + +(defmethod scanner-token ((scanner sod-token-scanner)) + (with-slots (char-scanner line column) scanner + (with-parser-context (character-scanner-context :scanner char-scanner) + + (flet ((scan-digits (&key (radix 10) (min 1) (init 0)) + ;; Scan and return a sequence of digits. + (parse (many (acc init (+ (* acc radix) it) :min min) + (label (list :digit radix) + (filter (lambda (ch) + (digit-char-p ch radix)))))))) + + ;; Skip initial junk, and remember the place. + (loop + (setf (scanner-line scanner) (scanner-line char-scanner) + (scanner-column scanner) (scanner-column char-scanner)) + (cond-parse (:consumedp cp :expected exp) + ((satisfies whitespace-char-p) (parse :whitespace)) + ((scan-comment char-scanner)) + (t (if cp (lexer-error char-scanner exp cp) (return))))) + + ;; Now parse something. + (cond-parse (:consumedp cp :expected exp) + + ;; Alphanumerics mean we read an identifier. + ((or #\_ (satisfies alpha-char-p)) + (values :id (with-output-to-string (out) + (write-char it out) + (parse (many (nil nil (write-char it out)) + (or #\_ (satisfies alphanumericp))))))) + + ;; Quotes introduce a literal. + ((seq ((quote (or #\" #\')) + (contents (many (out (make-string-output-stream) + (progn (write-char it out) out) + :final (get-output-stream-string out)) + (or (and #\\ :any) (not quote)))) + (nil (char quote))) + (ecase quote + (#\" contents) + (#\' (case (length contents) + (1 (char contents 0)) + (0 (cerror* "Empty character literal") #\?) + (t (cerror* "Too many characters in literal") + (char contents 0)))))) + (values (etypecase it + (character :char) + (string :string)) + it)) + + ;; Zero introduces a chosen-radix integer. + ((and #\0 + (or (and (or #\b #\B) (scan-digits :radix 2)) + (and (or #\o #\O) (scan-digits :radix 8)) + (and (or #\x #\X) (scan-digits :radix 16)) + (scan-digits :radix 8 :min 0))) + (values :int it)) + + ;; Any other digit forces radix-10. + ((seq ((d (filter digit-char-p)) + (i (scan-digits :radix 10 :min 0 :init d))) + i) + (values :int it)) + + ;; Some special punctuation sequences are single tokens. + ("..." (values :ellipsis nil)) + + ;; Any other character is punctuation. + (:any (values it nil)) + + ;; End of file means precisely that. + (:eof (values :eof nil)) + + ;; Report errors and try again. Because we must have consumed some + ;; input in order to get here (we've matched both :any and :eof) we + ;; must make progress on every call. + (t + (assert cp) + (lexer-error char-scanner exp cp) + (scanner-token scanner))))))) ;;;----- That's all, folks -------------------------------------------------- diff --git a/src/lexer-proto.lisp b/src/lexer-proto.lisp index 8e0c889..e72152e 100644 --- a/src/lexer-proto.lisp +++ b/src/lexer-proto.lisp @@ -26,178 +26,91 @@ (cl:in-package #:sod) ;;;-------------------------------------------------------------------------- -;;; Accessors. +;;; Class definition. -(export 'lexer-char) -(defgeneric lexer-char (lexer) +(export 'sod-token-scanner) +(defclass sod-token-scanner (token-scanner) + ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner)) (:documentation - "Return the current lookahead character from the LEXER. + "A token scanner for SOD input files. - When the lexer is first created, there is no lookahead character: you must - `prime the pump' by calling `next-char'. The lexer represents - encountering the end of its input stream by setting the lookahead - character to nil. At this point it is still possible to push back - characters.")) + Not a lot here, apart from a character scanner to read from and the + standard token scanner infrastructure.")) ;;;-------------------------------------------------------------------------- -;;; Formatting tokens. - -(defgeneric format-token (token-type &optional token-value) - (:documentation - "Return a string describing a token with the specified type and value.") - (:method ((token-type (eql :eof)) &optional token-value) - (declare (ignore token-value)) - "") - (:method ((token-type (eql :string)) &optional token-value) - (declare (ignore token-value)) - "") - (:method ((token-type (eql :char)) &optional token-value) - (declare (ignore token-value)) - "") - (:method ((token-type (eql :id)) &optional token-value) - (format nil "" token-value)) - (:method ((token-type symbol) &optional token-value) - (declare (ignore token-value)) - (check-type token-type keyword) - (format nil "`~(~A~)'" token-type)) - (:method ((token-type character) &optional token-value) - (declare (ignore token-value)) - (format nil "~:[<~:C>~;`~C'~]" - (and (graphic-char-p token-type) - (char/= token-type #\space)) - token-type))) - -;;;-------------------------------------------------------------------------- -;;; Reading and pushing back characters. - -(export 'next-char) -(defgeneric next-char (lexer) - (:documentation - "Fetch the next character from the LEXER's input stream. - - Read a character from the input stream, and store it in the LEXER's CHAR - slot. The character stored is returned. If characters have been pushed - back then pushed-back characters are used instead of the input stream. If - there are no more characters to be read then the lookahead character is - nil. Returns the new lookahead character. - - (This function is primarily intended for the use of lexer subclasses.)")) - -(export 'pushback-char) -(defgeneric pushback-char (lexer char) - (:documentation - "Push the CHAR back into the lexer. - - Make CHAR be the current lookahead character (stored in the LEXER's CHAR - slot). The previous lookahead character is pushed down, and will be made - available again once this character is consumed by NEXT-CHAR. - - (This function is primarily intended for the use of lexer subclasses.)")) - -(defgeneric fixup-stream* (lexer thunk) - (:documentation - "Helper function for `with-lexer-stream'. - - This function does the main work for `with-lexer-stream'. The THUNK is - invoked on a single argument, the LEXER's underlying STREAM.")) - -(export 'with-lexer-stream) -(defmacro with-lexer-stream ((streamvar lexer) &body body) - "Evaluate BODY with STREAMVAR bound to the LEXER's input stream. - - The STREAM is fixed up so that the next character read (e.g., using - `read-char') will be the lexer's current lookahead character. Once the - BODY completes, the next character in the stream is read and set as the - lookahead character. It is an error if the lexer has pushed-back - characters (since these can't be pushed back into the input stream - properly)." - - `(fixup-stream* ,lexer (lambda (,streamvar) ,@body))) +;;; Indicators and error messages. + +(export 'define-indicator) +(defun define-indicator (indicator description) + "Associate an INDICATOR with its textual DESCRIPTION. + + Updates the the `*indicator-map*'." + (setf (gethash indicator *indicator-map*) description) + indicator) + +(export 'syntax-error) +(defun syntax-error (scanner expected &key (continuep t)) + "Signal a (maybe) continuable syntax error." + (labels ((show-token (type value) + (if (characterp type) + (format nil "~/sod::show-char/" type) + (case type + (:id (format nil "" value)) + (:string "") + (:char "") + (:eof "") + (:ellipsis "`...'") + (t (format nil "" type value))))) + (show-expected (thing) + (acond ((gethash thing *indicator-map*) it) + ((atom thing) (show-token thing nil)) + ((eq (car thing) :id) + (format nil "`~A'" (cadr thing))) + (t (format nil "" thing))))) + (funcall (if continuep #'cerror* #'error) + "Syntax error: ~ + expected ~{~#[~;~A~;~A or ~A~:;~A, ~]~} ~ + but found ~A" + (mapcar #'show-expected expected) + (show-token (token-type scanner) (token-value scanner))))) + +(export 'lexer-error) +(defun lexer-error (char-scanner expected consumedp) + "Signal a continuable lexical error." + (cerror* "Lexical error: ~ + expected ~{~#[~;~A~;~A or ~A~;:~A, ~]~} ~ + but found ~/sod::show-char/~ + ~@[ at ~A~]" + (mapcar (lambda (exp) + (typecase exp + (character (format nil "~/sod::show-char/" exp)) + (string (format nil "`~A'" exp)) + ((cons (eql :digit) *) (format nil "" + (cadr exp))) + ((eql :eof) "") + ((eql :any) "") + (t (format nil "" exp)))) + expected) + (and (not (scanner-at-eof-p char-scanner)) + (scanner-current-char char-scanner)) + (and consumedp (file-location char-scanner)))) ;;;-------------------------------------------------------------------------- -;;; Reading and pushing back tokens. - -(export 'scan-token) -(defgeneric scan-token (lexer) - (:documentation - "Internal protocol for scanning tokens from an input stream. - - Implementing a method on this function is the main responsibility of LEXER - subclasses; it is called by the user-facing `next-token' function. - - The method should consume characters (using `next-char') as necessary, and - return two values: a token type and token value. These will be stored in - the corresponding slots in the lexer object in order to provide the user - with one-token lookahead.")) - -(export 'next-token) -(defgeneric next-token (lexer) - (:documentation - "Scan a token from an input stream. - - This function scans a token from an input stream. Two values are - returned: a `token type' and a `token value'. These are opaque to the - LEXER base class, but the intent is that the token type be significant to - determining the syntax of the input, while the token value carries any - additional information about the token's semantic content. The token type - and token value are also made available for lookahead via accessors - TOKEN-TYPE and TOKEN-VALUE on the `lexer' object. - - The new lookahead token type and value are returned as two separate - values. - - If tokens have been pushed back (see `pushback-token') then they are - returned one by one instead of scanning the stream.")) - -(export 'pushback-token) -(defgeneric pushback-token (lexer token-type &optional token-value location) - (:documentation - "Push a token back into the lexer. - - Make the given TOKEN-TYPE and TOKEN-VALUE be the current lookahead token. - The previous lookahead token is pushed down, and will be made available - agan once this new token is consumed by NEXT-TOKEN. If LOCATION is - non-nil then `file-location' is saved and replaced by LOCATION. The - TOKEN-TYPE and TOKEN-VALUE can be anything at all: for instance, they need - not be values which can actually be returned by NEXT-TOKEN.")) - -;;;-------------------------------------------------------------------------- -;;; Utilities. - -(export 'skip-spaces) -(defgeneric skip-spaces (lexer) - (:documentation - "Skip over whitespace characters in the LEXER. - - There must be a lookahead character; when the function returns, the - lookahead character will be a non-whitespace character or nil if there - were no non-whitespace characters remaining. Returns the new lookahead - character.")) - -(export 'require-token) -(defun require-token - (lexer wanted-token-type &key (errorp t) (consumep t) default) - "Require a particular token to appear. - - If the LEXER's current lookahead token has type `wanted-token-type' then - consume it (using `next-token') and return its value. Otherwise, if the - token doesn't have the requested type then signal a continuable error - describing the situation and return DEFAULT (which defaults to nil). - - If ERRORP is false then no error is signalled; this is useful for - consuming or checking for optional punctuation. If CONSUMEP is false then - a matching token is not consumed; non-matching tokens are never consumed." - - (with-slots (token-type token-value) lexer - (cond ((eql token-type wanted-token-type) - (prog1 token-value - (when consumep (next-token lexer)))) - (errorp - (cerror* "Expected ~A but found ~A" - (format-token wanted-token-type) - (format-token token-type token-value)) - default) - (t - default)))) +;;; Lexical analysis utilities. + +(defun scan-comment (char-scanner) + "Scan a comment (either `/* ... */' or `// ...') from CHAR-SCANNER. + + The result isn't interesting." + (with-parser-context (character-scanner-context :scanner char-scanner) + (parse (or (and "/*" + (and (skip-many () + (and (skip-many () (not #\*)) + (label "*/" (skip-many (:min 1) #\*))) + (not #\/)) + #\/)) + (and "//" + (skip-many () (not #\newline)) + (? #\newline)))))) ;;;----- That's all, folks -------------------------------------------------- diff --git a/src/lexical-parse.lisp b/src/lexical-parse.lisp deleted file mode 100644 index 1e9a76c..0000000 --- a/src/lexical-parse.lisp +++ /dev/null @@ -1,216 +0,0 @@ -;;; -*-lisp-*- -;;; -;;; Lexical analysis for input parser -;;; -;;; (c) 2009 Straylight/Edgeware -;;; - -;;;----- Licensing notice --------------------------------------------------- -;;; -;;; This file is part of the Sensble Object Design, an object system for C. -;;; -;;; SOD is free software; you can redistribute it and/or modify -;;; it under the terms of the GNU General Public License as published by -;;; the Free Software Foundation; either version 2 of the License, or -;;; (at your option) any later version. -;;; -;;; SOD is distributed in the hope that it will be useful, -;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with SOD; if not, write to the Free Software Foundation, -;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -(cl:in-package #:sod) - -;;;-------------------------------------------------------------------------- -;;; Class definition. - -(export 'sod-token-scanner) -(defclass sod-token-scanner (token-scanner) - ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner)) - (:documentation - "A token scanner for SOD input files. - - Not a lot here, apart from a character scanner to read from and the - standard token scanner infrastructure.")) - -(defmethod shared-initialize :after - ((scanner sod-token-scanner) slot-names &key) - (default-slot (scanner 'sod-parser::filename slot-names) - (scanner-filename (token-scanner-char-scanner scanner)))) - -;;;-------------------------------------------------------------------------- -;;; Utilities. - -(defun show-char (stream char &optional colonp atsignp) - "Format CHAR to STREAM in a readable way. - - Usable in `format''s ~/.../ command." - (declare (ignore colonp atsignp)) - (cond ((null char) (write-string "" stream)) - ((and (graphic-char-p char) (char/= char #\space)) - (format stream "`~C'" char)) - (t (format stream "<~(~:C~)>" char)))) - -(defun scan-comment (scanner) - "Scan a comment (either `/* ... */' or `// ...') from SCANNER. - - The result isn't interesting." - (with-parser-context (character-scanner-context :scanner scanner) - (parse (or (and "/*" - (and (skip-many () - (and (skip-many () (not #\*)) - (label "*/" (skip-many (:min 1) #\*))) - (not #\/)) - #\/)) - (and "//" - (skip-many () (not #\newline)) - (? #\newline)))))) - -(defmethod make-scanner-stream ((scanner sod-token-scanner)) - (make-scanner-stream (token-scanner-char-scanner scanner))) - -;;;-------------------------------------------------------------------------- -;;; Error reporting. - -(defvar *indicator-map* (make-hash-table) - "Hash table mapping indicator objects to human-readable descriptions.") - -(defun define-indicator (indicator description) - (setf (gethash indicator *indicator-map*) description) - indicator) - -(export 'syntax-error) -(defun syntax-error (scanner expected &key (continuep t)) - "Signal a (maybe) continuable syntax error." - (labels ((show-token (type value) - (if (characterp type) - (format nil "~/sod::show-char/" type) - (case type - (:id (format nil "" value)) - (:string "") - (:char "") - (:eof "") - (:ellipsis "`...'") - (t (format nil "" type value))))) - (show-expected (thing) - (acond ((gethash thing *indicator-map*) it) - ((atom thing) (show-token thing nil)) - ((eq (car thing) :id) - (format nil "`~A'" (cadr thing))) - (t (format nil "" thing))))) - (funcall (if continuep #'cerror* #'error) - "Syntax error: ~ - expected ~{~#[~;~A~;~A or ~A~:;~A, ~]~} ~ - but found ~A" - (mapcar #'show-expected expected) - (show-token (token-type scanner) (token-value scanner))))) - -(export 'lexer-error) -(defun lexer-error (char-scanner expected consumedp) - "Signal a continuable lexical error." - (cerror* "Lexical error: ~ - expected ~{~#[~;~A~;~A or ~A~;:~A, ~]~} ~ - but found ~/sod::show-char/~ - ~@[ at ~A~]" - (mapcar (lambda (exp) - (typecase exp - (character (format nil "~/sod::show-char/" exp)) - (string (format nil "`~A'" exp)) - ((cons (eql :digit) *) (format nil "" - (cadr exp))) - ((eql :eof) "") - ((eql :any) "") - (t (format nil "" exp)))) - expected) - (and (not (scanner-at-eof-p char-scanner)) - (scanner-current-char char-scanner)) - (and consumedp (file-location char-scanner)))) - -;;;-------------------------------------------------------------------------- -;;; Token scanner protocol implementation. - -(defmethod scanner-token ((scanner sod-token-scanner)) - (with-slots (char-scanner line column) scanner - (with-parser-context (character-scanner-context :scanner char-scanner) - - (flet ((scan-digits (&key (radix 10) (min 1) (init 0)) - ;; Scan an return a sequence of digits. - (parse (many (acc init (+ (* acc radix) it) :min min) - (label (list :digit radix) - (filter (lambda (ch) - (digit-char-p ch radix)))))))) - - ;; Skip initial junk, and remember the place. - (loop - (setf (scanner-line scanner) (scanner-line char-scanner) - (scanner-column scanner) (scanner-column char-scanner)) - (cond-parse (:consumedp cp :expected exp) - ((satisfies whitespace-char-p) (parse :whitespace)) - ((scan-comment char-scanner)) - (t (if cp (lexer-error char-scanner exp cp) (return))))) - - ;; Now parse something. - (cond-parse (:consumedp cp :expected exp) - - ;; Alphanumerics mean we read an identifier. - ((or #\_ (satisfies alpha-char-p)) - (values :id (with-output-to-string (out) - (write-char it out) - (parse (many (nil nil (write-char it out)) - (or #\_ (satisfies alphanumericp))))))) - - ;; Quotes introduce a literal. - ((seq ((quote (or #\" #\')) - (contents (many (out (make-string-output-stream) - (progn (write-char it out) out) - :final (get-output-stream-string out)) - (or (and #\\ :any) (not quote)))) - (nil (char quote))) - (ecase quote - (#\" contents) - (#\' (case (length contents) - (1 (char contents 0)) - (0 (cerror* "Empty character literal") #\?) - (t (cerror* "Too many characters in literal") - (char contents 0)))))) - (values (etypecase it - (character :char) - (string :string)) - it)) - - ;; Zero introduces a chosen-radix integer. - ((and #\0 - (or (and (or #\b #\B) (scan-digits :radix 2)) - (and (or #\o #\O) (scan-digits :radix 8)) - (and (or #\x #\X) (scan-digits :radix 16)) - (scan-digits :radix 8 :min 0))) - (values :int it)) - - ;; Any other digit forces radix-10. - ((seq ((d (filter digit-char-p)) - (i (scan-digits :radix 10 :min 0 :init d))) - i) - (values :int it)) - - ;; Some special punctuation sequences are single tokens. - ("..." (values :ellipsis nil)) - - ;; Any other character is punctuation. - (:any (values it nil)) - - ;; End of file means precisely that. - (:eof (values :eof nil)) - - ;; Report errors and try again. Because we must have consumed some - ;; input in order to get here (we've matched both :any and :eof) we - ;; must make progress on every call. - (t - (assert cp) - (lexer-error char-scanner exp cp) - (scanner-token scanner))))))) - -;;;----- That's all, folks -------------------------------------------------- diff --git a/src/module-impl.lisp b/src/module-impl.lisp index 753ca0a..5343ad0 100644 --- a/src/module-impl.lisp +++ b/src/module-impl.lisp @@ -91,6 +91,19 @@ (defun build-module (when (and truename (not (eq (module-state *module*) t))) (remhash truename *module-map*))))) +(defun call-with-temporary-module (thunk) + "Invoke THUNK in the context of a temporary module, returning its values. + + This is mainly useful for testing things which depend on module variables. + This is the functionality underlying `with-temporary-module'." + (let ((*module* (make-instance 'module + :name "" + :state nil))) + (call-with-module-environment + (lambda () + (module-import *builtin-module*) + (funcall thunk))))) + ;;;-------------------------------------------------------------------------- ;;; Type definitions. diff --git a/src/module-parse.lisp b/src/module-parse.lisp index 1989ebb..f87c586 100644 --- a/src/module-parse.lisp +++ b/src/module-parse.lisp @@ -159,6 +159,8 @@ (define-pluggable-parser module class (scanner) ;; `class' id [`:' id-list] `{' class-item* `}' (with-parser-context (token-scanner-context :scanner scanner) + (labels ((parse-item () + ;; class-item ::= property-set (parse (seq ("class" (name :id) (supers (? (seq (#\: (supers (list (:min 1) :id #\,))) diff --git a/src/module-proto.lisp b/src/module-proto.lisp index 93034a4..93b4f68 100644 --- a/src/module-proto.lisp +++ b/src/module-proto.lisp @@ -208,4 +208,9 @@ (defmacro define-module ,@(and truenamep `(:truename ,truename)) ,@(and locationp `(:location ,location)))) +(export 'with-temporary-module) +(defmacro with-temporary-module ((&key) &body body) + "Evaluate BODY within the context of a temporary module." + `(call-with-temporary-module (lambda () ,@body))) + ;;;----- That's all, folks -------------------------------------------------- diff --git a/src/parser/parser-test.lisp b/src/parser/parser-test.lisp index f25961e..6718d5c 100644 --- a/src/parser/parser-test.lisp +++ b/src/parser/parser-test.lisp @@ -400,10 +400,13 @@ (defun test-parse-expr (string) (seq (#\&) and) (seq (#\|) or))) (token (or (seq (#\() lp) + (seq (#\[) lb) (seq (#\-) neg) (seq (#\!) not))) (token (or (seq (#\!) fact) - (when nestedp (seq (#\)) rp)))))) + (when nestedp + (or (seq (#\)) rp) + (seq (#\]) rb))))))) (next (or :any (t :eof)))) (cons value next)))))) @@ -425,7 +428,6 @@ (def-test-method test-expression-parser ((test test-parser) :run nil) (assert-expr-parse "1 ** 2 ** 3" '((** 1 (** 2 3)) . :eof) t t) (assert-expr-parse "1 + 2) * 3" '((+ 1 2) . #\)) t t) (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t) - (assert-expr-parse "! 1 + 2 = 3 | 6 - 3 /= 12/6" '((or (not (= (+ 1 2) 3)) (/= (- 6 3) (/ 12 6))) diff --git a/src/pset-parse.lisp b/src/pset-parse.lisp index d1e437e..be7984e 100644 --- a/src/pset-parse.lisp +++ b/src/pset-parse.lisp @@ -23,11 +23,55 @@ ;;; along with SOD; if not, write to the Free Software Foundation, ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -(defun play (args) - "Parse and evaluate a simple expression. +;;;-------------------------------------------------------------------------- +;;; The expression parser. + +(flet ((dispatch (name args &rest spec) + (acond ((find :invalid args :key #'car) + (cons :invalid nil)) + ((find-if (lambda (item) + (every (lambda (type arg) + (eql type (car arg))) + (cddr item) + args)) + spec) + (cons (car it) (apply (cadr it) + (mapcar #'cdr args)))) + (t + (cerror* "Type mismatch: operator `~A' applied to ~ + types ~{~(~A~)~#[~; and ~;, ~]~}" + name + (mapcar #'car args)) + (cons :invalid nil))))) + (let ((add (binop "+" (x y 5) + (dispatch "+" (list x y) (list :int #'+ :int :int)))) + (sub (binop "-" (x y 5) + (dispatch "-" (list x y) (list :int #'- :int :int)))) + (mul (binop "*" (x y 7) + (dispatch "*" (list x y) (list :int #'* :int :int)))) + (div (binop "/" (x y 7) + (dispatch "/" (list x y) + (list :int + (lambda (x y) + (cond ((zerop y) + (cerror* + "Division by zero") + (cons :invalid nil)) + (t + (floor x y)))) + :int :int)))) + (nop (preop "+" (x 9) + (dispatch "+" (list x) (list :int #'+ :int)))) + (neg (preop "-" (x 9) + (dispatch "-" (list x) (list :int #'- :int)))) + (lp (lparen #\))) + (rp (rparen #\)))) + + (defun parse-expression (scanner) + "Parse and evaluate a simple expression. The result is a pair (TYPE . VALUE). Currently, type types are `:id', - `:int', `:string', and `:char'. If an error prevented a sane ; value from + `:int', `:string', and `:char'. If an error prevented a sane value from being produced, the type `:invalid' is returned. The syntax of expressions is rather limited at the moment, but more may be @@ -39,66 +83,58 @@ (defun play (args) primary: int | id | string | `(' expression `)' | `?' lisp-expression Only operators for dealing with integers are provided." - - (labels ((type-dispatch (name args &rest spec) - (acond ((find :invalid args :key #'car) - (cons :invalid nil)) - ((find-if (lambda (item) - (every (lambda (type arg) - (eql type (car arg))) - (cddr item) - args)) - spec) - (cons (car it) (apply (cadr it) - (mapcar #'cdr args)))) - (t - (cerror* "Type mismatch: operator `~A' applied to ~ - types ~{~(~A~)~#[~; and ~;, ~]~}" - name - (mapcar #'car args)) - (cons :invalid nil)))) - (add (x y) (type-dispatch "+" (list x y) - (list :integer #'+ :integer :integer))) - (sub (x y) (type-dispatch "-" (list x y) - (list :integer #'- :integer :integer))) - (mul (x y) (type-dispatch "*" (list x y) - (list :integer #'* :integer :integer))) - (div (x y) (type-dispatch "/" (list x y) - (list :integer - (lambda (x y) - (cond ((zerop y) - (cerror* - "Division by zero") - (cons :invalid nil)) - (t - (floor x y)))) - :integer :integer))) - (nop (x) (type-dispatch "+" (list x) - (list :integer #'+ :integer))) - (neg (x) (type-dispatch "-" (list x) - (list :integer #'- :integer)))) - - (with-parser-context (token-scanner-context :scanner scanner) - (parse (expr (lisp (flet ((prop (type value) - (scanner-step scanner) - (values (cons type value) t t))) - (case (token-type scanner) - (:int - (prop :integer (token-value scanner))) - ((:id :char :string) - (prop (token-type scanner) (token-value scanner))) - (#\? - (let* ((stream (make-scanner-stream scanner)) - (sexp (read stream t))) + (with-parser-context (token-scanner-context :scanner scanner) + (parse (expr (:nestedp nestedp) + (lisp (flet ((prop (type value) (scanner-step scanner) - (values (cons (property-type sexp) sexp) - t t))) - (t - (values (list :int :id :char :string #\?) - nil nil))))) + (values (cons type value) t t))) + (case (token-type scanner) + ((:int :id :char :string) + (prop (token-type scanner) + (token-value scanner))) + (#\? + (let* ((stream (make-scanner-stream scanner)) + (sexp (read stream t))) + (scanner-step scanner) + (values (cons (property-type sexp) sexp) + t t))) + (t + (values (list :int :id :char :string #\?) + nil nil))))) + (or (seq (#\+) add) + (seq (#\-) sub) + (seq (#\*) mul) + (seq (#\/) div)) + (or (seq (#\() lp) + (seq (#\+) nop) + (seq (#\-) neg)) + (when nestedp (seq (#\)) rp)))))))) + +;;;-------------------------------------------------------------------------- +;;; Parsing property sets. (defun parse-property (scanner pset) "Parse a single property using the SCANNER; add it to the PSET." - ;; id `=' expression + ;; property ::= id `=' expression + (with-parser-context (token-scanner-context :scanner scanner) + (parse (seq ((name :id) #\= (result (parse-expression scanner))) + (let ((type (car result)) + (value (cdr result))) + (unless (eq type :invalid) + (add-property pset name value + :type type + :location scanner))))))) + +(export 'parse-property-set) +(defun parse-property-set (scanner) + "Parse an optional property set from the SCANNER and return it, or `nil'." + ;; property-set ::= [`[' property-list `]'] + (with-parser-context (token-scanner-context :scanner scanner) + (parse (? (seq (#\[ + (pset (many (pset (make-property-set) pset) + (parse-property scanner pset) + #\,)) + #\]) + pset))))) ;;;----- That's all, folks -------------------------------------------------- diff --git a/src/pset-proto.lisp b/src/pset-proto.lisp index e10e8b9..aafa306 100644 --- a/src/pset-proto.lisp +++ b/src/pset-proto.lisp @@ -42,7 +42,7 @@ (export 'property-type) (defgeneric property-type (value) (:documentation "Guess a sensible property type to use for VALUE.") (:method ((value symbol)) :symbol) - (:method ((value integer)) :integer) + (:method ((value integer)) :int) (:method ((value string)) :string) (:method ((value character)) :char) (:method (value) :other)) diff --git a/src/sod.asd b/src/sod.asd index 6c6d4fe..33b54c6 100644 --- a/src/sod.asd +++ b/src/sod.asd @@ -99,18 +99,20 @@ (:file "package" :depends-on ("parser")) + ;; Lexical analysis. + (:file "lexer-proto" :depends-on ("package" "parser")) + (:file "lexer-impl" :depends-on ("lexer-proto")) + (:file "fragment-parse" :depends-on ("lexer-proto")) + ;; C type representation protocol. (:file "c-types-proto" :depends-on ("package")) (:file "c-types-impl" :depends-on ("c-types-proto")) + (:file "c-types-parse" :depends-on ("c-types-proto" "fragment-parse")) ;; Property set protocol. (:file "pset-proto" :depends-on ("package")) (:file "pset-impl" :depends-on ("pset-proto")) - (:file "pset-parse" :depends-on ("pset-proto" "lexical-parse")) - - ;; Lexical analysis. - (:file "lexical-parse" :depends-on ("parser")) - (:file "fragment-parse" :depends-on ("lexical-parse")) + (:file "pset-parse" :depends-on ("pset-proto" "lexer-proto")) ;; Code generation protocol. (:file "codegen-proto" :depends-on ("package")) @@ -122,8 +124,9 @@ ("module-proto" "pset-proto" "c-types-class-impl" "builtin")) (:file "builtin" :depends-on ("module-proto" "pset-proto" "classes" "c-types-impl" "c-types-class-impl")) + #+no (:file "module-parse" :depends-on ("module-impl" - "lexical-parse" "fragment-parse")) + "lexer-proto" "fragment-parse")) ;; Output. (:file "output-proto" :depends-on ("package")) diff --git a/src/base-test.lisp b/src/test-base.lisp similarity index 100% rename from src/base-test.lisp rename to src/test-base.lisp -- [mdw]