;;; -*-lisp-*- ;;; ;;; Parsing C fragments from a scanner ;;; ;;; (c) 2010 Straylight/Edgeware ;;; ;;;----- Licensing notice --------------------------------------------------- ;;; ;;; This file is part of the Sensible Object Design, an object system for C. ;;; ;;; SOD is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 2 of the License, or ;;; (at your option) any later version. ;;; ;;; SOD is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with SOD; if not, write to the Free Software Foundation, ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. (in-package #:sod) ;;;-------------------------------------------------------------------------- ;;; Fragment parsing. (export 'scan-c-fragment) (defun scan-c-fragment (scanner end-chars) "Parse a C fragment from the SCANNER. SCANNER must be a `sod-token-scanner' instance. The END-CHARS are a sequence of characters, any of which delimits the fragment. The delimiting character is left current in the scanner. The parsing process is a simple approximation to C lexical analysis. It takes into account comments (both C and C++ style), string and character literals." (let ((char-scanner (token-scanner-char-scanner scanner)) (delim nil) (stack nil)) (with-parser-context (character-scanner-context :scanner char-scanner) ;; Hack. If the first character is a newline then discard it ;; immediately. If I don't, then the output will look strange and the ;; location information will be unhelpful. (parse #\newline) ;; This seems the easiest way of gathering stuff. (with-scanner-place (place char-scanner) (flet ((push-delim (d) (push delim stack) (setf delim d)) (result () (let* ((output (scanner-interval char-scanner place)) (end (position-if (lambda (char) (or (char= char #\newline) (not (whitespace-char-p char)))) output :from-end t)) (trimmed (if end (subseq output 0 (1+ end)) ""))) (make-instance 'c-fragment :location (file-location place) :text trimmed)))) ;; March through characters until we reach the end. (loop (cond-parse (:consumedp cp :expected exp) ;; Whitespace and comments are universally dull. ((satisfies whitespace-char-p) (parse :whitespace)) ((scan-comment char-scanner)) ;; See if we've reached the end. We must leave the delimiter ;; in the scanner, so `if-char' and its various friends aren't ;; appropriate. ((lisp (if (and (null delim) (member (scanner-current-char char-scanner) end-chars)) (values (result) t t) (values end-chars nil nil))) (return (values it t t))) (:eof (lexer-error char-scanner '(:any) cp) (return (values (result) t t))) ;; Opening and closing brackets. Opening brackets push things ;; onto a stack; closing brackets pop things off again. (#\( (push-delim #\))) (#\[ (push-delim #\])) (#\{ (push-delim #\})) ((or #\) #\] #\}) (if (eql it delim) (setf delim (pop stack)) (cerror* "Unmatched `~C.'." it))) ;; String and character literals. ((seq ((quote (or #\" #\')) (nil (skip-many () (or (and #\\ :any) (not quote)))) (nil (char quote))))) ;; Anything else. (:any) ;; This really shouldn't be able to happen. (t (assert cp) (lexer-error char-scanner exp cp))))))))) (export 'parse-delimited-fragment) (defun parse-delimited-fragment (scanner begin end &key keep-end) "Parse a C fragment delimited by BEGIN and END. The BEGIN and END arguments are the start and end delimiters. BEGIN can be any token type, but is usually a delimiter character; it may also be t to mean `don't care' -- but there must be an initial token of some kind for annoying technical reasons. END may be either a character or a list of characters. If KEEP-END is true, the trailing delimiter is left in the token scanner so that it's available for further parsing decisions: this is probably what you want if END is a list." ;; This is decidedly nasty. The basic problem is that `scan-c-fragment' ;; works at the character level rather than at the lexical level, and if we ;; commit to the BEGIN character too early then `scanner-step' will eat the ;; first few characters of the fragment -- and then the rest of the parse ;; will get horrifically confused. (if (if (eq begin t) (not (scanner-at-eof-p scanner)) (eql (token-type scanner) begin)) (multiple-value-prog1 (values (scan-c-fragment scanner (if (listp end) end (list end))) t t) (scanner-step scanner) (unless keep-end (scanner-step scanner))) (values (list begin) nil nil))) ;;;----- That's all, folks --------------------------------------------------