;;;----- Licensing notice ---------------------------------------------------
;;;
-;;; This file is part of the Sensble Object Design, an object system for C.
+;;; This file is part of the Sensible Object Design, an object system for C.
;;;
;;; SOD is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
(defun scan-c-fragment (scanner end-chars)
"Parse a C fragment from the SCANNER.
- SCANNER must be a `sod-token-scanner' instance.
+ SCANNER must be a `sod-token-scanner' instance. The END-CHARS are a
+ sequence of characters, any of which delimits the fragment. The
+ delimiting character is left current in the scanner.
The parsing process is a simple approximation to C lexical analysis. It
takes into account comments (both C and C++ style), string and character
literals."
- (let ((char-scanner (token-scanner-char-scanner scanner))
- (delim nil)
- (stack nil))
+ (let* ((char-scanner (token-scanner-char-scanner scanner))
+ (delim-match nil) (delim-found nil) (delim-loc nil)
+ (stack nil) (start nil) (tokstart nil) (eofwhine t))
(with-parser-context (character-scanner-context :scanner char-scanner)
;; Hack. If the first character is a newline then discard it
(parse #\newline)
;; This seems the easiest way of gathering stuff.
+ (setf start (file-location char-scanner))
(with-scanner-place (place char-scanner)
- (flet ((push-delim (d)
- (push delim stack)
- (setf delim d))
+ (flet ((push-delim (found match)
+ (push (list delim-found delim-match delim-loc) stack)
+ (setf delim-found found
+ delim-match match
+ delim-loc tokstart))
+
+ (pop-delim ()
+ (destructuring-bind (found match loc) (pop stack)
+ (setf delim-found found
+ delim-match match
+ delim-loc loc)))
(result ()
(let* ((output (scanner-interval char-scanner place))
;; March through characters until we reach the end.
(loop
+ (setf tokstart (file-location char-scanner))
(cond-parse (:consumedp cp :expected exp)
;; Whitespace and comments are universally dull.
((satisfies whitespace-char-p) (parse :whitespace))
((scan-comment char-scanner))
- ;; See if we've reached the end. There's a small trick here: I
- ;; capture the result in the `if-char' consequent to ensure
- ;; that we don't include the delimiter.
- ((if-char () (and (null delim) (member it end-chars))
- (values (result) t t)
- (values end-chars nil nil))
+ ;; See if we've reached the end. We must leave the delimiter
+ ;; in the scanner, so `if-char' and its various friends aren't
+ ;; appropriate.
+ ((lisp (if (and (null delim-match)
+ (not (scanner-at-eof-p char-scanner))
+ (member (scanner-current-char char-scanner)
+ end-chars))
+ (values (result) t t)
+ (values end-chars nil nil)))
(return (values it t t)))
(:eof
- (lexer-error char-scanner '(:any) cp)
+ (when eofwhine
+ (lexer-error char-scanner nil))
+ (loop
+ (unless delim-found (return))
+ (info-with-location delim-loc
+ "Unmatched `~C' found here" delim-found)
+ (pop-delim))
+ (info-with-location start "C fragment started here")
(return (values (result) t t)))
;; Opening and closing brackets. Opening brackets push things
- ;; onto a stack; closing brackets pop things off again.
- (#\( (push-delim #\)))
- (#\[ (push-delim #\]))
- (#\{ (push-delim #\}))
- ((or #\) #\] #\})
- (if (eql it delim)
- (setf delim (pop stack))
- (cerror* "Unmatched `~C.'." it)))
+ ;; onto a stack; closing brackets pop things off again. Pop a
+ ;; bracket even if it doesn't match, to encourage progress
+ ;; towards finding an end-delimiter.
+ (#\( (push-delim #\( #\)))
+ (#\[ (push-delim #\[ #\]))
+ (#\{ (push-delim #\{ #\}))
+ ((lisp (let ((char (scanner-current-char char-scanner)))
+ (case char
+ ((#\) #\] #\})
+ (unless (eql char delim-match)
+ (lexer-error char-scanner
+ (and delim-match
+ (list delim-match)))
+ (when delim-loc
+ (info-with-location
+ delim-loc
+ "Mismatched `~C' found here" delim-found)))
+ (scanner-step char-scanner)
+ (when delim-match (pop-delim))
+ (values char t t))
+ (t
+ (values '(#\) #\] #\}) nil nil))))))
;; String and character literals.
((seq ((quote (or #\" #\'))
(nil (skip-many ()
- (or (and #\\ :any) (not quote))))
- (nil (char quote)))))
+ (or (and #\\ :any) (not quote))))
+ (nil (or (char quote)
+ (seq (:eof)
+ (lexer-error char-scanner (list quote))
+ (info-with-location tokstart
+ "Literal started here")
+ (setf eofwhine nil)))))))
;; Anything else.
(:any)
;; This really shouldn't be able to happen.
(t
(assert cp)
- (lexer-error char-scanner exp cp)))))))))
+ (when (scanner-at-eof-p char-scanner)
+ (setf eofwhine nil))
+ (lexer-error char-scanner exp)))))))))
(export 'parse-delimited-fragment)
-(defun parse-delimited-fragment (scanner begin end)
+(defun parse-delimited-fragment (scanner begin end &key keep-end)
"Parse a C fragment delimited by BEGIN and END.
- The BEGIN and END arguments are characters. (Currently, BEGIN can be any
- token type, but you probably shouldn't rely on this.)"
+ The BEGIN and END arguments are the start and end delimiters. BEGIN can
+ be any token type, but is usually a delimiter character; it may also be t
+ to mean `don't care' -- but there must be an initial token of some kind
+ for annoying technical reasons. END may be either a character or a list
+ of characters. If KEEP-END is true, the trailing delimiter is left in the
+ token scanner so that it's available for further parsing decisions: this
+ is probably what you want if END is a list."
;; This is decidedly nasty. The basic problem is that `scan-c-fragment'
;; works at the character level rather than at the lexical level, and if we
- ;; commit to the `[' too early then `scanner-step' will eat the first few
- ;; characters of the fragment -- and then the rest of the parse will get
- ;; horrifically confused.
-
- (if (eql (token-type scanner) begin)
- (multiple-value-prog1 (values (scan-c-fragment scanner (list end)) t t)
- (scanner-step scanner))
+ ;; commit to the BEGIN character too early then `scanner-step' will eat the
+ ;; first few characters of the fragment -- and then the rest of the parse
+ ;; will get horrifically confused.
+
+ (if (if (eq begin t)
+ (not (scanner-at-eof-p scanner))
+ (eql (token-type scanner) begin))
+ (multiple-value-prog1
+ (values (scan-c-fragment scanner
+ (if (listp end) end
+ (list end)))
+ t
+ t)
+ (scanner-step scanner)
+ (unless keep-end (scanner-step scanner)))
(values (list begin) nil nil)))
;;;----- That's all, folks --------------------------------------------------