chiark / gitweb /
src/method-impl.lisp: Initialize `suppliedp' flags properly.
[sod] / src / fragment-parse.lisp
... / ...
CommitLineData
1;;; -*-lisp-*-
2;;;
3;;; Parsing C fragments from a scanner
4;;;
5;;; (c) 2010 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Sensible Object Design, an object system for C.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(in-package #:sod)
27
28;;;--------------------------------------------------------------------------
29;;; Fragment parsing.
30
(export 'scan-c-fragment)
(defun scan-c-fragment (scanner end-chars)
  "Parse a C fragment from the SCANNER.

   SCANNER must be a `sod-token-scanner' instance.  The END-CHARS are a
   sequence of characters, any of which delimits the fragment when it is
   found outside of all brackets.  The delimiting character is left current
   in the scanner.

   The parsing process is a simple approximation to C lexical analysis.  It
   takes into account comments (both C and C++ style), string and character
   literals, and the nesting of round, square and curly brackets.

   Returns three values: a `c-fragment' object, then t and t.  The
   fragment's text is everything scanned, with trailing whitespace after
   the last newline or non-whitespace character removed; its location is
   taken from the place captured at the start of the scan.  A closing
   bracket which doesn't match the innermost open one is reported with
   `cerror*' and scanning carries on.  If end-of-file arrives before a
   delimiter, the problem is reported with `lexer-error' and the text
   gathered so far is returned in the same way."

  (let ((char-scanner (token-scanner-char-scanner scanner))

        ;; The contract above allows any sequence, but `member' below only
        ;; understands lists; normalize once.  A list is passed through
        ;; unchanged.
        (end-list (coerce end-chars 'list))

        ;; DELIM is the closing bracket we're currently waiting for, or nil
        ;; if we're not inside any brackets.  STACK holds the values DELIM
        ;; had in the enclosing bracket levels, innermost first.
        (delim nil)
        (stack nil))
    (with-parser-context (character-scanner-context :scanner char-scanner)

      ;; Hack.  If the first character is a newline then discard it
      ;; immediately.  If I don't, then the output will look strange and the
      ;; location information will be unhelpful.
      (parse #\newline)

      ;; This seems the easiest way of gathering stuff.
      (with-scanner-place (place char-scanner)

        (flet ((push-delim (d)
                 ;; Enter a bracket level: remember what we were waiting
                 ;; for, and wait for D instead.
                 (push delim stack)
                 (setf delim d))

               (result ()
                 ;; Build the fragment from everything scanned since PLACE.
                 ;; Searching from the end, keep text up to and including
                 ;; the last character which is a newline or isn't
                 ;; whitespace; if there's no such character then the text
                 ;; is empty.
                 (let* ((output (scanner-interval char-scanner place))
                        (end (position-if (lambda (char)
                                            (or (char= char #\newline)
                                                (not
                                                 (whitespace-char-p char))))
                                          output :from-end t))
                        (trimmed (if end (subseq output 0 (1+ end)) "")))
                   (make-instance 'c-fragment
                                  :location (file-location place)
                                  :text trimmed))))

          ;; March through characters until we reach the end.  The clauses
          ;; are tried in the order written, so their order matters.
          (loop
            (cond-parse (:consumedp cp :expected exp)

              ;; Whitespace and comments are universally dull.
              ((satisfies whitespace-char-p) (parse :whitespace))
              ((scan-comment char-scanner))

              ;; See if we've reached the end.  We must leave the delimiter
              ;; in the scanner, so `if-char' and its various friends aren't
              ;; appropriate.  A delimiter only counts when we're outside
              ;; of all brackets.
              ((lisp (if (and (null delim)
                              (member (scanner-current-char char-scanner)
                                      end-list))
                         (values (result) t t)
                         (values end-list nil nil)))
               (return (values it t t)))
              (:eof
               (lexer-error char-scanner '(:any) cp)
               (return (values (result) t t)))

              ;; Opening and closing brackets.  Opening brackets push things
              ;; onto a stack; closing brackets pop things off again.  (Here
              ;; `it' is presumably the bracket character just matched.)
              (#\( (push-delim #\)))
              (#\[ (push-delim #\]))
              (#\{ (push-delim #\}))
              ((or #\) #\] #\})
               (if (eql it delim)
                   (setf delim (pop stack))
                   (cerror* "Unmatched `~C'." it)))

              ;; String and character literals.  Inside a literal, a
              ;; backslash takes the following character with it, so an
              ;; escaped quote doesn't end the literal.
              ((seq ((quote (or #\" #\'))
                     (nil (skip-many ()
                            (or (and #\\ :any) (not quote))))
                     (nil (char quote)))))

              ;; Anything else.
              (:any)

              ;; This really shouldn't be able to happen.
              (t
               (assert cp)
               (lexer-error char-scanner exp cp)))))))))
116
(export 'parse-delimited-fragment)
(defun parse-delimited-fragment (scanner begin end &key keep-end)
  "Parse a C fragment lying between the delimiters BEGIN and END.

   BEGIN is the token type which must be current in SCANNER for the parse to
   go ahead.  It's usually a delimiter character, but any token type will
   do; t means `don't care', though even then the scanner must not be at
   end-of-file -- for annoying technical reasons there has to be an initial
   token of some kind.  END is either a single character or a list of
   characters, any of which ends the fragment.  If KEEP-END is true then the
   trailing delimiter is left in the token scanner so that it's available
   for further parsing decisions: this is probably what you want if END is
   a list.

   Returns three values.  If the BEGIN test passes, they are the fragment
   from `scan-c-fragment', t and t; otherwise they are a list containing
   just BEGIN, nil and nil."

  ;; This is decidedly nasty.  `scan-c-fragment' works on characters rather
  ;; than tokens, so we mustn't step the token scanner past BEGIN before the
  ;; fragment has been read: `scanner-step' would eat the first few
  ;; characters of the fragment, and the rest of the parse would get
  ;; horrifically confused.  Hence we only peek at the current token here,
  ;; and do all of the stepping afterwards.

  (let ((end-chars (if (listp end) end (list end)))
        (begin-ok-p (if (eq begin t)
                        (not (scanner-at-eof-p scanner))
                        (eql (token-type scanner) begin))))
    (cond ((not begin-ok-p)
           (values (list begin) nil nil))
          (t
           (let ((fragment (scan-c-fragment scanner end-chars)))
             ;; One step is always taken once the fragment has been read; a
             ;; second follows unless the caller asked to keep the END
             ;; delimiter.
             (scanner-step scanner)
             (unless keep-end (scanner-step scanner))
             (values fragment t t))))))
146
147;;;----- That's all, folks --------------------------------------------------