chiark / gitweb /
doc/concepts.tex: Typeset method rĂ´le names as identifiers.
[sod] / src / fragment-parse.lisp
CommitLineData
bf090e02
MW
1;;; -*-lisp-*-
2;;;
3;;; Parsing C fragments from a scanner
4;;;
5;;; (c) 2010 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
e0808c47 10;;; This file is part of the Sensible Object Design, an object system for C.
bf090e02
MW
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(in-package #:sod)
27
28;;;--------------------------------------------------------------------------
29;;; Fragment parsing.
30
31(export 'scan-c-fragment)
32(defun scan-c-fragment (scanner end-chars)
33 "Parse a C fragment from the SCANNER.
34
c91b90c3
MW
35 SCANNER must be a `sod-token-scanner' instance. The END-CHARS are a
36 sequence of characters, any of which delimits the fragment. The
37 delimiting character is left current in the scanner.
bf090e02
MW
38
39 The parsing process is a simple approximation to C lexical analysis. It
40 takes into account comments (both C and C++ style), string and character
41 literals."
42
e046c3f6
MW
43 (let* ((char-scanner (token-scanner-char-scanner scanner))
44 (delim-match nil) (delim-found nil) (delim-loc nil)
45 (stack nil) (start nil) (tokstart nil) (eofwhine t))
bf090e02
MW
46 (with-parser-context (character-scanner-context :scanner char-scanner)
47
48 ;; Hack. If the first character is a newline then discard it
49 ;; immediately. If I don't, then the output will look strange and the
50 ;; location information will be unhelpful.
51 (parse #\newline)
52
53 ;; This seems the easiest way of gathering stuff.
e046c3f6 54 (setf start (file-location char-scanner))
bf090e02
MW
55 (with-scanner-place (place char-scanner)
56
e046c3f6
MW
57 (flet ((push-delim (found match)
58 (push (list delim-found delim-match delim-loc) stack)
59 (setf delim-found found
60 delim-match match
61 delim-loc tokstart))
62
63 (pop-delim ()
64 (destructuring-bind (found match loc) (pop stack)
65 (setf delim-found found
66 delim-match match
67 delim-loc loc)))
bf090e02
MW
68
69 (result ()
70 (let* ((output (scanner-interval char-scanner place))
71 (end (position-if (lambda (char)
72 (or (char= char #\newline)
73 (not
74 (whitespace-char-p char))))
75 output :from-end t))
76 (trimmed (if end (subseq output 0 (1+ end)) "")))
77 (make-instance 'c-fragment
78 :location (file-location place)
79 :text trimmed))))
80
81 ;; March through characters until we reach the end.
82 (loop
e046c3f6 83 (setf tokstart (file-location char-scanner))
bf090e02
MW
84 (cond-parse (:consumedp cp :expected exp)
85
86 ;; Whitespace and comments are universally dull.
87 ((satisfies whitespace-char-p) (parse :whitespace))
88 ((scan-comment char-scanner))
89
c91b90c3
MW
90 ;; See if we've reached the end. We must leave the delimiter
91 ;; in the scanner, so `if-char' and its various friends aren't
92 ;; appropriate.
e046c3f6
MW
93 ((lisp (if (and (null delim-match)
94 (not (scanner-at-eof-p char-scanner))
c91b90c3
MW
95 (member (scanner-current-char char-scanner)
96 end-chars))
97 (values (result) t t)
98 (values end-chars nil nil)))
bf090e02
MW
99 (return (values it t t)))
100 (:eof
e046c3f6
MW
101 (when eofwhine
102 (lexer-error char-scanner nil))
103 (loop
104 (unless delim-found (return))
105 (info-with-location delim-loc
106 "Unmatched `~C' found here" delim-found)
107 (pop-delim))
108 (info-with-location start "C fragment started here")
bf090e02
MW
109 (return (values (result) t t)))
110
111 ;; Opening and closing brackets. Opening brackets push things
e046c3f6
MW
112 ;; onto a stack; closing brackets pop things off again. Pop a
113 ;; bracket even if it doesn't match, to encourage progress
114 ;; towards finding an end-delimiter.
115 (#\( (push-delim #\( #\)))
116 (#\[ (push-delim #\[ #\]))
117 (#\{ (push-delim #\{ #\}))
118 ((lisp (let ((char (scanner-current-char char-scanner)))
119 (case char
120 ((#\) #\] #\})
121 (unless (eql char delim-match)
122 (lexer-error char-scanner
123 (and delim-match
124 (list delim-match)))
125 (when delim-loc
126 (info-with-location
127 delim-loc
128 "Mismatched `~C' found here" delim-found)))
129 (scanner-step char-scanner)
130 (when delim-match (pop-delim))
131 (values char t t))
132 (t
133 (values '(#\) #\] #\}) nil nil))))))
bf090e02
MW
134
135 ;; String and character literals.
136 ((seq ((quote (or #\" #\'))
137 (nil (skip-many ()
e046c3f6
MW
138 (or (and #\\ :any) (not quote))))
139 (nil (or (char quote)
140 (seq (:eof)
141 (lexer-error char-scanner (list quote))
142 (info-with-location tokstart
143 "Literal started here")
144 (setf eofwhine nil)))))))
bf090e02
MW
145
146 ;; Anything else.
147 (:any)
148
149 ;; This really shouldn't be able to happen.
150 (t
151 (assert cp)
e046c3f6
MW
152 (when (scanner-at-eof-p char-scanner)
153 (setf eofwhine nil))
26c5ecfe 154 (lexer-error char-scanner exp)))))))))
bf090e02
MW
155
156(export 'parse-delimited-fragment)
c91b90c3 157(defun parse-delimited-fragment (scanner begin end &key keep-end)
bf090e02
MW
158 "Parse a C fragment delimited by BEGIN and END.
159
c91b90c3
MW
160 The BEGIN and END arguments are the start and end delimiters. BEGIN can
161 be any token type, but is usually a delimiter character; it may also be t
162 to mean `don't care' -- but there must be an initial token of some kind
163 for annoying technical reasons. END may be either a character or a list
164 of characters. If KEEP-END is true, the trailing delimiter is left in the
165 token scanner so that it's available for further parsing decisions: this
166 is probably what you want if END is a list."
bf090e02
MW
167
168 ;; This is decidedly nasty. The basic problem is that `scan-c-fragment'
169 ;; works at the character level rather than at the lexical level, and if we
c91b90c3
MW
170 ;; commit to the BEGIN character too early then `scanner-step' will eat the
171 ;; first few characters of the fragment -- and then the rest of the parse
172 ;; will get horrifically confused.
173
174 (if (if (eq begin t)
175 (not (scanner-at-eof-p scanner))
176 (eql (token-type scanner) begin))
caa08874
MW
177 (multiple-value-prog1
178 (values (scan-c-fragment scanner
179 (if (listp end) end
180 (list end)))
181 t
182 t)
c91b90c3
MW
183 (scanner-step scanner)
184 (unless keep-end (scanner-step scanner)))
bf090e02
MW
185 (values (list begin) nil nil)))
186
187;;;----- That's all, folks --------------------------------------------------