Commit | Line | Data |
---|---|---|
bf090e02 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Parsing C fragments from a scanner | |
4 | ;;; | |
5 | ;;; (c) 2010 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
e0808c47 | 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
bf090e02 MW |
11 | ;;; |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | (in-package #:sod) | |
27 | ||
28 | ;;;-------------------------------------------------------------------------- | |
29 | ;;; Fragment parsing. | |
30 | ||
31 | (export 'scan-c-fragment) | |
32 | (defun scan-c-fragment (scanner end-chars) | |
33 | "Parse a C fragment from the SCANNER. | |
34 | ||
c91b90c3 MW |
35 | SCANNER must be a `sod-token-scanner' instance. The END-CHARS are a |
36 | sequence of characters, any of which delimits the fragment. The | |
37 | delimiting character is left current in the scanner. | |
bf090e02 MW |
38 | |
39 | The parsing process is a simple approximation to C lexical analysis. It | |
40 | takes into account comments (both C and C++ style), string and character | |
41 | literals." | |
42 | ||
e046c3f6 MW |
43 | (let* ((char-scanner (token-scanner-char-scanner scanner)) |
44 | (delim-match nil) (delim-found nil) (delim-loc nil) | |
45 | (stack nil) (start nil) (tokstart nil) (eofwhine t)) | |
bf090e02 MW |
46 | (with-parser-context (character-scanner-context :scanner char-scanner) |
47 | ||
48 | ;; Hack. If the first character is a newline then discard it | |
49 | ;; immediately. If I don't, then the output will look strange and the | |
50 | ;; location information will be unhelpful. | |
51 | (parse #\newline) | |
52 | ||
53 | ;; This seems the easiest way of gathering stuff. | |
e046c3f6 | 54 | (setf start (file-location char-scanner)) |
bf090e02 MW |
55 | (with-scanner-place (place char-scanner) |
56 | ||
e046c3f6 MW |
57 | (flet ((push-delim (found match) |
58 | (push (list delim-found delim-match delim-loc) stack) | |
59 | (setf delim-found found | |
60 | delim-match match | |
61 | delim-loc tokstart)) | |
62 | ||
63 | (pop-delim () | |
64 | (destructuring-bind (found match loc) (pop stack) | |
65 | (setf delim-found found | |
66 | delim-match match | |
67 | delim-loc loc))) | |
bf090e02 MW |
68 | |
69 | (result () | |
70 | (let* ((output (scanner-interval char-scanner place)) | |
71 | (end (position-if (lambda (char) | |
72 | (or (char= char #\newline) | |
73 | (not | |
74 | (whitespace-char-p char)))) | |
75 | output :from-end t)) | |
76 | (trimmed (if end (subseq output 0 (1+ end)) ""))) | |
77 | (make-instance 'c-fragment | |
78 | :location (file-location place) | |
79 | :text trimmed)))) | |
80 | ||
81 | ;; March through characters until we reach the end. | |
82 | (loop | |
e046c3f6 | 83 | (setf tokstart (file-location char-scanner)) |
bf090e02 MW |
84 | (cond-parse (:consumedp cp :expected exp) |
85 | ||
86 | ;; Whitespace and comments are universally dull. | |
87 | ((satisfies whitespace-char-p) (parse :whitespace)) | |
88 | ((scan-comment char-scanner)) | |
89 | ||
c91b90c3 MW |
90 | ;; See if we've reached the end. We must leave the delimiter |
91 | ;; in the scanner, so `if-char' and its various friends aren't | |
92 | ;; appropriate. | |
e046c3f6 MW |
93 | ((lisp (if (and (null delim-match) |
94 | (not (scanner-at-eof-p char-scanner)) | |
c91b90c3 MW |
95 | (member (scanner-current-char char-scanner) |
96 | end-chars)) | |
97 | (values (result) t t) | |
98 | (values end-chars nil nil))) | |
bf090e02 MW |
99 | (return (values it t t))) |
100 | (:eof | |
e046c3f6 MW |
101 | (when eofwhine |
102 | (lexer-error char-scanner nil)) | |
103 | (loop | |
104 | (unless delim-found (return)) | |
105 | (info-with-location delim-loc | |
106 | "Unmatched `~C' found here" delim-found) | |
107 | (pop-delim)) | |
108 | (info-with-location start "C fragment started here") | |
bf090e02 MW |
109 | (return (values (result) t t))) |
110 | ||
111 | ;; Opening and closing brackets. Opening brackets push things | |
e046c3f6 MW |
112 | ;; onto a stack; closing brackets pop things off again. Pop a |
113 | ;; bracket even if it doesn't match, to encourage progress | |
114 | ;; towards finding an end-delimiter. | |
115 | (#\( (push-delim #\( #\))) | |
116 | (#\[ (push-delim #\[ #\])) | |
117 | (#\{ (push-delim #\{ #\})) | |
118 | ((lisp (let ((char (scanner-current-char char-scanner))) | |
119 | (case char | |
120 | ((#\) #\] #\}) | |
121 | (unless (eql char delim-match) | |
122 | (lexer-error char-scanner | |
123 | (and delim-match | |
124 | (list delim-match))) | |
125 | (when delim-loc | |
126 | (info-with-location | |
127 | delim-loc | |
128 | "Mismatched `~C' found here" delim-found))) | |
129 | (scanner-step char-scanner) | |
130 | (when delim-match (pop-delim)) | |
131 | (values char t t)) | |
132 | (t | |
133 | (values '(#\) #\] #\}) nil nil)))))) | |
bf090e02 MW |
134 | |
135 | ;; String and character literals. | |
136 | ((seq ((quote (or #\" #\')) | |
137 | (nil (skip-many () | |
e046c3f6 MW |
138 | (or (and #\\ :any) (not quote)))) |
139 | (nil (or (char quote) | |
140 | (seq (:eof) | |
141 | (lexer-error char-scanner (list quote)) | |
142 | (info-with-location tokstart | |
143 | "Literal started here") | |
144 | (setf eofwhine nil))))))) | |
bf090e02 MW |
145 | |
146 | ;; Anything else. | |
147 | (:any) | |
148 | ||
149 | ;; This really shouldn't be able to happen. | |
150 | (t | |
151 | (assert cp) | |
e046c3f6 MW |
152 | (when (scanner-at-eof-p char-scanner) |
153 | (setf eofwhine nil)) | |
26c5ecfe | 154 | (lexer-error char-scanner exp))))))))) |
bf090e02 MW |
155 | |
156 | (export 'parse-delimited-fragment) | |
c91b90c3 | 157 | (defun parse-delimited-fragment (scanner begin end &key keep-end) |
bf090e02 MW |
158 | "Parse a C fragment delimited by BEGIN and END. |
159 | ||
c91b90c3 MW |
160 | The BEGIN and END arguments are the start and end delimiters. BEGIN can |
161 | be any token type, but is usually a delimiter character; it may also be t | |
162 | to mean `don't care' -- but there must be an initial token of some kind | |
163 | for annoying technical reasons. END may be either a character or a list | |
164 | of characters. If KEEP-END is true, the trailing delimiter is left in the | |
165 | token scanner so that it's available for further parsing decisions: this | |
166 | is probably what you want if END is a list." | |
bf090e02 MW |
167 | |
168 | ;; This is decidedly nasty. The basic problem is that `scan-c-fragment' | |
169 | ;; works at the character level rather than at the lexical level, and if we | |
c91b90c3 MW |
170 | ;; commit to the BEGIN character too early then `scanner-step' will eat the |
171 | ;; first few characters of the fragment -- and then the rest of the parse | |
172 | ;; will get horrifically confused. | |
173 | ||
174 | (if (if (eq begin t) | |
175 | (not (scanner-at-eof-p scanner)) | |
176 | (eql (token-type scanner) begin)) | |
caa08874 MW |
177 | (multiple-value-prog1 |
178 | (values (scan-c-fragment scanner | |
179 | (if (listp end) end | |
180 | (list end))) | |
181 | t | |
182 | t) | |
c91b90c3 MW |
183 | (scanner-step scanner) |
184 | (unless keep-end (scanner-step scanner))) | |
bf090e02 MW |
185 | (values (list begin) nil nil))) |
186 | ||
187 | ;;;----- That's all, folks -------------------------------------------------- |