Commit | Line | Data |
---|---|---|
bf090e02 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Parsing C fragments from a scanner | |
4 | ;;; | |
5 | ;;; (c) 2010 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
e0808c47 | 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
bf090e02 MW |
11 | ;;; |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | (in-package #:sod) | |
27 | ||
28 | ;;;-------------------------------------------------------------------------- | |
29 | ;;; Fragment parsing. | |
30 | ||
(export 'scan-c-fragment)
(defun scan-c-fragment (scanner end-chars)
  "Parse a C fragment from the SCANNER.

   SCANNER must be a `sod-token-scanner' instance.  The END-CHARS are a list
   of characters, any of which delimits the fragment; the current character
   is tested against them using `member', so a list (rather than a string or
   other vector) is required.  The delimiting character is left current in
   the underlying character scanner.

   The parsing process is a simple approximation to C lexical analysis.  It
   takes into account comments (both C and C++ style), string and character
   literals.  Round, square and curly brackets are tracked on a stack, and
   an END-CHARS character only terminates the fragment when no bracket is
   currently open.  A closing bracket which doesn't match the innermost
   open bracket is reported through `cerror*' and otherwise ignored.

   Returns three values: a `c-fragment' object, and two true values.  The
   fragment's text is the scanned interval with trailing whitespace (other
   than newlines) removed, and its location is the place where scanning
   began.  If end-of-file is reached before a delimiter is found, a lexer
   error is reported and the text gathered so far is returned in the same
   way."

  (let ((char-scanner (token-scanner-char-scanner scanner))
        ;; DELIM is the closing bracket we're currently waiting for, or nil
        ;; if we're at top level; STACK holds the suspended outer values.
        (delim nil)
        (stack nil))
    (with-parser-context (character-scanner-context :scanner char-scanner)

      ;; Hack.  If the first character is a newline then discard it
      ;; immediately.  If I don't, then the output will look strange and the
      ;; location information will be unhelpful.
      (parse #\newline)

      ;; This seems the easiest way of gathering stuff: remember where we
      ;; started, and extract the whole interval when we finish.
      (with-scanner-place (place char-scanner)

        (flet ((push-delim (d)
                 ;; Enter a bracketed region which will be closed by D,
                 ;; remembering the delimiter we were waiting for before.
                 (push delim stack)
                 (setf delim d))

               (result ()
                 ;; Build the fragment from everything scanned since PLACE.
                 ;; Trailing whitespace is trimmed back to (and including)
                 ;; the last newline or non-whitespace character; if there
                 ;; is no such character, the text is empty.
                 (let* ((output (scanner-interval char-scanner place))
                        (end (position-if (lambda (char)
                                            (or (char= char #\newline)
                                                (not
                                                 (whitespace-char-p char))))
                                          output :from-end t))
                        (trimmed (if end (subseq output 0 (1+ end)) "")))
                   (make-instance 'c-fragment
                                  :location (file-location place)
                                  :text trimmed))))

          ;; March through characters until we reach the end.  The clauses
          ;; are tried in order, so their sequence matters.
          (loop
            (cond-parse (:consumedp cp :expected exp)

              ;; Whitespace and comments are universally dull.
              ((satisfies whitespace-char-p) (parse :whitespace))
              ((scan-comment char-scanner))

              ;; See if we've reached the end.  We must leave the delimiter
              ;; in the scanner, so `if-char' and its various friends aren't
              ;; appropriate.  A delimiter only counts if we're not inside
              ;; any brackets.
              ((lisp (if (and (null delim)
                              (member (scanner-current-char char-scanner)
                                      end-chars))
                         (values (result) t t)
                         (values end-chars nil nil)))
               (return (values it t t)))

              ;; Running out of input before finding a delimiter is an
              ;; error, but return what we've collected anyway.
              (:eof
               (lexer-error char-scanner '(:any) cp)
               (return (values (result) t t)))

              ;; Opening and closing brackets.  Opening brackets push things
              ;; onto a stack; closing brackets pop things off again.
              (#\( (push-delim #\)))
              (#\[ (push-delim #\]))
              (#\{ (push-delim #\}))
              ((or #\) #\] #\})
               (if (eql it delim)
                   (setf delim (pop stack))
                   (cerror* "Unmatched `~C'." it)))

              ;; String and character literals.  A backslash escapes the
              ;; following character, so an escaped quote doesn't end the
              ;; literal.
              ((seq ((quote (or #\" #\'))
                     (nil (skip-many ()
                            (or (and #\\ :any) (not quote))))
                     (nil (char quote)))))

              ;; Anything else.
              (:any)

              ;; This really shouldn't be able to happen.
              (t
               (assert cp)
               (lexer-error char-scanner exp cp)))))))))
116 | ||
(export 'parse-delimited-fragment)
(defun parse-delimited-fragment (scanner begin end &key keep-end)
  "Parse a C fragment delimited by BEGIN and END.

   BEGIN is the token type which must be current in SCANNER for the parse
   to start.  It is usually a delimiter character, but any token type will
   do; it may also be t, meaning `don't care' -- though even then the
   scanner must not be at end-of-file, because some initial token is needed
   for annoying technical reasons.  END is either a single character or a
   list of characters, any of which ends the fragment.

   If KEEP-END is true, the trailing delimiter is left as the current token
   in the token scanner so that it's available for further parsing
   decisions: this is probably what you want if END is a list.  Otherwise
   the delimiter is stepped over too.

   On success, return the fragment and two true values.  If the current
   token isn't acceptable as BEGIN, return a list containing BEGIN and two
   nil values, without touching the scanner."

  ;; This is decidedly nasty.  The basic problem is that `scan-c-fragment'
  ;; works at the character level rather than at the lexical level, and if we
  ;; commit to the BEGIN character too early then `scanner-step' will eat the
  ;; first few characters of the fragment -- and then the rest of the parse
  ;; will get horrifically confused.  So: check the token, but don't step
  ;; the token scanner until after the fragment has been read.

  (let ((startp (if (eq begin t)
                    (not (scanner-at-eof-p scanner))
                    (eql (token-type scanner) begin)))
        (end-chars (if (listp end) end (list end))))
    (cond ((not startp)
           (values (list begin) nil nil))
          (t
           (let ((fragment (scan-c-fragment scanner end-chars)))
             ;; Resynchronize the token scanner: the first step moves on from
             ;; the BEGIN token, and the optional second step discards the
             ;; closing delimiter.
             (scanner-step scanner)
             (unless keep-end (scanner-step scanner))
             (values fragment t t))))))
146 | ||
147 | ;;;----- That's all, folks -------------------------------------------------- |