chiark / gitweb /
src/method-impl.lisp: Initialize `suppliedp' flags properly.
[sod] / src / fragment-parse.lisp
CommitLineData
bf090e02
MW
1;;; -*-lisp-*-
2;;;
3;;; Parsing C fragments from a scanner
4;;;
5;;; (c) 2010 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
e0808c47 10;;; This file is part of the Sensible Object Design, an object system for C.
bf090e02
MW
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(in-package #:sod)
27
28;;;--------------------------------------------------------------------------
29;;; Fragment parsing.
30
(export 'scan-c-fragment)
(defun scan-c-fragment (scanner end-chars)
  "Parse a C fragment from the SCANNER.

   SCANNER must be a `sod-token-scanner' instance.  The END-CHARS are a list
   of characters, any of which delimits the fragment.  (It must be a list
   rather than an arbitrary sequence: it is searched using `member', and is
   handed back as the list of expected things when no delimiter is found.)
   The delimiting character is left current in the scanner.

   The parsing process is a simple approximation to C lexical analysis.  It
   takes into account comments (both C and C++ style), string and character
   literals.  It also keeps track of brackets -- `(', `[' and `{' -- and
   only recognizes a delimiter when no bracket is open.

   Returns three values: a `c-fragment' object holding the text scanned and
   the location at which it began, followed by two true values.  If
   end-of-file is found before a delimiter, an error is reported through
   `lexer-error' and the text collected so far is returned in the same way."

  (let ((char-scanner (token-scanner-char-scanner scanner))
        (delim nil)                     ; closing bracket currently awaited
        (stack nil))                    ; outer values of `delim', saved
    (with-parser-context (character-scanner-context :scanner char-scanner)

      ;; Hack.  If the first character is a newline then discard it
      ;; immediately.  If I don't, then the output will look strange and the
      ;; location information will be unhelpful.
      (parse #\newline)

      ;; This seems the easiest way of gathering stuff: remember where we
      ;; started, and pull out the whole interval once we've finished.
      (with-scanner-place (place char-scanner)

        (flet ((push-delim (d)
                 ;; Note that we're now waiting for the closing bracket D,
                 ;; saving whatever we were waiting for before.
                 (push delim stack)
                 (setf delim d))

               (result ()
                 ;; Build the fragment from everything scanned since PLACE.
                 ;; Trailing whitespace is trimmed back as far as the last
                 ;; character which is either a newline or not whitespace;
                 ;; that character itself is kept.  If there is no such
                 ;; character then the text is empty.
                 (let* ((output (scanner-interval char-scanner place))
                        (end (position-if (lambda (char)
                                            (or (char= char #\newline)
                                                (not
                                                 (whitespace-char-p char))))
                                          output :from-end t))
                        (trimmed (if end (subseq output 0 (1+ end)) "")))
                   (make-instance 'c-fragment
                                  :location (file-location place)
                                  :text trimmed))))

          ;; March through characters until we reach the end.
          (loop
            (cond-parse (:consumedp cp :expected exp)

              ;; Whitespace and comments are universally dull.
              ((satisfies whitespace-char-p) (parse :whitespace))
              ((scan-comment char-scanner))

              ;; See if we've reached the end.  We must leave the delimiter
              ;; in the scanner, so `if-char' and its various friends aren't
              ;; appropriate.  A delimiter only counts if no bracket is
              ;; currently open, i.e., if `delim' is nil.
              ((lisp (if (and (null delim)
                              (member (scanner-current-char char-scanner)
                                      end-chars))
                         (values (result) t t)
                         (values end-chars nil nil)))
               (return (values it t t)))

              ;; Running out of input is an error, but we still hand back
              ;; whatever we managed to collect.
              (:eof
               (lexer-error char-scanner '(:any) cp)
               (return (values (result) t t)))

              ;; Opening and closing brackets.  Opening brackets push things
              ;; onto a stack; closing brackets pop things off again.  A
              ;; closing bracket which isn't the one we're waiting for is
              ;; reported through `cerror*'.
              (#\( (push-delim #\)))
              (#\[ (push-delim #\]))
              (#\{ (push-delim #\}))
              ((or #\) #\] #\})
               (if (eql it delim)
                   (setf delim (pop stack))
                   (cerror* "Unmatched `~C'." it)))

              ;; String and character literals.  Skip to the matching
              ;; closing quote, stepping over backslash-escaped characters
              ;; as pairs so that an escaped quote doesn't end the literal.
              ((seq ((quote (or #\" #\'))
                     (nil (skip-many ()
                            (or (and #\\ :any) (not quote))))
                     (nil (char quote)))))

              ;; Anything else.
              (:any)

              ;; This really shouldn't be able to happen.
              (t
               (assert cp)
               (lexer-error char-scanner exp cp)))))))))
116
(export 'parse-delimited-fragment)
(defun parse-delimited-fragment (scanner begin end &key keep-end)
  "Parse a C fragment introduced by BEGIN and terminated by END.

   BEGIN is the token type which must be current in SCANNER for the parse to
   go ahead; usually this is a delimiter character.  If BEGIN is t then any
   token will do, but the scanner must not be at end-of-file: there has to
   be an initial token of some kind, for annoying technical reasons.  END is
   either a single character or a list of characters, any of which ends the
   fragment.

   If the current token matches, return the fragment and two true values.
   The trailing delimiter is stepped over unless KEEP-END is true, in which
   case it is left in the token scanner so that it's available for further
   parsing decisions: this is probably what you want if END is a list.  If
   the current token doesn't match, return a list containing BEGIN and two
   false values."

  ;; This is decidedly nasty.  `scan-c-fragment' works on characters rather
  ;; than tokens.  If the BEGIN token were stepped over before the fragment
  ;; had been read, `scanner-step' would eat the first few characters of the
  ;; fragment -- and then the rest of the parse would get horrifically
  ;; confused.  So: read the fragment first, and step the token scanner
  ;; afterwards.

  (flet ((begin-matches-p ()
           ;; Is the current token an acceptable start for the fragment?
           (if (eq begin t)
               (not (scanner-at-eof-p scanner))
               (eql (token-type scanner) begin))))
    (cond ((begin-matches-p)
           (let ((fragment (scan-c-fragment scanner
                                            (if (listp end)
                                                end
                                                (list end)))))
             ;; Presumably the first step brings the closing delimiter in
             ;; as the current token; the second then moves past it, unless
             ;; the caller asked for it to be kept.
             (scanner-step scanner)
             (unless keep-end (scanner-step scanner))
             (values fragment t t)))
          (t
           (values (list begin) nil nil)))))
146
147;;;----- That's all, folks --------------------------------------------------