| 1 | ;;; -*-lisp-*- |
| 2 | ;;; |
| 3 | ;;; Implementation of lexical analysis protocol. |
| 4 | ;;; |
| 5 | ;;; (c) 2009 Straylight/Edgeware |
| 6 | ;;; |
| 7 | |
| 8 | ;;;----- Licensing notice --------------------------------------------------- |
| 9 | ;;; |
| 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
| 11 | ;;; |
| 12 | ;;; SOD is free software; you can redistribute it and/or modify |
| 13 | ;;; it under the terms of the GNU General Public License as published by |
| 14 | ;;; the Free Software Foundation; either version 2 of the License, or |
| 15 | ;;; (at your option) any later version. |
| 16 | ;;; |
| 17 | ;;; SOD is distributed in the hope that it will be useful, |
| 18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;;; GNU General Public License for more details. |
| 21 | ;;; |
| 22 | ;;; You should have received a copy of the GNU General Public License |
| 23 | ;;; along with SOD; if not, write to the Free Software Foundation, |
| 24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | (cl:in-package #:sod) |
| 27 | |
| 28 | ;;;-------------------------------------------------------------------------- |
| 29 | ;;; Class implementation. |
| 30 | |
| 31 | (defmethod shared-initialize :after |
| 32 | ((scanner sod-token-scanner) slot-names &key) |
| 33 | (default-slot (scanner 'sod-parser::filename slot-names) |
| 34 | (scanner-filename (token-scanner-char-scanner scanner)))) |
| 35 | |
| 36 | (defmethod make-scanner-stream ((scanner sod-token-scanner)) |
| 37 | (make-scanner-stream (token-scanner-char-scanner scanner))) |
| 38 | |
| 39 | ;;;-------------------------------------------------------------------------- |
| 40 | ;;; Indicators and error messages. |
| 41 | |
| 42 | (defun show-char (stream char &optional colonp atsignp) |
| 43 | "Format CHAR to STREAM in a readable way. |
| 44 | |
| 45 | Usable in `format''s ~/.../ command." |
| 46 | (declare (ignore colonp atsignp)) |
| 47 | (cond ((null char) (write-string "<end-of-file>" stream)) |
| 48 | ((and (graphic-char-p char) (char/= char #\space)) |
| 49 | (format stream "`~C'" char)) |
| 50 | (t (format stream "<~(~:C~)>" char)))) |
| 51 | |
| 52 | (defun skip-until (scanner token-types &key keep-end) |
| 53 | "This is the implementation of the `skip-until' parser." |
| 54 | (do ((consumedp nil t)) |
| 55 | ((member (token-type scanner) token-types) |
| 56 | (unless keep-end (scanner-step scanner)) |
| 57 | (values nil t (or keep-end consumedp))) |
| 58 | (when (scanner-at-eof-p scanner) |
| 59 | (return (values token-types nil consumedp))) |
| 60 | (scanner-step scanner))) |
| 61 | |
| 62 | (defun parse-error-recover (scanner parser recover |
| 63 | &key ignore-unconsumed force-progress) |
| 64 | "This is the implementation of the `error' parser." |
| 65 | (multiple-value-bind (result win consumedp) (funcall parser) |
| 66 | (cond ((or win |
| 67 | (and (not consumedp) |
| 68 | (or ignore-unconsumed |
| 69 | (scanner-at-eof-p scanner)))) |
| 70 | ;; If we succeeded, or if we didn't consume any tokens and the |
| 71 | ;; caller's OK with that, then there's nothing for us to do here. |
| 72 | ;; On the other hand, if we failed, didn't consume any tokens, and |
| 73 | ;; we're at end-of-file, then there's not much hope of making |
| 74 | ;; onward progress, so in this case we propagate the failure |
| 75 | ;; rather than trying to recover. And we assume that the |
| 76 | ;; continuation will somehow arrange to report the problem, and |
| 77 | ;; avoid inundating the user with error reports. |
| 78 | (values result win consumedp)) |
| 79 | (t |
| 80 | ;; Now we have to do some kind of sensible error recovery. The |
| 81 | ;; important thing to do here is to make sure that we make some |
| 82 | ;; progress. If we consumed any tokens then we're fine, and we'll |
| 83 | ;; just try the provided recovery strategy. Otherwise, if we're |
| 84 | ;; not at EOF, then we can ensure progress by discarding the |
| 85 | ;; current token. Finally, if we are at EOF then our best bet is |
| 86 | ;; simply to propagate the current failure back to the caller, but |
| 87 | ;; we handled that case above. |
| 88 | (syntax-error scanner result) |
| 89 | (when (and force-progress (not consumedp)) (scanner-step scanner)) |
| 90 | (funcall recover))))) |
| 91 | |
| 92 | ;;;-------------------------------------------------------------------------- |
| 93 | ;;; Token scanning. |
| 94 | |
| 95 | (defmethod scanner-token ((scanner sod-token-scanner)) |
| 96 | (with-slots (char-scanner line column) scanner |
| 97 | (with-parser-context (character-scanner-context :scanner char-scanner) |
| 98 | |
| 99 | (flet ((scan-digits (&key (radix 10) (min 1) (init 0)) |
| 100 | ;; Scan and return a sequence of digits. |
| 101 | (parse (many (acc init (+ (* acc radix) it) :min min) |
| 102 | (label (list :digit radix) |
| 103 | (filter (lambda (ch) |
| 104 | (digit-char-p ch radix))))))) |
| 105 | (start-floc () |
| 106 | ;; This is a little nasty. We scan the first token during |
| 107 | ;; instance initialization, as a result of `shared-initialize' |
| 108 | ;; on `token-scanner'. Unfortunately, this happens before |
| 109 | ;; we've had a chance to initialize our own `filename' slot. |
| 110 | ;; This means that we can't use the SCANNER as a file |
| 111 | ;; location, however tempting it might be. So we have this |
| 112 | ;; hack. |
| 113 | (make-file-location (scanner-filename char-scanner) |
| 114 | (scanner-line scanner) |
| 115 | (scanner-column scanner)))) |
| 116 | |
| 117 | ;; Skip initial junk, and remember the place. |
| 118 | (loop |
| 119 | (setf (scanner-line scanner) (scanner-line char-scanner) |
| 120 | (scanner-column scanner) (scanner-column char-scanner)) |
| 121 | (cond-parse (:consumedp cp :expected exp) |
| 122 | ((satisfies whitespace-char-p) (parse :whitespace)) |
| 123 | ((scan-comment char-scanner)) |
| 124 | (t (if cp (lexer-error char-scanner exp) (return))))) |
| 125 | |
| 126 | ;; Now parse something. |
| 127 | (cond-parse (:consumedp cp :expected exp) |
| 128 | |
| 129 | ;; Alphanumerics mean we read an identifier. |
| 130 | ((or #\_ (satisfies alpha-char-p)) |
| 131 | (values :id (with-output-to-string (out) |
| 132 | (write-char it out) |
| 133 | (parse (many (nil nil (write-char it out)) |
| 134 | (or #\_ (satisfies alphanumericp))))))) |
| 135 | |
| 136 | ;; Quotes introduce a literal. |
| 137 | ((seq ((quote (or #\" #\')) |
| 138 | (contents (many (out (make-string-output-stream) |
| 139 | (progn (write-char it out) out) |
| 140 | :final (get-output-stream-string out)) |
| 141 | (or (and #\\ :any) (not quote)))) |
| 142 | (nil (or (char quote) |
| 143 | (seq (:eof) |
| 144 | (lexer-error char-scanner (list quote)) |
| 145 | (info-with-location |
| 146 | (start-floc) "Literal started here"))))) |
| 147 | (ecase quote |
| 148 | (#\" contents) |
| 149 | (#\' (case (length contents) |
| 150 | (1 (char contents 0)) |
| 151 | (0 (cerror*-with-location (start-floc) |
| 152 | "Lexical error: ~ |
| 153 | empty character literal") |
| 154 | #\?) |
| 155 | (t (cerror*-with-location (start-floc) |
| 156 | "Lexical error: ~ |
| 157 | too many characters ~ |
| 158 | in literal") |
| 159 | (char contents 0)))))) |
| 160 | (values (etypecase it |
| 161 | (character :char) |
| 162 | (string :string)) |
| 163 | it)) |
| 164 | |
| 165 | ;; Zero introduces a chosen-radix integer. |
| 166 | ((and #\0 |
| 167 | (or (and (or #\b #\B) (scan-digits :radix 2)) |
| 168 | (and (or #\o #\O) (scan-digits :radix 8)) |
| 169 | (and (or #\x #\X) (scan-digits :radix 16)) |
| 170 | (scan-digits :radix 8 :min 0))) |
| 171 | (values :int it)) |
| 172 | |
| 173 | ;; Any other digit forces radix-10. |
| 174 | ((seq ((d (filter digit-char-p)) |
| 175 | (i (scan-digits :radix 10 :min 0 :init d))) |
| 176 | i) |
| 177 | (values :int it)) |
| 178 | |
| 179 | ;; Some special punctuation sequences are single tokens. |
| 180 | ("..." (values :ellipsis nil)) |
| 181 | |
| 182 | ;; Any other character is punctuation. |
| 183 | (:any (values it nil)) |
| 184 | |
| 185 | ;; End of file means precisely that. |
| 186 | (:eof (values :eof nil)) |
| 187 | |
| 188 | ;; Report errors and try again. Because we must have consumed some |
| 189 | ;; input in order to get here (we've matched both :any and :eof) we |
| 190 | ;; must make progress on every call. |
| 191 | (t |
| 192 | (assert cp) |
| 193 | (lexer-error char-scanner exp) |
| 194 | (scanner-token scanner))))))) |
| 195 | |
| 196 | ;;;----- That's all, folks -------------------------------------------------- |