3 ;;; Infix-to-S-exp translation
5 ;;; (c) 2006 Mark Wooding
8 ;;;----- Licensing notice ---------------------------------------------------
10 ;;; This program is free software; you can redistribute it and/or modify
11 ;;; it under the terms of the GNU General Public License as published by
12 ;;; the Free Software Foundation; either version 2 of the License, or
13 ;;; (at your option) any later version.
15 ;;; This program is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;;; GNU General Public License for more details.
20 ;;; You should have received a copy of the GNU General Public License
21 ;;; along with this program; if not, write to the Free Software Foundation,
22 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 ;;;--------------------------------------------------------------------------
27 (defpackage #:infix-keywords
29 (:export #:|(| #:|)| #:{ #:} #:|,| #:@ #:|$| #:& #:\| #:~
30 #:and #:or #:not #:xor
31 #:== #:/= #:< #:<= #:> #:>= #:eq #:eql #:equal #:equalp
32 #:+ #:- #:* #:/ #:// #:% #:^ #:= #:!
33 #:+= #:-= #:*= #:%= #:&= #:\|= #:xor= #:<<= #:>>=
41 (:use #:common-lisp #:infix-keywords)
42 (:export #:operator #:operatorp
43 #:*token* #:get-token #:*get-token*
44 #:pushval #:popval #:flushops #:pushop
45 #:infix-done #:parse-infix
46 #:defopfunc #:definfix #:defprefix #:defpostfix
47 #:infix #:prefix #:postfix #:operand
49 #:binop-apply #:binop-apply-append
50 #:unop-apply #:unop-apply-toggle
52 #:read-infix #:install-infix-reader))
56 ;;;--------------------------------------------------------------------------
59 (defstruct (operator (:predicate operatorp)
61 "An operator object. The name serves mainly for documentation. The left
62 and right precedences control operator stacking behaviour. The function
63 is called when this operator is popped off the stack.
65 If the left precedence is not nil, then operators currently on the stack
66 whose /right/-precedence is greater than or equal to this operator's
67 /left/-precedence are popped before this operator can be pushed. If the
68 right precedence is nil, then this operator is not in fact pushed, but
69 processed immediately."
70 (name nil :type symbol)
71 (lprec nil :type (or fixnum null))
72 (rprec nil :type (or fixnum null))
73 (func (lambda () nil) :type #-ecl (function () t) #+ecl function))
75 ;;;--------------------------------------------------------------------------
76 ;;; Global parser state.
79 "The parser input stream. Bound automatically by `read-infix'.")
81 ;;;--------------------------------------------------------------------------
82 ;;; State for one level of `parse-infix'.
85 "Value stack. Contains (partially constructed) Lisp forms.")
87 "Operator stack. Contains operator objects.")
89 "The current token. Could be any Lisp object.")
(defvar *paren-depth* 0
  "How many parentheses are currently open in the active `parse-infix'.
A nonzero depth overrides the minprec restriction.")
94 ;;;--------------------------------------------------------------------------
;; The end-of-file marker is a freshly consed object, compared by identity.
;; `defconstant' may be evaluated more than once (at compile time and again
;; at load time, or when the file is reloaded), and redefining a constant
;; with a value that is not `eql' to the old one has undefined consequences.
;; Reuse the existing object when there is one so that re-evaluation is
;; harmless.
(defconstant eof
  (if (boundp 'eof)
      (symbol-value 'eof)
      (cons :eof nil))
  "A magical object which `get-token' returns at end-of-file.")
100 (defun default-get-token ()
101 "Read a token from *stream* and store it in *token*."
102 (flet ((whitespacep (ch)
103 (member ch '(#\newline #\space #\tab #\page)))
105 (member ch '(#\; #\, #\: #\( #\) #\@ #\$ #\[ #\] #\{ #\})))
107 (member ch '(#\# #\| #\\ #\" #\' #\`)))
110 (return-from default-get-token)))
114 (setf ch (read-char *stream* nil nil t))
115 (cond ((null ch) (done eof))
116 ((whitespacep ch) (go top))
117 ((eql ch #\;) (go comment))
118 ((self-delim-p ch) (done (intern (string ch)
120 ((or (macro-char-p ch) (alphanumericp ch)) (go read))
123 (unread-char ch *stream*)
124 (done (read *stream* t nil t))
126 (done (intern (with-output-to-string (out)
129 (setf ch (read-char *stream* nil nil t))
133 ((or (self-delim-p ch)
136 (unread-char ch *stream*)
139 (write-char ch out)))))
143 (case (setf ch (read-char *stream* nil nil t))
145 ((#\newline) (go top))
146 (t (go comment)))))))
(defvar *get-token* (function default-get-token)
  "The tokenizing function currently in effect; `get-token' calls it.")
152 "Read a token, and store it in *token*. Indirects via *get-token*."
153 (funcall *get-token*))
155 ;;;--------------------------------------------------------------------------
156 ;;; Stack manipulation.
159 "Push VAL onto the value stack."
163 "Pop a value off the value stack and return it."
166 (defun flushops (prec)
"Flush out operators on the operator stack with precedence higher than or
168 equal to PREC. This is used when a new operator is pushed, to ensure that
169 higher-precedence operators snarf their arguments."
173 (let ((head (car *opstk*)))
174 (when (> prec (op-rprec head))
177 (funcall (op-func head)))))
180 "Push the operator OP onto the stack. If the operator has a
181 left-precedence, then operators with higher precedence are flushed (see
182 `flushops'). If the operator has no left-precedence, the operator is
183 invoked immediately."
184 (let ((lp (op-lprec op)))
189 (funcall (op-func op))))
191 ;;;--------------------------------------------------------------------------
195 "Signal that `parse-infix' has reached the end of an expression. This is
196 primarily used by the `)' handler function if it finds there are no
198 (throw 'infix-done nil))
200 (defun parse-infix (&optional minprec)
"Parse an infix expression and return the resulting Lisp form. This is
202 the heart of the whole thing.
204 Expects a token to be ready in *token*; leaves *token* as the first token
205 which couldn't be parsed.
The syntax parsed by this function doesn't fit nicely into a BNF, since
parsing is affected by the precedences of the various operators. We have
209 low-precedence prefix operators such as `not', for example."
210 (flet ((lookup (items)
211 (dolist (item items (values nil nil))
212 (let ((op (get *token* (car item))))
213 (when op (return (values op (cdr item))))))))
222 (when (eq *token* eof)
223 (error "operand expected; found eof"))
226 (multiple-value-bind (op newstate)
227 (lookup '((prefix . :operand)
228 (operand . :operator)))
233 (setf state :operator))
236 (setf state newstate))
243 (setf state :operator))))
247 (multiple-value-bind (op newstate)
248 (lookup '((infix . :operand)
249 (postfix . :operator)))
257 (zerop *paren-depth*)
259 (< (op-lprec op) minprec))
263 (setf state newstate)))
266 (flushops most-negative-fixnum)
267 (assert (and (consp *valstk*)
268 (null (cdr *valstk*))))
271 ;;;--------------------------------------------------------------------------
272 ;;; Machinery for defining operators.
274 (defmacro defopfunc (op kind &body body)
275 "Defines a magical operator. The operator's name is the symbol OP. The
276 KIND must be one of the symbols `infix', `prefix' or `postfix'. The body
277 is evaluated when the operator is parsed, and must either push appropriate
278 things on the operator stack or do its own parsing and push a result on
281 (setf (get ',op ',kind)
285 (defmacro definfix (op prec &body body)
286 "Defines an infix operator. The operator's name is the symbol OP. The
287 operator's precedence is specified by PREC, which may be one of the
290 * PREC -- equivalent to (:lassoc PREC)
291 * (:lassoc PREC) -- left-associative with precedence PREC
292 * (:rassoc PREC) -- right-associative with precedence PREC
293 * (LPREC . RPREC) -- independent left- and right-precedences
294 * (LPREC RPREC) -- synonym for the dotted form
296 In fact, (:lassoc PREC) is the same as (PREC . PREC), and (:rassoc PREC)
297 is the same as (PREC . (1- PREC)).
299 The BODY is evaluated when the operator's arguments are fully resolved.
300 It should pop off two arguments and push one result. Nobody will check
301 that this is done correctly."
305 (error "bad precedence spec ~S" prec)))
306 (cond ((integerp prec)
310 ((and (integerp (car prec))
311 (integerp (cdr prec)))
312 (values (car prec) (cdr prec)))
313 ((or (not (consp (cdr prec)))
314 (not (integerp (cadr prec)))
315 (not (null (cddr prec))))
317 ((integerp (car prec))
318 (values (car prec) (cadr prec)))
319 ((eq (car prec) :lassoc)
320 (values (cadr prec) (cadr prec)))
321 ((eq (car prec) :rassoc)
322 (values (cadr prec) (1- (cadr prec))))
326 (setf (get ',op 'infix)
327 (make-operator :name ',op
328 :lprec ,lprec :rprec ,rprec
329 :func (lambda () ,@body)))
332 (eval-when (:compile-toplevel :load-toplevel)
333 (defun do-defunary (kind op prec body)
334 (unless (integerp prec)
335 (error "bad precedence spec ~S" prec))
337 (setf (get ',op ',kind)
338 (make-operator :name ',op
341 (postfix :lprec)) ,prec
342 :func (lambda () ,@body)))
(defmacro defprefix (op prec &body body)
  "Define the symbol OP as a prefix operator with (right) precedence PREC.
BODY is evaluated once the operator's argument has been fully determined;
it should pop off one argument and push one result."
  ;; All the work is shared with `defpostfix' via `do-defunary'.
  (do-defunary 'prefix op prec body))
(defmacro defpostfix (op prec &body body)
  "Define the symbol OP as a postfix operator with (left) precedence PREC.
BODY is evaluated once the operator's argument has been fully determined;
it should pop off one argument and push one result."
  ;; All the work is shared with `defprefix' via `do-defunary'.
  (do-defunary 'postfix op prec body))
357 ;;;--------------------------------------------------------------------------
358 ;;; Infrastructure for operator definitions.
360 (defun delim (delim &optional (requiredp t))
361 "Parse DELIM, and read the next token. Returns t if the DELIM was found,
362 or nil if not (and REQUIREDP was nil)."
363 (cond ((eq *token* delim) (get-token) t)
364 (requiredp (error "expected `~(~A~)'; found ~S" delim *token*))
367 (defun errfunc (&rest args)
368 "Returns a function which reports an error. Useful when constructing
370 (lambda () (apply #'error args)))
(defun binop-apply (name)
  "Replace the top two value-stack items by a call to the Lisp binop NAME.
With Y on top and X beneath it, the form pushed is (NAME X Y)."
  ;; The right-hand operand was pushed last, so it comes off first.
  (let* ((rhs (popval))
         (lhs (popval)))
    (pushval (list name lhs rhs))))
378 (defun binop-apply-append (name)
379 "As for `binop-apply' but if the second-from-top item on the stack has the
380 form (NAME SOMETHING ...) then fold the top item into the form rather than
382 (let ((y (popval)) (x (popval)))
383 (pushval (if (and (consp x) (eq (car x) name))
(defun unop-apply (name)
  "Replace the top value-stack item X by a call to the Lisp unop NAME,
i.e., push (NAME X)."
  (let ((arg (popval)))
    (pushval (list name arg))))
392 (defun unop-apply-toggle (name)
393 "As for `unop-apply', but if the top item has the form (NAME X) already,
396 (pushval (if (and (consp x)
403 (defun strip-progn (form)
404 "Return a version of FORM suitable for putting somewhere where there's an
405 implicit `progn'. If FORM has the form (PROGN . FOO) then return FOO,
406 otherwise return (FORM)."
407 (if (and (consp form)
408 (eq (car form) 'progn))
412 (defun parse-expr-list ()
413 "Parse a list of expressions separated by commas."
416 (push (parse-infix 0) stuff)
417 (unless (delim '|,| nil)
421 (defun parse-ident-list ()
422 "Parse a list of symbols separated by commas."
425 (unless (symbolp *token*)
426 (error "expected symbol; found ~S" *token*))
429 (unless (delim '|,| nil)
433 ;;;--------------------------------------------------------------------------
434 ;;; Various simple operators.
;; Sequencing: the lowest precedence in the table.
(definfix |,| (:lassoc -1) (binop-apply-append 'progn))

;; Logical connectives.
(definfix or (:lassoc 10) (binop-apply-append 'or))
(definfix and (:lassoc 15) (binop-apply-append 'and))

(defprefix not 19 (unop-apply-toggle 'not))

;; Comparisons and equality predicates.
(definfix == (:lassoc 20) (binop-apply-append '=))
(definfix /= (:lassoc 20) (binop-apply-append '/=))
(definfix < (:lassoc 20) (binop-apply-append '<))
(definfix <= (:lassoc 20) (binop-apply-append '<=))
(definfix >= (:lassoc 20) (binop-apply-append '>=))
(definfix > (:lassoc 20) (binop-apply-append '>))
(definfix eq (:lassoc 20) (binop-apply-append 'eq))
(definfix eql (:lassoc 20) (binop-apply-append 'eql))
(definfix equal (:lassoc 20) (binop-apply-append 'equal))
(definfix equalp (:lassoc 20) (binop-apply-append 'equalp))

;; Bitwise logic.
(definfix \| (:lassoc 30) (binop-apply-append 'logior))
(definfix xor (:lassoc 30) (binop-apply-append 'logxor))
(definfix & (:lassoc 35) (binop-apply-append 'logand))

;; Shifts.  A right shift is an `ash' by the negated count.
(definfix << (:lassoc 40) (binop-apply 'ash))
(definfix >> (:lassoc 40) (unop-apply-toggle '-) (binop-apply 'ash))

;; Additive operators.
(definfix + (:lassoc 50) (binop-apply-append '+))
;; Subtraction folds `a - b - c' into (- a b c), but it must not fold into a
;; one-argument form: (- a) is the negation built by the prefix `-' below,
;; and appending to it would turn `-a - b' into (- a b), i.e. a - b.  Only
;; fold when the left operand already has at least two arguments.
(definfix - (:lassoc 50)
  (let ((y (popval)) (x (popval)))
    (pushval (if (and (consp x)
                      (eq (car x) '-)
                      (consp (cddr x)))
                 (append x (list y))
                 (list '- x y)))))

;; Multiplicative operators.
(definfix * (:lassoc 60) (binop-apply-append '*))
(definfix / (:lassoc 60) (binop-apply '/))
(definfix // (:lassoc 60) (binop-apply 'floor))
(definfix % (:lassoc 60) (binop-apply 'mod))

;; Exponentiation is right-associative.
(definfix ^ (:rassoc 70) (binop-apply 'expt))

;; Assignment-like operators, with independent left and right precedences.
(definfix = (120 . 5) (binop-apply 'setf))
(definfix += (120 . 5) (binop-apply 'incf))
(definfix -= (120 . 5) (binop-apply 'decf))

;; Unary prefix operators.  Prefix `+' leaves its operand untouched.
(defprefix + 100 nil)
(defprefix - 100 (unop-apply-toggle '-))
(defprefix ~ 100 (unop-apply-toggle 'lognot))

(defprefix ++ 100 (unop-apply 'incf))
(defprefix -- 100 (unop-apply 'decf))
482 ;;(defpostfix ! 110 (unop-apply 'factorial))
485 "An escape to the standard Lisp reader."
486 (pushval (read *stream* t nil t))
489 ;;;--------------------------------------------------------------------------
490 ;;; Parentheses, for grouping and function-calls.
492 (defun push-paren (right)
493 "Pushes a funny parenthesis operator. Since this operator has no left
494 precedence, and very low right precedence, it is pushed over any stack of
495 operators and can only be popped by magic or end-of-file. In the latter
496 case, cause an error."
497 (pushop (make-operator :name right
498 :lprec nil :rprec -1000
499 :func (errfunc "missing `~A'" right)))
503 (defun pop-paren (right)
504 "Pops a parenthesis. If there are no parentheses, maybe they belong to the
505 caller's syntax. Otherwise, pop off operators above the current funny
506 parenthesis operator, and then remove it."
507 (when (zerop *paren-depth*)
511 (unless (eq (op-name (car *opstk*)) right)
512 (error "spurious `~A'" right))
513 (assert (plusp *paren-depth*))
;; Grouping brackets.  An opening bracket in prefix position calls
;; `push-paren' with the matching closer's symbol; a closing bracket in
;; postfix position calls `pop-paren' with the same symbol.
(defopfunc |(| prefix
  (push-paren '|)|))
(defopfunc |)| postfix
  (pop-paren '|)|))
(defopfunc |{| prefix
  (push-paren '|}|))
(defopfunc |}| postfix
  (pop-paren '|}|))
523 (defopfunc |(| postfix
525 (pushval (cons (popval) (and (not (eq *token* '|)|)) (parse-expr-list))))
528 ;;;--------------------------------------------------------------------------
529 ;;; Various bits of special syntax.
531 (defopfunc if operand
532 "Parse an `if' form. Syntax:
534 IF ::= `if' CONDITION `then' CONSEQUENCE [`else' ALTERNATIVE]
536 We parse this into an `if' where sensible, or into a `cond' if we see an
537 `else if' pair. The usual `dangling else' rule is followed."
540 (setf cond (parse-infix))
542 (setf cons (parse-infix 0))
543 (if (not (eq *token* 'else))
544 (pushval (list 'if cond cons))
547 (cond ((not (eq *token* 'if))
548 (pushval (list 'if cond cons (parse-infix 0))))
551 (flet ((clause (cond cons)
552 (push (cons cond (strip-progn cons)) clauses)))
556 (setf cond (parse-infix))
558 (setf cons (parse-infix 0))
560 (unless (eq *token* 'else) (return))
565 (clause t (parse-infix 0))
567 (pushval (cons 'cond (nreverse clauses)))))))))))
569 (defun do-letlike (kind)
570 "Parse a `let' form. Syntax:
572 LET ::= `let' | `let*' VARS `in' EXPR
573 VARS ::= VAR | VARS `,' VAR
574 VAR ::= NAME [`=' VALUE]
576 Translates into the obvious Lisp code."
577 (let ((clauses nil) name value)
580 (unless (symbolp *token*)
581 (error "symbol expected, found ~S" *token*))
587 (setf value (parse-infix 0))
588 (push (list name value) clauses))
590 (unless (eq *token* '|,|)
594 (pushval `(,kind ,(nreverse clauses) ,@(strip-progn (parse-infix 0))))))
;; `let' and `let*' share one parser; only the head symbol passed to
;; `do-letlike' differs.
(defopfunc let operand
  (do-letlike 'let))
(defopfunc let* operand
  (do-letlike 'let*))
598 (defopfunc when operand
600 (pushval `(when ,(parse-infix)
601 ,@(progn (delim 'do) (strip-progn (parse-infix 0))))))
603 (defopfunc unless operand
605 (pushval `(unless ,(parse-infix)
606 ,@(progn (delim 'do) (strip-progn (parse-infix 0))))))
608 (defopfunc loop operand
610 (pushval `(loop ,@(strip-progn (parse-infix 0)))))
612 (defopfunc bind operand
614 (let ((ids (parse-ident-list))
615 (valform (progn (delim '=) (parse-infix 0)))
616 (body (if (delim '|,| nil)
620 (strip-progn (parse-infix 0))))))
621 (list (if (and ids (null (cdr ids)))
622 `(let ((,(car ids) ,valform)) ,@body)
623 `(multiple-value-bind ,ids ,valform ,@body))))))
625 (pushval (car (loop)))))
627 ;;;--------------------------------------------------------------------------
628 ;;; Parsing function bodies and lambda lists.
630 (defun parse-lambda-list ()
631 "Parse an infix-form lambda list and return the Lisp equivalent."
632 (flet ((ampersand-symbol-p (thing)
634 (let ((name (symbol-name thing)))
635 (plusp (length name))
636 (char= (char name 0) #\&))))
639 (when (or (eq *token* '&)
641 (unread-char #\& *stream*)
642 (setf *token* (read *stream* t nil t)))))
644 (let ((*get-token* #'get-lambda-token))
646 (unless (eq *token* '|)|)
649 (cond ((ampersand-symbol-p *token*)
652 (when (eq *token* '|)|)
657 (let ((name *token*))
660 (push (list name (parse-infix 0)) args)
665 (when (delim '|,| nil)
(defun parse-func-name ()
  "Parse a function name and return its Lisp equivalent.
A name is either a parenthesized infix expression or the current token
taken as it stands."
  (if (delim '|(| nil)
      ;; Parenthesized name: parse what is inside, then insist on the closer.
      (let ((name (parse-infix)))
        (delim '|)|)
        name)
      ;; Bare name: take the current token and step past it.
      (let ((name *token*))
        (get-token)
        name)))
677 (defopfunc lambda operand
679 (pushval `(lambda ,(parse-lambda-list) ,@(strip-progn (parse-infix 0)))))
681 (defun do-defunlike (kind)
682 "Process a defun-like form."
684 (pushval `(,kind ,(parse-func-name) ,(parse-lambda-list)
685 ,@(strip-progn (parse-infix 0)))))
;; `defun' and `defmacro' share one parser; only the head symbol passed to
;; `do-defunlike' differs.
(defopfunc defun operand
  (do-defunlike 'defun))
(defopfunc defmacro operand
  (do-defunlike 'defmacro))
690 (defun do-fletlike (kind)
691 "Process a flet-like form."
695 (push `(,(parse-func-name) ,(parse-lambda-list)
696 ,@(strip-progn (parse-infix 0)))
698 (unless (delim '|,| nil)
701 (pushval `(,kind ,(nreverse clauses) ,@(strip-progn (parse-infix 0))))))
;; `flet' and `labels' share one parser; only the head symbol passed to
;; `do-fletlike' differs.
(defopfunc flet operand
  (do-fletlike 'flet))
(defopfunc labels operand
  (do-fletlike 'labels))
706 ;;;--------------------------------------------------------------------------
707 ;;; User-interface stuff.
709 (defun read-infix (&optional (*stream* *standard-input*) &key (delim eof))
710 "Reads an infix expression from STREAM and returns the corresponding Lisp.
711 Requires the expression to be delimited properly by DELIM (by default
717 (unless (eq *token* delim)
718 (error "expected ~S; found ~S" delim *token*)))))
720 (defun install-infix-reader
721 (&optional (start #\{) (end #\}) &key dispatch (readtable *readtable*))
722 "Installs a macro character `{ INFIX... }' for translating infix notation
723 to Lisp forms. You also want to (use-package :infix-keywords) if you do
725 (let ((delim (intern (string end) 'infix-keywords)))
726 (flet ((doit (stream &rest noise)
727 (declare (ignore noise))
728 (read-infix stream :delim delim)))
730 (set-dispatch-macro-character dispatch start #'doit readtable)
731 (set-macro-character start #'doit nil readtable))
732 (unless (or (eql start end)
735 (get-macro-character end readtable)
736 (and func (not nontermp))))
737 (set-macro-character end (lambda (noise)
738 (declare (ignore noise))
739 (error "Unexpected `~C'." end))
742 ;;;--------------------------------------------------------------------------
745 (defun test-infix (string)
746 (with-input-from-string (in string)
749 (defun test-tokenize (string &optional (get-token #'get-token))
750 (with-input-from-string (*stream* string)
751 (loop with *token* = nil
752 do (funcall get-token)
753 until (eq *token* eof)
756 (defun testrig (what run tests)
759 for (input . output) in tests
760 for result = (handler-case (funcall run input)
762 (setf error (format nil "~A" err))
764 unless (equal result output)
768 result = ~:[~S~*~;~*error ~A~]
772 (eq result 'fail) result error
775 finally (return ok)))
778 (testrig "tokenize" #'test-tokenize
783 ("&optional" . (& optional))
784 ("(4)" . (|(| 4 |)|))))
787 (testrig "infix" #'test-infix
794 ("1 + 2 + 3" . (+ 1 2 3))
796 ("x += 5" . (incf x 5))
797 ("1 << 5" . (ash 1 5))
798 ("1 >> 5" . (ash 1 (- 5)))
799 ("1 & 5" . (logand 1 5))
800 ("lambda (x, y) x + y" . (lambda (x y) (+ x y)))
801 ("lambda (x, y) (x += y, x - 1)" . (lambda (x y) (incf x y) (- x 1)))
802 ("lambda (x, &optional y = 1) x - y" .
803 (lambda (x &optional (y 1)) (- x y)))
804 ("foo(x, y)" . (foo x y))
805 ("if a == b then x + y" . (if (= a b) (+ x y)))
806 ("if a == b then x + y else x - y" . (if (= a b) (+ x y) (- x y)))
807 ("if a == b then x + y else if a == -b then x - y" .
808 (cond ((= a b) (+ x y)) ((= a (- b)) (- x y))))
809 ("let x = 1 in x ^ 4" . (let ((x 1)) (expt x 4)))
810 ("x ^ y ^ z" . (expt x (expt y z)))
811 ("a < b and not b < c or c > d" .
812 (or (and (< a b) (not (< b c))) (> c d)))
813 ("cdr(x) = nil" . (setf (cdr x) nil))
814 ("labels foo (x) x + 1, bar (x) x - 1 in foo(bar(y))".
815 (labels ((foo (x) (+ x 1)) (bar (x) (- x 1))) (foo (bar y))))
816 ("defun foo (x) x - 6" .
817 (defun foo (x) (- x 6)))
818 ("bind x = 3 in x - 2" . (let ((x 3)) (- x 2)))
819 ("bind x, y = values(1, 2),
821 docs, decls, body = parse-body(body) in complicated" .
822 (multiple-value-bind (x y) (values 1 2)
824 (multiple-value-bind (docs decls body) (parse-body body)
827 ;;;--------------------------------------------------------------------------
831 (flet ((dotrace (func)
833 (trace :function func
837 :print-all *valstk*))))
839 (dolist (s '(if \( \) \:))
840 (dolist (p '(infix prefix postfix))
841 (let ((op (get s p)))
842 (dotrace (etypecase op
844 (operator (op-func op))
846 (dolist (f '(read-infix parse-infix binop-apply unop-apply pushval popval
847 pushop flushops push-paren get-token))
850 ;;;--------------------------------------------------------------------------