chiark - git - mdw - sod/blob - src/parser/parser-test.lisp

   1 ;;; -*-lisp-*-
   2 ;;;
   3 ;;; Test parser infrastructure
   4 ;;;
   5 ;;; (c) 2009 Straylight/Edgeware
   6 ;;;
   7
   8 ;;;----- Licensing notice ---------------------------------------------------
   9 ;;;
  10 ;;; This file is part of the Sensible Object Design, an object system for C.
  11 ;;;
  12 ;;; SOD is free software; you can redistribute it and/or modify
  13 ;;; it under the terms of the GNU General Public License as published by
  14 ;;; the Free Software Foundation; either version 2 of the License, or
  15 ;;; (at your option) any later version.
  16 ;;;
  17 ;;; SOD is distributed in the hope that it will be useful,
  18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 ;;; GNU General Public License for more details.
  21 ;;;
  22 ;;; You should have received a copy of the GNU General Public License
  23 ;;; along with SOD; if not, write to the Free Software Foundation,
  24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25
  26 (cl:in-package #:sod-test)
  27
  28 (defclass test-parser (test-case)
  29   ())
  30 (add-test *sod-test-suite* (get-suite test-parser))
  31
  32 ;;;--------------------------------------------------------------------------
  33 ;;; Utilities.
  34
  35 (defmacro assert-parse
  36     ((string value winp consumedp &key (scanner (gensym "SCANNER-")))
  37      &body parser)
  38   (once-only (string value winp consumedp)
  39     (with-gensyms (my-value my-winp my-consumedp label what)
  40       `(let ((,scanner (make-string-scanner ,string)))
  41          (declare (ignorable ,scanner))
  42          (multiple-value-bind (,my-value ,my-winp ,my-consumedp)
  43              (with-parser-context
  44                  (character-scanner-context :scanner ,scanner)
  45                (parse ,@parser))
  46            (flet ((,label (,what)
  47                     (format nil "~A; parsing ~S with ~S"
  48                             ,what ,string ',@parser)))
  49              (cond (,winp
  50                     (assert-true ,my-winp (,label "winp"))
  51                     (if (eq ,value t)
  52                         (assert-not-eql ,my-value nil
  53                                         (,label "parser result"))
  54                         (assert-equal ,my-value ,value
  55                                       (,label "parser result"))))
  56                    (t
  57                     (assert-false ,my-winp (,label "winp"))
  58                     (assert-true (and (null (set-difference ,my-value ,value
  59                                                             :test #'equal))
  60                                       (null (set-difference ,value ,my-value
  61                                                             :test #'equal)))
  62                                  (,label "failure indicator"))))
  63              (if ,consumedp
  64                  (assert-true ,my-consumedp (,label "consumedp"))
  65                  (assert-false ,my-consumedp (,label "consumedp")))))))))
  66
  67 ;;;--------------------------------------------------------------------------
  68 ;;; Simple parser tests.
  69 ;;;
  70 ;;; This lot causes SBCL to warn like a mad thing.  It's too clever for us,
  71 ;;; and does half of the work at compile time!
  72
  73 (def-test-method test-simple ((test test-parser) :run nil)
  74   "Test simple atomic parsers, because we rely on them later."
  75
  76   ;; Characters match themselves.  For a character known only at run-time,
  77   ;; use (char CH).
  78   (assert-parse ("abcd" #\a t t) #\a)
  79   (let ((ch #\b))
  80     (assert-parse ("abcd" '(#\b) nil nil) (char ch)))
  81
  82   ;; A character can't match at EOF.
  83   (assert-parse ("" '(#\z) nil nil) #\z)
  84
  85   ;; All characters match :any; but EOF isn't a character.
  86   (assert-parse ("z" #\z t t) :any)
  87   (assert-parse ("" '(:any) nil nil) :any)
  88
  89   ;; The parser (satisfies PREDICATE) succeeds if the PREDICATE returns
  90   ;; true when applied to the current character.
  91   (assert-parse ("a" #\a t t) (satisfies alpha-char-p))
  92   (assert-parse ("0" '(alpha-char-p) nil nil) (satisfies alpha-char-p))
  93
  94   ;; The parser (not CHAR) matches a character other than CHAR; but it won't
  95   ;; match EOF.
  96   (assert-parse ("a" #\a t t) (not #\b))
  97   (assert-parse ("b" '((not #\b)) nil nil) (not #\b))
  98   (assert-parse ("" '((not #\b)) nil nil) (not #\b))
  99
 100   ;; But :eof matches only at EOF.
 101   (assert-parse ("" :eof t nil) :eof)
 102   (assert-parse ("abcd" '(:eof) nil nil) :eof)
 103
 104   ;; Strings match themselves without consuming if they fail.
 105   (assert-parse ("abcd" "ab" t t) "ab")
 106   (assert-parse ("abcd" '("cd") nil nil) "cd"))
 107
 108 (def-test-method test-sequence ((test test-parser) :run nil)
 109
 110   ;; An empty sequence always succeeds and never consumes.  And provokes
 111   ;; warnings: don't do this.
 112   (assert-parse ("" :win t nil) (seq () :win))
 113   (assert-parse ("abcd" :win t nil) (seq () :win))
 114
 115   ;; A `seq' matches the individual parsers in order, and binds their results
 116   ;; to variables -- if given.  The result is the value of the body.  If any
 117   ;; parser fails having consumed input, then input stays consumed.  There's
 118   ;; no backtracking.
 119   (assert-parse ("abcd" '(#\a . #\c) t t)
 120     (seq ((foo #\a) #\b (bar #\c)) (cons foo bar)))
 121   (assert-parse ("abcd" '(#\c) nil t)
 122     (seq ((foo #\a) (bar #\c)) (cons foo bar)))
 123   (assert-parse ("abcd" '(#\c) nil nil)
 124     (seq ((bar #\c) (foo #\a)) (cons foo bar))))
 125
 126 (def-test-method test-repeat ((test test-parser) :run nil)
 127
 128   ;; A `many' matches a bunch of similar things in a row.  You can compute a
 129   ;; result using `do'-like accumulation.
 130   (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc)) #\a))
 131
 132   ;; The default minimum is zero; so the parser always succeeds.
 133   (assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc)) #\b))
 134
 135   ;; You can provide an explicit minimum.  Then the match might fail.
 136   (assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min 2) #\a))
 137   (assert-parse ("aabb" '(#\a) nil t) (many (acc 0 (1+ acc) :min 3) #\a))
 138
 139   ;; You can also provide an explicit maximum.  This will cause the parser to
 140   ;; stop searching, but it can't make it fail.
 141   (assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max 3) #\a))
 142
 143   ;; You can provide both a maximum and a minimum at the same time.  If
 144   ;; they're consistent, you won't be surprised.  If they aren't, then the
 145   ;; maximum wins and the minimum is simply ignored (currently).
 146   (assert-parse ("aaaaab" 4 t t)
 147     (many (acc 0 (1+ acc) :min 3 :max 4) #\a))
 148   (assert-parse ("aabbbb" '(#\a) nil t)
 149     (many (acc 0 (1+ acc) :min 3 :max 4) #\a))
 150   (assert-parse ("aaabbb" 3 t t)
 151     (many (acc 0 (1+ acc) :min 3 :max 3) #\a))
 152   (assert-parse ("aaabbb" 3 t t)
 153     (many (acc 0 (1+ acc) :min 17 :max 3) #\a))
 154
 155   ;; You can provide a separator.  The `many' parser will look for the
 156   ;; separator between each of the main items, but will ignore the results.
 157   (assert-parse ("a,a,abc" 3 t t) (many (acc 0 (1+ acc)) #\a #\,))
 158   (assert-parse ("a,a,abc" 2 t t) (many (acc 0 (1+ acc) :max 2) #\a #\,))
 159
 160   ;; If `many' sees a separator then by default it commits to finding another
 161   ;; item; so this can cause a parse to fail.
 162   (assert-parse ("a,a,bc" '(#\a) nil t) (many (acc 0 (1+ acc)) #\a #\,))
 163   (assert-parse ("abc" 1 t t) (many (acc 0 (1+ acc)) #\a #\,))
 164
 165   ;; If you specify a separator then the default minimum number of
 166   ;; repetitions becomes 1 rather than 0.  But you can override this
 167   ;; explicitly.
 168   (assert-parse ("bc" '(#\a) nil nil) (many (acc 0 (1+ acc)) #\a #\,))
 169   (assert-parse ("bc" 0 t nil) (many (acc 0 (1+ acc) :min 0) #\a #\,))
 170
 171   ;; The parser will fail looking for a separator if there aren't enough
 172   ;; items.
 173   (assert-parse ("a,abc" '(#\,) nil t)
 174     (many (acc 0 (1+ acc) :min 3) #\a #\,))
 175
 176   ;; You can override the commit-on-separator behaviour by using :commit.
 177   ;; This makes a trailing separator legal (but optional), so it also affects
 178   ;; the behaviour regarding maximum and minimum repetitions.  (Commitment is
 179   ;; irrelevant if you don't have a separator.)
 180   (assert-parse ("a,a,bc" 2 t t)
 181     (many (acc 0 (1+ acc) :commitp nil) #\a #\,))
 182   (assert-parse ("a,a,abc" 3 t t)
 183     (many (acc 0 (1+ acc) :commitp nil) #\a #\,))
 184   (assert-parse ("a,a,a,bc" 3 t t)
 185     (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp t) #\a #\,))
 186           #\,)
 187       n))
 188   (assert-parse ("a,a,a,bc" 3 t t)
 189     (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp nil) #\a #\,))
 190           #\b)
 191       n))
 192   (assert-parse ("a,a,bc" '(#\a) nil t)
 193     (many (acc 0 (1+ acc) :min 3 :commitp nil) #\a #\,))
 194
 195   ;; The `many' parser won't backtrack.  The `many' eats as many `a's as
 196   ;; possible; asking for another one is sure to fail.
 197   (assert-parse ("aaaabc" '(#\a) nil t) (and (skip-many () #\a) #\a)))
 198
 199 (def-test-method test-repeat-hairy ((test test-parser) :run nil)
 200   ;; The `many' expander is very hairy and does magical things if it notices
 201   ;; that some of its arguments are constants.  So here we test a number of
 202   ;; the above things again, using variables so that it has to produce code
 203   ;; which makes decisions at run-time.  (I've no doubt that SBCL will issue
 204   ;; an infinite number of notes explaining how clever it is and how it can
 205   ;; do it all at compile-time anyway.  Of course, suppressing these notes is
 206   ;; the main reason `many' is so hairy anyway.)
 207
 208   (let ((zero 0) (two 2) (three 3) (yes t) (no nil))
 209
 210     ;; Minima.
 211     (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :min zero) #\a))
 212     (assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc) :min zero) #\b))
 213     (assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min two) #\a))
 214     (assert-parse ("aabb" '(#\a) nil t)
 215       (many (acc 0 (1+ acc) :min three) #\a))
 216
 217     ;; Maxima.
 218     (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :max no) #\a))
 219     (assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max three) #\a))
 220
 221     ;; And now together with separators and commitment.  Oh, my.
 222     (assert-parse ("a,a,a,bc" 3 t t)
 223       (many (acc 0 (1+ acc) :commitp no) #\a #\,))
 224     (assert-parse ("a,a,a,bc" '(#\a) nil t)
 225       (many (acc 0 (1+ acc) :commitp yes) #\a #\,))
 226     (assert-parse ("a,a,bc" '(#\a) nil t)
 227       (many (acc 0 (1+ acc) :min three :commitp yes) #\a #\,))
 228     (assert-parse ("a,a,bc" '(#\a) nil t)
 229       (many (acc 0 (1+ acc) :min 3 :commitp yes) #\a #\,))
 230     (assert-parse ("a,a,bc" '(#\a) nil t)
 231       (many (acc 0 (1+ acc) :min three :commitp t) #\a #\,))
 232     (assert-parse ("a,a,a,bc" 3 t t)
 233       (seq ((n (many (acc 0 (1+ acc) :max three :commitp no) #\a #\,)) #\b)
 234         n))
 235     (assert-parse ("a,a,a,bc" 3 t t)
 236       (seq ((n (many (acc 0 (1+ acc) :max three :commitp yes) #\a #\,)) #\,)
 237         n))
 238     (assert-parse ("a,a,a,bc" 3 t t)
 239       (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp no) #\a #\,)) #\b)
 240         n))
 241     (assert-parse ("a,a,a,bc" 3 t t)
 242       (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp yes) #\a #\,)) #\,)
 243         n))
 244     (assert-parse ("a,a,a,bc" 3 t t)
 245       (seq ((n (many (acc 0 (1+ acc) :max three :commitp nil) #\a #\,)) #\b)
 246         n))
 247     (assert-parse ("a,a,a,bc" 3 t t)
 248       (seq ((n (many (acc 0 (1+ acc) :max three :commitp t) #\a #\,)) #\,)
 249         n))))
 250
 251 (def-test-method test-alternate ((test test-parser) :run nil)
 252
 253   ;; An `or' matches the first parser that either succeeds or fails having
 254   ;; consumed input.
 255   (assert-parse ("abcd" #\a t t) (or #\a #\b))
 256   (assert-parse ("abcd" #\a t t) (or #\b #\a))
 257   (assert-parse ("abcd" '(#\b #\c) nil nil) (or #\b #\c))
 258
 259   ;; Strings don't consume if they fail.
 260   (assert-parse ("abcd" "ab" t t) (or "cd" "ab"))
 261   (assert-parse ("abcd" "ab" t t) (or "ad" "ab"))
 262   (assert-parse ("abcd" '("ad" "ac") nil nil) (or "ad" "ac"))
 263
 264   ;; But `seq' will if some component consumes.
 265   (assert-parse ("abcd" '(#\d) nil t) (or (and #\a #\d) "ab"))
 266   (assert-parse ("abcd" "ab" t t) (or (and #\c #\d) "ab"))
 267
 268   ;; We can tame this using `peek' which rewinds the source if its argument
 269   ;; fails, so as to hide consumption of input.
 270   (assert-parse ("abcd" "ab" t t) (or (peek (and #\a #\d)) "ab"))
 271   (assert-parse ("abcd" '(#\a #\b "cd") t t)
 272     (seq ((foo (peek (seq ((foo #\a) (bar #\b)) (list foo bar))))
 273           (bar "cd"))
 274       (append foo (list bar))))
 275
 276   ;; Failure indicators are union'd if they all fail.
 277   (assert-parse ("abcd" '(#\q #\x #\z) nil nil)
 278     (or #\q (peek (and #\a (or #\x #\q))) #\z))
 279
 280   ;; But if any of them consumed input then you only get the indicators from
 281   ;; the consuming branch, because we committed to it when we consumed the
 282   ;; input.
 283   (assert-parse ("abcd" '(#\x #\q) nil t)
 284     (or #\q #\z (and #\a (or #\q #\x)))))
 285
 286 ;;;--------------------------------------------------------------------------
 287 ;;; Some tests with a simple recursive parser.
 288
 289 (defstruct (node
 290              (:predicate nodep)
 291              (:constructor make-node (left data right)))
 292   "Structure type for a simple binary tree."
 293   left data right)
 294
 295 (defun parse-tree (scanner)
 296   "Parse a textual representation into a simple binary tree.
 297
 298    The syntax is simple:
 299
 300         TREE ::= EMPTY | `(' TREE CHAR TREE `)'
 301
 302    There's an ambiguity in this syntax, at least if you have limited
 303    lookahead: suppose you've just parsed the opening `(' of a TREE, and you
 304    see another `(' -- is it the start of the non-empty left sub-TREE, or is
 305    it the CHAR following an empty left sub-TREE?  We opt for the first choice
 306    always."
 307
 308   ;; This came from another project, although it isn't actually used there.
 309   ;; It exposed the weakness in an earlier design which prompted the addition
 310   ;; of the CONSUMEDP flags to the parser protocol.
 311
 312   (with-parser-context (character-scanner-context :scanner scanner)
 313     (labels ((tree ()
 314                (parse (or (seq (#\(
 315                                 (left (tree))
 316                                 (data :any)
 317                                 (right (tree))
 318                                 #\))
 319                             (make-node left data right))
 320                           (values nil t nil)))))
 321       (parse (seq ((tree (tree)) :eof)
 322                tree)))))
 323
 324 (defun parse-tree-lookahead (scanner)
 325   "Parse a textual representation into a simple binary tree.
 326
 327    The syntax is simple, and, indeed, the grammar's the same as for
 328    `sod-parse-tree':
 329
 330         TREE ::= EMPTY | `(' TREE CHAR TREE `)'
 331
 332    But the rules are different.  Instead of resolving the `ambiguity' between
 333    TREE and CHAR when we find another `(' after the opening `(' of a TREE
 334    deterministically in favour of TREE as `parse-tree' does, we try that
 335    first, and backtrack if necessary."
 336
 337   ;; Bison can do this, but you have to persuade it to use the scary GLR
 338   ;; parser algorithm
 339
 340   (with-parser-context (character-scanner-context :scanner scanner)
 341     (labels ((tree ()
 342                (parse (or (peek (seq (#\(
 343                                       (left (tree))
 344                                       (data :any)
 345                                       (right (tree))
 346                                       #\))
 347                                   (make-node left data right)))
 348                           (values nil t nil)))))
 349       (parse (seq ((tree (tree)) :eof)
 350                tree)))))
 351
 352 (def-test-method test-simple-tree-parser ((test test-parser) :run nil)
 353   (assert-parse ("" nil t nil :scanner sc) (parse-tree sc))
 354   (assert-parse ("((a)b((c)d(e)))" t t t :scanner sc) (parse-tree sc))
 355   (assert-parse ("((a)b((c)d(e)))z" '(:eof) nil t :scanner sc)
 356     (parse-tree sc))
 357   (assert-parse ("((a)b((c)d(e))" '(#\)) nil t :scanner sc) (parse-tree sc))
 358   (assert-parse ("(([)*(]))" t t t :scanner sc) (parse-tree sc))
 359   (assert-parse ("((()-()))" '(#\)) nil t :scanner sc) (parse-tree sc))
 360   (assert-parse ("((()-()))" t t t :scanner sc) (parse-tree-lookahead sc)))
 361
 362 ;;;--------------------------------------------------------------------------
 363 ;;; Test expression parser.
 364
 365 (eval-when (:compile-toplevel :load-toplevel :execute)
 366   (defparse token (:context (context character-parser-context) parser)
 367     (with-gensyms (value)
 368       (expand-parser-spec context
 369                           `(seq ((,value ,parser) :whitespace) ,value)))))
 370
 371 (let ((add (binop "+" (x y 5) `(+ ,x ,y)))
 372       (sub (binop "-" (x y 5) `(- ,x ,y)))
 373       (mul (binop "*" (x y 7) `(* ,x ,y)))
 374       (div (binop "/" (x y 7) `(/ ,x ,y)))
 375       (eq (binop "=" (x y 3 :assoc nil) `(= ,x ,y)))
 376       (ne (binop "/=" (x y 3 :assoc nil) `(/= ,x ,y)))
 377       (lt (binop "<" (x y 3 :assoc nil) `(< ,x ,y)))
 378       (gt (binop ">" (x y 3 :assoc nil) `(> ,x ,y)))
 379       (and (binop "&" (x y 2) `(and ,x ,y)))
 380       (or (binop "|" (x y 1) `(or ,x ,y)))
 381       (expt (binop "**" (x y 8 :assoc :right) `(** ,x ,y)))
 382       (neg (preop "-" (x 9) `(- ,x)))
 383       (not (preop "!" (x 2) `(not ,x)))
 384       (fact (postop "!" (x 10) `(! ,x)))
 385       (lp (lparen #\))) (rp (rparen #\)))
 386       (lb (lparen #\])) (rb (rparen #\])))
 387   (defun test-parse-expr (string)
 388     (with-parser-context (string-parser :string string)
 389       (parse (seq (:whitespace
 390                    (value (expr (:nestedp nestedp)
 391                             (token (many (a 0 (+ (* a 10) it) :min 1)
 392                                      (filter digit-char-p)))
 393                             (token (or (seq ("**") expt)
 394                                        (seq ("/=") ne)
 395                                        (seq (#\+) add)
 396                                        (seq (#\-) sub)
 397                                        (seq (#\*) mul)
 398                                        (seq (#\/) div)
 399                                        (seq (#\=) eq)
 400                                        (seq (#\<) lt)
 401                                        (seq (#\>) gt)
 402                                        (seq (#\&) and)
 403                                        (seq (#\|) or)))
 404                             (token (or (seq (#\() lp)
 405                                        (seq (#\[) lb)
 406                                        (seq (#\-) neg)
 407                                        (seq (#\!) not)))
 408                             (token (or (seq (#\!) fact)
 409                                        (when nestedp
 410                                          (or (seq (#\)) rp)
 411                                              (seq (#\]) rb)))))))
 412                    (next (or :any (t :eof))))
 413                (cons value next))))))
 414
 415 (defun assert-expr-parse (string value winp consumedp)
 416   (multiple-value-bind (v w c) (test-parse-expr string)
 417     (flet ((message (what)
 418              (format nil "expression ~S; ~A" string what)))
 419       (cond (winp (assert-true w (message "winp"))
 420                   (assert-equal v value (message "value")))
 421             (t (assert-false w (message "winp"))
 422                (assert-equal v value (message "expected"))))
 423       (assert-eql c consumedp (message "consumedp")))))
 424
 425 (def-test-method test-expression-parser ((test test-parser) :run nil)
 426   (assert-expr-parse "1 + 2 + 3" '((+ (+ 1 2) 3) . :eof) t t)
 427   (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
 428   (assert-expr-parse "1 * 2 + 3" '((+ (* 1 2) 3) . :eof) t t)
 429   (assert-expr-parse "(1 + 2) * 3" '((* (+ 1 2) 3) . :eof) t t)
 430   (assert-expr-parse "1 ** 2 ** 3" '((** 1 (** 2 3)) . :eof) t t)
 431   (assert-expr-parse "1 + 2) * 3" '((+ 1 2) . #\)) t t)
 432   (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
 433   (assert-expr-parse "! 1 + 2 = 3 | 6 - 3 /= 12/6"
 434                      '((or (not (= (+ 1 2) 3))
 435                            (/= (- 6 3) (/ 12 6)))
 436                        . :eof)
 437                      t t)
 438   (assert-expr-parse "! 1 > 2 & ! 4 < 6 | 3 < 4 & 9 > 10"
 439                      '((or (and (not (> 1 2)) (not (< 4 6)))
 440                            (and (< 3 4) (> 9 10)))
 441                        . :eof)
 442                      t t)
 443
 444   (assert-condition 'simple-error (test-parse-expr "(1 + 2"))
 445   (assert-condition 'simple-error (test-parse-expr "(1 + 2]"))
 446   (assert-condition 'simple-error (test-parse-expr "1 < 2 < 3")))
 447
 448 ;;;----- That's all, folks --------------------------------------------------