ref: 64ed7785525654984f16cc4aadd25bd5e3a3331e
dir: /test/ast/match.lsp/
; tree regular expression pattern matching ; by Jeff Bezanson (def (unique lst) (if (not lst) nil (cons (car lst) (filter (lambda (x) (not (eq? x (car lst)))) (unique (cdr lst)))))) ; list of special pattern symbols that cannot be variable names (def metasymbols '(_ ...)) ; expression tree pattern matching ; matches expr against pattern p and returns an assoc list ((var . expr) (var . expr) ...) ; mapping variables to captured subexpressions, or nil if no match. ; when a match succeeds, __ is always bound to the whole matched expression. ; ; p is an expression in the following pattern language: ; ; _ match anything, not captured ; <func> any scheme function; matches if (func expr) returns T ; <var> match anything and capture as <var>. future occurrences of <var> in the pattern ; must match the same thing. ; (head <p1> <p2> etc) match an s-expr with 'head' matched literally, and the rest of the ; subpatterns matched recursively. ; (-/ <ex>) match <ex> literally ; (-^ <p>) complement of pattern <p> ; (-- <var> <p>) match <p> and capture as <var> if match succeeds ; ; regular match constructs: ; ... match any number of anything ; (-$ <p1> <p2> etc) match any of subpatterns <p1>, <p2>, etc ; (-* <p>) match any number of <p> ; (-? <p>) match 0 or 1 of <p> ; (-+ <p>) match at least 1 of <p> ; all of these can be wrapped in (-- var ) for capturing purposes ; This is NP-complete. Be careful. ; (def (match- p expr state) (cond ((symbol? p) (cond ((eq? p '_) state) (else (let ((capt (assq p state))) (if capt (and (equal? expr (cdr capt)) state) (cons (cons p expr) state)))))) ((procedure? p) (and (p expr) state)) ((cons? p) (cond ((eq? (car p) '-/) (and (equal? (cadr p) expr) state)) ((eq? (car p) '-^) (and (not (match- (cadr p) expr state)) state)) ((eq? (car p) '--) (and (match- (caddr p) expr state) (cons (cons (cadr p) expr) state))) ((eq? (car p) '-$) ; greedy alternation for toplevel pattern (match-alt (cdr p) nil (list expr) state nil 1)) (else (and (cons? expr) (equal? (car p) (car expr)) (match-seq (cdr p) (cdr expr) state (length (cdr expr))))))) (else (and (equal? p expr) state)))) ; match an alternation (def (match-alt alt prest expr state var L) (if (not alt) nil ; no alternatives left (let ((subma (match- (car alt) (car expr) state))) (or (and subma (match-seq prest (cdr expr) (if var (cons (cons var (car expr)) subma) subma) (- L 1))) (match-alt (cdr alt) prest expr state var L))))) ; match generalized kleene star (try consuming min to max) (def (match-star- p prest expr state var min max L sofar) (cond ; case 0: impossible to match ((> min max) nil) ; case 1: only allowed to match 0 subexpressions ((= max 0) (match-seq prest expr (if var (cons (cons var (reverse sofar)) state) state) L)) ; case 2: must match at least 1 ((> min 0) (and (match- p (car expr) state) (match-star- p prest (cdr expr) state var (- min 1) (- max 1) (- L 1) (cons (car expr) sofar)))) ; otherwise, must match either 0 or between 1 and max subexpressions (else (or (match-star- p prest expr state var 0 0 L sofar) (match-star- p prest expr state var 1 max L sofar))))) (def (match-star p prest expr state var min max L) (match-star- p prest expr state var min max L nil)) ; match sequences of expressions (def (match-seq p expr state L) (cond ((not state) nil) ((not p) (if (not expr) state nil)) (else (let ((subp (car p)) (var nil)) (if (and (cons? subp) (eq? (car subp) '--)) (begin (set! var (cadr subp)) (set! subp (caddr subp))) nil) (let ((head (if (cons? subp) (car subp) nil))) (cond ((eq? subp '...) (match-star '_ (cdr p) expr state var 0 L L)) ((eq? head '-*) (match-star (cadr subp) (cdr p) expr state var 0 L L)) ((eq? head '-+) (match-star (cadr subp) (cdr p) expr state var 1 L L)) ((eq? head '-?) (match-star (cadr subp) (cdr p) expr state var 0 1 L)) ((eq? head '-$) (match-alt (cdr subp) (cdr p) expr state var L)) (else (and (cons? expr) (match-seq (cdr p) (cdr expr) (match- (car p) (car expr) state) (- L 1)))))))))) (def (match p expr) (match- p expr (list (cons '__ expr)))) ; given a pattern p, return the list of capturing variables it uses (def (patargs- p) (cond ((and (symbol? p) (not (member p metasymbols))) (list p)) ((cons? p) (if (eq? (car p) '-/) nil (unique (apply append (map patargs- (cdr p)))))) (else nil))) (def (patargs p) (cons '__ (patargs- p))) ; try to transform expr using a pattern-lambda from plist ; returns the new expression, or expr if no matches (def (apply-patterns plist expr) (if (not plist) expr (if (procedure? plist) (let ((enew (plist expr))) (if (not enew) expr enew)) (let ((enew ((car plist) expr))) (if (not enew) (apply-patterns (cdr plist) expr) enew))))) ; top-down fixed-point macroexpansion. this is a typical algorithm, ; but it may leave some structure that matches a pattern unexpanded. ; the advantage is that non-terminating cases cannot arise as a result ; of expression composition. in other words, if the outer loop terminates ; on all inputs for a given set of patterns, then the whole algorithm ; terminates. pattern sets that violate this should be easier to detect, ; for example ; (pattern-lambda (/ 2 3) '(/ 3 2)), (pattern-lambda (/ 3 2) '(/ 2 3)) ; TODO: ignore quoted expressions (def (pattern-expand plist expr) (if (not (cons? expr)) expr (let ((enew (apply-patterns plist expr))) (if (eq? enew expr) ; expr didn't change; move to subexpressions (cons (car expr) (map (lambda (subex) (pattern-expand plist subex)) (cdr expr))) ; expr changed; iterate (pattern-expand plist enew)))))