;; Handles creating and using a self-balancing binary tree (specifically ;; an AVL tree). ;; ;; The only externally visible function is NEW-TREE. ;; ;; Create a new tree, with comparator function CMP. This returns a procedure ;; which gives access to the tree via the following commands. ;; ;; The CMP procedure is a two-argument procedure, which is applied to the ;; objects added to the tree, and which returns negative, zero ;; or positive depending on whether the first argument is less than, ;; equal to, or greater than the second. ;; ;; (define tree (new-tree cmp)) : Create a new TREE procedure. ;; ;; (tree 'add data) ;; Add the given data to the tree. The data must be an acceptable ;; argument to the CMP function. ;; ;; (tree 'get key) ;; Retrieve an item of data matching the key (that is, ;; an item D such that (CMP KEY D) evaluates to zero. If there is ;; no such item, it returns #f. The KEY is always applied ;; as the first argument of CMP. ;; ;; (tree 'drill visitor-down ?visitor-up) ;; Drill down into the tree, visiting each node on the way down ;; and up. ;; ;; Prototypes: ;; VISITOR-DOWN data -> integer ;; VISITOR-UP data integer boolean -> integer ;; ;; Apply VISITOR-DOWN to the data of the current node. If it ;; returns 0, return the result of applying VISITOR-UP to ;; (current-node #f #f). ;; ;; If it returns negative/positive, return the result of ;; applying VISITOR-UP to the data of the current node, ;; the result of applying this algorithm to the left/right child, ;; and a boolean which is true if the child in question was from ;; the right tree. ;; ;; If we run out of tree (that is, if NODE is #f), then call ;; (VISITOR-UP #f #f #f). ;; ;; If VISITOR-UP is not present, then a default is used, which ;; simply returns the data of the node on which VISITOR-DOWN ;; returned 0, or #f if there was no such node. ;; ;; (tree 'restore filename) ;; Restore the state of the tree from the given file. ;; ;; (tree 'save filename) ;; Save the state of the tree to the given file. ;; ;; (tree 'stats) ;; Return a list of pairs giving statistics of the tree. ;; ;; (tree 'sum visitor) ;; Prototype: VISITOR data any any -> any ;; ;; 'Traverse' the tree by, for each node, applying VISITOR ;; to the node's data, the result of applying this procedure ;; to the node's left child, and to its right child, returning ;; the result. ;; ;; (tree 'traverse visitor) ;; Visit each node in the tree in order, applying the one-arg ;; procedure VISITOR to the data of each node. ;; ;; Copyright Norman Gray 2006, ;; Released under the terms of the GNU General Public Licence ;; ;; $Revision$ (define (new-tree cmp) (let ((top #f) (tree-size 0) ;number of nodes in the tree (<=> cmp)) (lambda args (define dispatch (let ((op (car args))) (cond ((eq? op 'add) ;add data to the tree (lambda (data) (if top (begin (add-node! top (new-node data) <=>) (let ((new-top (balance-tree top))) (if (not (eq? new-top top)) (set! top new-top)))) (set! top (new-node data))) (set! tree-size (+ tree-size 1)))) ((eq? op 'drill) ;drill down (lambda (visitor-down . visitor-up) (if (null? visitor-up) (drill-into-tree top visitor-down (let ((have-value? #f) (value #f)) (lambda (data child-result right?) (if (not have-value?) (begin (set! value data) (set! have-value? #t))) value))) (drill-into-tree top visitor-down (car visitor-up))))) ((eq? op 'get) ;retrieve data (lambda (query) (find-in-tree top query cmp))) ((eq? op 'restore) ;restore state from file (lambda (filename data-deserialiser) (call-with-input-file filename (lambda (port) (call-with-values (lambda () (tree-deserialise data-deserialiser <=> port)) (lambda (tree n-values) (set! top tree) (set! tree-size n-values))))) #t)) ((eq? op 'save) ;save state to file (lambda (filename data-serialiser) (call-with-output-file filename (lambda (port) (tree-serialise top data-serialiser port))))) ((eq? op 'stats) ;return statistics (lambda () `((size . ,tree-size)))) ((eq? op 'sum) ;'sum' the tree (lambda (visitor) (sum-tree top visitor))) ((eq? op 'traverse) ;traverse the tree (lambda (visitor) (traverse-tree top visitor))) ((eq? op 'Xdisplay) ;debugging (lambda () (format #t "top=~a~%" (tree->string top)))) ((eq? op 'Xtest) ;regression test run-tests) (else (error (format #f "Unrecognised command ~a in NEW-TREE" (car args))))))) (apply dispatch (cdr args))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Testing support (define run-tests (let ((testlist '())) (define (all-true l) ;return true if all args true (cond ((null? l) #t) ((not (car l)) #f) (else (all-true (cdr l))))) (lambda args (cond ((null? args) ;run tests, returning #t if all pass (all-true (map (lambda (testcase) (let ((name (car testcase)) (f (cadr testcase)) (exp (caddr testcase))) (let* ((actual (f)) (res (string=? actual exp))) (if (not res) (format #t "Test: ~a~%Expected ~a~%Got ~a~%" name exp actual)) res))) testlist))) ((= (length args) 3) (set! testlist (cons args testlist))) (else (error "Wrong number of args to run-tests")))))) ;; (make-test TEST-BODY EXPECTED-RESULT) ;; TEST-BODY is the body of a no-argument procedure, which returns a string. ;; This result is tested against the EXPECTED-RESULT. (define-syntax make-test (syntax-rules () ((_ args ...) ;; disable make-test. This has to expand to something (otherwise ;; that's a syntax error, and it has to expand to a definition, ;; otherwise we can't compile this file inside the module in ;; sisc/binary-tree.scm. (define _null #f)) ;disabled ;; ((_ test-body ok-result) ;; (run-tests "anon" (lambda () test-body) ok-result)) ;; ((_ name test-body ok-result) ;; (run-tests name (lambda () test-body) ok-result)) )) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Implementation ;; ;; See http://en.wikipedia.org/wiki/AVL_tree (define (new-node data) (cons (cons (cons #f #f) #f) data)) (define (data node) (cdr node)) (define (set-data! node data) (set-cdr! node data)) (define (left-child node) (caaar node)) ;; Set the left-hand child of the given NODE to NEW-NODE (define (set-left-child! node new-node) (set-car! (caar node) new-node)) (define (right-child node) (cdaar node)) ;; See SET-LEFT-CHILD! (define (set-right-child! node new-node) (set-cdr! (caar node) new-node)) ;; Return the height of the given NODE (number of levels of tree, counting ;; from 1 for the root node -- an empty node has height 1). ;; If NODE is #f, return 0. The calculation is cached for next time. (define (height node) (if node (begin (if (not (cdar node)) (set-cdr! (car node) (+ 1 (max (if (left-child node) (height (left-child node)) 0) (if (right-child node) (height (right-child node)) 0))))) (cdar node)) 0)) ;; Clear the height cache, so that the value will be calculated afresh ;; next time HEIGHT is called. (define (clear-height! node) (if node (set-cdr! (car node) #f))) ;; Add the NEW-NODE to the given NODE, with ordering relation <=>. ;; Do any rotations necessary to keep the tree balanced. ;; If there is already a node which is equal to the NEW-NODE (in the ;; sense that procedure <=> returns 0), then silently replace its data ;; with the new data. (define (add-node! node new-node <=>) ;; (format #t "Adding node [~a] to node [~a], height=~a~%" ;; (data new-node) (data node) (height node)) (clear-height! node) ;invalidate height cache (let ((cmp (<=> (data new-node) (data node)))) (cond ((= cmp 0) (set-data! node (data new-node))) ((< cmp 0) (if (left-child node) (begin (add-node! (left-child node) new-node <=>) (let ((new-l (balance-tree (left-child node)))) (if (not (eq? new-l (left-child node))) (set-left-child! node new-l)))) (set-left-child! node new-node))) (else (if (right-child node) (begin (add-node! (right-child node) new-node <=>) (let ((new-r (balance-tree (right-child node)))) (if (not (eq? new-r (right-child node))) (set-right-child! node new-r)))) (set-right-child! node new-node)))))) (make-test "uneven-tree" (tree->string (make-test-tree char-cmp #\a #\b #\c #\d #\e)) "(.a5.(.b4.(.c3.(.d2.(.e1.)))))") (make-test "growing-tree" ; add to tree, causing balancing (let ((t (make-test-tree char-cmp #\d #\b))) (add-node! t (new-node #\a) char-cmp) ;requires rebalancing (add-node! t (new-node #\e) char-cmp) ;doesn't (add-node! t (new-node #\f) char-cmp) ;again (tree->string t)) "(((.a1.).b2.).d3.(.e2.(.f1.)))") ;; Test balancing a tree (here rooted at d), where the subtree (b) has not ;; had to be rebalanced. The following will fail if the caching/invalidating ;; of (height) is not done correctly (make-test "growing-arm" ;add to tree, along one arm (let ((t (make-test-tree char-cmp #\d #\b #\e #\a #\c))) ;balanced (height t) ;fill cache (add-node! t (new-node #\1) char-cmp) ;unbalanced (tree->string (balance-tree t))) "(((.11.).a2.).b3.((.c1.).d2.(.e1.)))") ;; (balance-tree N): balance the tree rooted at the node N, ;; returning either N or the new root node. It's OK for N to be #f. ;; If N is a number, then the function is disabled if the number is zero, ;; and enabled otherwise (for testing). When disabled, the function returns ;; its argument always. (define balance-tree (let ((disabled? #f)) (lambda (n) (cond ((number? n) (set! disabled? (= n 0))) (disabled? n) ((not n) #f) (else (let* ((l (left-child n)) (r (right-child n)) (hl (if l (height l) 0)) (hr (if r (height r) 0))) ;; (format #t "<[~a]~a|~a|~a[~a]>~%" ;; hl (if l (data l) ".") (data n) ;; (if r (data r) ".") hr) (cond ((and l (<= (- hr hl) -2)) (rotate-right n)) ((and r (>= (- hr hl) +2)) (rotate-left n)) (else n)))))))) (make-test "balance1" (tree->string (balance-tree (make-test-tree char-cmp #\c #\b #\a))) "((.a1.).b2.(.c1.))") (make-test "balance2" (tree->string (balance-tree (make-test-tree char-cmp #\a #\b #\c))) "((.a1.).b2.(.c1.))") (make-test "balance3" (tree->string (balance-tree (make-test-tree char-cmp #\d #\b #\a #\c))) "((.a1.).b3.((.c1.).d2.))") ;; The following test doesn't work, because the 'c'-L->'a'-R->'b' ;; at the left-hand edge does not end up with the 'b' being rotated ;; up as would be necessary to balance this. It doesn't really matter for now. ;; (make-test "balance4" ;; (let ((t (new-node #\d))) ;; (for-each (lambda (x) (add-node! t (new-node x) char-cmp)) ;; '(#\c #\e #\f #\g #\a #\b)) ;; (tree->string t)) ;; "(((.a1.).b2.(.c1.)).d4.((.e1.).f2.(.g1.)))") ;; Do a right-rotation of a node N, returning the new top node ;; (the old left child). ;; See http://en.wikipedia.org/wiki/Tree_rotation (define (rotate-right d) (if (not (left-child d)) (error "Can't rotate-right a tree with no left child")) (let ((b (left-child d)) (c (right-child (left-child d)))) ;ok for this to be #f (set-right-child! b d) (set-left-child! d c) (for-each clear-height! (list b c d)) b)) (make-test "rotate-right-degenerate" (tree->string (rotate-right (make-test-tree char-cmp #\d #\b))) ;degenerate case "(.b2.(.d1.))") (make-test "rotate-right" (tree->string (rotate-right (make-test-tree char-cmp #\d #\b #\a #\c))) "((.a1.).b3.((.c1.).d2.))") ;; As for ROTATE-RIGHT (define (rotate-left b) (if (not (right-child b)) (error "Can't rotate-left a tree with no right child")) (let ((d (right-child b)) (c (left-child (right-child b)))) (set-left-child! d b) (set-right-child! b c) (for-each clear-height! (list b c d)) d)) (make-test "rotate-left-degenerate" (tree->string (rotate-left (make-test-tree char-cmp #\b #\d))) ;degenerate case "((.b1.).d2.)") (make-test "rotate-left" (tree->string (rotate-left (make-test-tree char-cmp #\b #\d #\c #\e))) "((.b2.(.c1.)).d3.(.e1.))") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Other tree operations ;; Find a given node in the tree, and return its data, or #f if none ;; can be found. Always apply QUERY as the first argument of ;; procedure <=> (define (find-in-tree node query <=>) (if node (let ((cmp (<=> query (data node)))) (if (= cmp 0) (data node) (if (< cmp 0) (if (left-child node) (find-in-tree (left-child node) query <=>) #f) (if (right-child node) (find-in-tree (right-child node) query <=>) #f)))) #f)) (make-test "find" (let ((cmp (lambda (x1 x2) (char-cmp (car x1) (car x2)))) (t (new-node '(#\d . 4)))) (for-each (lambda (p) (add-node! t (new-node p) cmp)) '((#\b . 2) (#\f . 6) (#\a . 1) (#\c . 3) (#\e . 5) (#\g . 7))) (format #f "~a/~a/~a/~a" (cdr (find-in-tree t '(#\a . #f) cmp)) (cdr (find-in-tree t '(#\e . #f) cmp)) (cdr (find-in-tree t '(#\d . 9999) cmp)) (find-in-tree t '(#\z . 99) cmp))) "1/5/4/#f") ;; Drill down into the tree, visiting each node on the way down and up. ;; ;; Apply VISITOR-DOWN to the data of the current node. If it returns ;; 0, return the result of applying VISITOR-UP to (this-node #f ???). ;; ;; If it returns negative/positive, return the result of applying ;; VISITOR-UP to this node, and the result of applying DRILL-INTO-TREE ;; to the left/right child, and a boolean which is true if the child ;; in question was from the right tree. If we run out of tree (that ;; is, if NODE is #f), then call VISITOR-UP on (#f #f ??). (define (drill-into-tree node visitor-down visitor-up) (if node (let ((lr (visitor-down (data node)))) (if (= lr 0) (visitor-up (data node) #f #f) (visitor-up (data node) (drill-into-tree (if (< lr 0) (left-child node) (right-child node)) visitor-down visitor-up) (> lr 0)))) (visitor-up #f #f #f))) ;; Traverse the tree rooted at NODE (which may be #f), applying the ;; one-argument procedure VISITOR to the data of each node. (define (traverse-tree node visitor) (if node (begin (if (left-child node) (traverse-tree (left-child node) visitor)) (visitor (data node)) (if (right-child node) (traverse-tree (right-child node) visitor))) #f)) ;; 'Traverse' the tree by applying VISITOR, which is a three-argument ;; procedure, to NODE's data plus the result of applying VISITOR to ;; each of NODE's children or #f, returning the result. (define (sum-tree node visitor) (if node (visitor (data node) (sum-tree (left-child node) visitor) (sum-tree (right-child node) visitor)) #f)) ;; Serialise the tree rooted at NODE to the given PORT, using the procedure ;; DATA-SERIALISER turn the node data into a writable form. (define (tree-serialise node data-serialiser port) (define (filter-true l) (if (null? l) '() (if (car l) (cons (car l) (filter-true (cdr l))) (filter-true (cdr l))))) (let loop ((level (list node))) (if (not (null? level)) (begin (for-each (lambda (n) (write (data-serialiser (data n)) port) (newline port)) level) (loop (filter-true (apply append (map (lambda (parent) (list (left-child parent) (right-child parent))) level)))))))) ;; Read from the given PORT a sequence of objects as written by TREE-SERIALISE, ;; Procedure DATA-DESERIALISER turns the printed form into node data. ;; Return multiple values: tree, number-of-nodes (define (tree-deserialise data-deserialiser <=> port) (let ((tree #f)) (let loop ((s (read port)) (n 0)) (if (eof-object? s) (values tree n) (begin (if tree (add-node! tree (new-node (data-deserialiser s)) <=>) (set! tree (new-node (data-deserialiser s)))) (loop (read port) (+ n 1))))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Debugging and testing (define (debug-tree node) (if (left-child node) (debug-tree (left-child node))) (format #t "~a [~a]~s ~a~%" (if (left-child node) "/" ".") (height node) (data node) (if (right-child node) "\\" ".")) (if (right-child node) (debug-tree (right-child node)))) (define (tree->string n) (if n (format #f "(~a.~a~a.~a)" (if (left-child n) (tree->string (left-child n)) "") (data n) (height n) (if (right-child n) (tree->string (right-child n)) "")) "EMPTY")) (define (debug x . rest) (if (null? rest) (format #t "(DEBUG:~s)~%" x) (format #t "(DEBUG~s:~s)~%" rest x)) x) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Generate testing trees ;; Make a new tree from the elements of the SEQ, in order. Return the new tree. ;; We disable balance-tree while constructing the tree, so that we can make ;; unbalanced trees. (define (make-test-tree cmp . seq) (let ((top (new-node (car seq)))) (balance-tree 0) (let loop ((l (cdr seq))) (if (null? l) (begin (balance-tree 1) top) (begin (add-node! top (new-node (car l)) cmp) (loop (cdr l))))))) ;; Test function -- return -1,0,+1 if c1 <,=,> c2 (define (char-cmp c1 c2) (cond ((char=? c1 c2) 0) ((char