1#!/usr/bin/env python 2#coding:utf-8 3# Author: Mozman 4# Purpose: treemixin provides top level functions for binary trees 5# Created: 03.05.2010 6# Copyright (c) 2010-2013 by Manfred Moitzi 7# License: MIT License 8 9from __future__ import absolute_import 10 11from .walker import Walker 12from .treeslice import TreeSlice 13 14class TreeMixin(object): 15 """ 16 Abstract-Base-Class for the pure Python Trees: BinaryTree, AVLTree and RBTree 17 Mixin-Class for the Cython based Trees: FastBinaryTree, FastAVLTree, FastRBTree 18 19 The TreeMixin Class 20 =================== 21 22 T has to implement following properties 23 --------------------------------------- 24 25 count -- get node count 26 27 T has to implement following methods 28 ------------------------------------ 29 30 get_walker(...) 31 get a tree walker object, provides methods to traverse the tree see walker.py 32 33 insert(...) 34 insert(key, value) <==> T[key] = value, insert key into T 35 36 remove(...) 37 remove(key) <==> del T[key], remove key from T 38 39 clear(...) 40 T.clear() -> None. Remove all items from T. 41 42 Methods defined here 43 -------------------- 44 45 * __contains__(k) -> True if T has a key k, else False, O(log(n)) 46 * __delitem__(y) <==> del T[y], del T[s:e], O(log(n)) 47 * __getitem__(y) <==> T[y], T[s:e], O(log(n)) 48 * __iter__() <==> iter(T) 49 * __len__() <==> len(T), O(1) 50 * __max__() <==> max(T), get max item (k,v) of T, O(log(n)) 51 * __min__() <==> min(T), get min item (k,v) of T, O(log(n)) 52 * __and__(other) <==> T & other, intersection 53 * __or__(other) <==> T | other, union 54 * __sub__(other) <==> T - other, difference 55 * __xor__(other) <==> T ^ other, symmetric_difference 56 * __repr__() <==> repr(T) 57 * __setitem__(k, v) <==> T[k] = v, O(log(n)) 58 * clear() -> None, Remove all items from T, , O(n) 59 * copy() -> a shallow copy of T, O(n*log(n)) 60 * discard(k) -> None, remove k from T, if k is present, O(log(n)) 61 * get(k[,d]) -> T[k] if k in T, else d, O(log(n)) 62 * is_empty() -> True if len(T) == 0, O(1) 63 * items([reverse]) -> generator for (k, v) items of T, O(n) 64 * keys([reverse]) -> generator for keys of T, O(n) 65 * values([reverse]) -> generator for values of T, O(n) 66 * pop(k[,d]) -> v, remove specified key and return the corresponding value, O(log(n)) 67 * popitem() -> (k, v), remove and return some (key, value) pair as a 2-tuple, O(log(n)) 68 * setdefault(k[,d]) -> T.get(k, d), also set T[k]=d if k not in T, O(log(n)) 69 * update(E) -> None. Update T from dict/iterable E, O(E*log(n)) 70 * foreach(f, [order]) -> visit all nodes of tree and call f(k, v) for each node, O(n) 71 72 slicing by keys 73 74 * itemslice(s, e) -> generator for (k, v) items of T for s <= key < e, O(n) 75 * keyslice(s, e) -> generator for keys of T for s <= key < e, O(n) 76 * valueslice(s, e) -> generator for values of T for s <= key < e, O(n) 77 * T[s:e] -> TreeSlice object, with keys in range s <= key < e, O(n) 78 * del T[s:e] -> remove items by key slicing, for s <= key < e, O(n) 79 80 if 's' is None or T[:e] TreeSlice/iterator starts with value of min_key() 81 if 'e' is None or T[s:] TreeSlice/iterator ends with value of max_key() 82 T[:] is a TreeSlice which represents the whole tree. 83 84 TreeSlice is a tree wrapper with range check, and contains no references 85 to objects, deleting objects in the associated tree also deletes the object 86 in the TreeSlice. 87 88 * TreeSlice[k] -> get value for key k, raises KeyError if k not exists in range s:e 89 * TreeSlice[s1:e1] -> TreeSlice object, with keys in range s1 <= key < e1 90 91 * new lower bound is max(s, s1) 92 * new upper bound is min(e, e1) 93 94 TreeSlice methods: 95 96 * items() -> generator for (k, v) items of T, O(n) 97 * keys() -> generator for keys of T, O(n) 98 * values() -> generator for values of T, O(n) 99 * __iter__ <==> keys() 100 * __repr__ <==> repr(T) 101 * __contains__(key)-> True if TreeSlice has a key k, else False, O(log(n)) 102 103 prev/succ operations 104 105 * prev_item(key) -> get (k, v) pair, where k is predecessor to key, O(log(n)) 106 * prev_key(key) -> k, get the predecessor of key, O(log(n)) 107 * succ_item(key) -> get (k,v) pair as a 2-tuple, where k is successor to key, O(log(n)) 108 * succ_key(key) -> k, get the successor of key, O(log(n)) 109 * floor_item(key) -> get (k, v) pair, where k is the greatest key less than or equal to key, O(log(n)) 110 * floor_key(key) -> k, get the greatest key less than or equal to key, O(log(n)) 111 * ceiling_item(key) -> get (k, v) pair, where k is the smallest key greater than or equal to key, O(log(n)) 112 * ceiling_key(key) -> k, get the smallest key greater than or equal to key, O(log(n)) 113 114 Heap methods 115 116 * max_item() -> get largest (key, value) pair of T, O(log(n)) 117 * max_key() -> get largest key of T, O(log(n)) 118 * min_item() -> get smallest (key, value) pair of T, O(log(n)) 119 * min_key() -> get smallest key of T, O(log(n)) 120 * pop_min() -> (k, v), remove item with minimum key, O(log(n)) 121 * pop_max() -> (k, v), remove item with maximum key, O(log(n)) 122 * nlargest(i[,pop]) -> get list of i largest items (k, v), O(i*log(n)) 123 * nsmallest(i[,pop]) -> get list of i smallest items (k, v), O(i*log(n)) 124 125 Set methods (using frozenset) 126 127 * intersection(t1, t2, ...) -> Tree with keys *common* to all trees 128 * union(t1, t2, ...) -> Tree with keys from *either* trees 129 * difference(t1, t2, ...) -> Tree with keys in T but not any of t1, t2, ... 130 * symmetric_difference(t1) -> Tree with keys in either T and t1 but not both 131 * issubset(S) -> True if every element in T is in S 132 * issuperset(S) -> True if every element in S is in T 133 * isdisjoint(S) -> True if T has a null intersection with S 134 135 Classmethods 136 137 * fromkeys(S[,v]) -> New tree with keys from S and values equal to v. 138 139 """ 140 def get_walker(self): 141 return Walker(self) 142 143 def __repr__(self): 144 """ x.__repr__(...) <==> repr(x) """ 145 tpl = "%s({%s})" % (self.__class__.__name__, '%s') 146 return tpl % ", ".join( ("%r: %r" % item for item in self.items()) ) 147 148 def copy(self): 149 """ T.copy() -> get a shallow copy of T. """ 150 tree = self.__class__() 151 self.foreach(tree.insert, order=-1) 152 return tree 153 __copy__ = copy 154 155 def __contains__(self, key): 156 """ k in T -> True if T has a key k, else False """ 157 try: 158 self.get_value(key) 159 return True 160 except KeyError: 161 return False 162 163 def __len__(self): 164 """ x.__len__() <==> len(x) """ 165 return self.count 166 167 def __min__(self): 168 """ x.__min__() <==> min(x) """ 169 return self.min_item() 170 171 def __max__(self): 172 """ x.__max__() <==> max(x) """ 173 return self.max_item() 174 175 def __and__(self, other): 176 """ x.__and__(other) <==> self & other """ 177 return self.intersection(other) 178 179 def __or__(self, other): 180 """ x.__or__(other) <==> self | other """ 181 return self.union(other) 182 183 def __sub__(self, other): 184 """ x.__sub__(other) <==> self - other """ 185 return self.difference(other) 186 187 def __xor__(self, other): 188 """ x.__xor__(other) <==> self ^ other """ 189 return self.symmetric_difference(other) 190 191 def discard(self, key): 192 """ x.discard(k) -> None, remove k from T, if k is present """ 193 try: 194 self.remove(key) 195 except KeyError: 196 pass 197 198 def __del__(self): 199 self.clear() 200 201 def is_empty(self): 202 """ x.is_empty() -> False if T contains any items else True""" 203 return self.count == 0 204 205 def keys(self, reverse=False): 206 """ T.iterkeys([reverse]) -> an iterator over the keys of T, in ascending 207 order if reverse is True, iterate in descending order, reverse defaults 208 to False 209 """ 210 return ( item[0] for item in self.items(reverse) ) 211 __iter__ = keys 212 213 def __reversed__(self): 214 return self.keys(reverse=True) 215 216 def values(self, reverse=False): 217 """ T.values([reverse]) -> an iterator over the values of T, in ascending order 218 if reverse is True, iterate in descending order, reverse defaults to False 219 """ 220 return ( item[1] for item in self.items(reverse) ) 221 222 def items(self, reverse=False): 223 """ T.items([reverse]) -> an iterator over the (key, value) items of T, 224 in ascending order if reverse is True, iterate in descending order, 225 reverse defaults to False 226 """ 227 if self.is_empty(): 228 return [] 229 230 def default_iterator(node): 231 direction = 1 if reverse else 0 232 other = 1 - direction 233 go_down = True 234 while True: 235 if node.has_child(direction) and go_down: 236 node.push() 237 node.down(direction) 238 else: 239 yield node.item 240 if node.has_child(other): 241 node.down(other) 242 go_down = True 243 else: 244 if node.stack_is_empty(): 245 return # all done 246 node.pop() 247 go_down = False 248 249 treewalker = self.get_walker() 250 try: # specialized iterators 251 if reverse: 252 return treewalker.iter_items_backward() 253 else: 254 return treewalker.iter_items_forward() 255 except AttributeError: 256 return default_iterator(treewalker) 257 258 def __getitem__(self, key): 259 """ x.__getitem__(y) <==> x[y] """ 260 if isinstance(key, slice): 261 return TreeSlice(self, key.start, key.stop) 262 else: 263 return self.get_value(key) 264 265 def __setitem__(self, key, value): 266 """ x.__setitem__(i, y) <==> x[i]=y """ 267 if isinstance(key, slice): 268 raise ValueError('setslice is not supported') 269 self.insert(key, value) 270 271 def __delitem__(self, key): 272 """ x.__delitem__(y) <==> del x[y] """ 273 if isinstance(key, slice): 274 self.delitems(self.keyslice(key.start, key.stop)) 275 else: 276 self.remove(key) 277 278 def delitems(self, keys): 279 """ T.delitems(keys) -> remove all items by keys 280 """ 281 # convert generator to a set, because the content of the 282 # tree will be modified! 283 for key in frozenset(keys): 284 self.remove(key) 285 286 def keyslice(self, startkey, endkey): 287 """ T.keyslice(startkey, endkey) -> key iterator: 288 startkey <= key < endkey. 289 """ 290 return ( item[0] for item in self.itemslice(startkey, endkey) ) 291 292 def itemslice(self, startkey, endkey): 293 """ T.itemslice(s, e) -> item iterator: s <= key < e. 294 295 if s is None: start with min element -> T[:e] 296 if e is None: end with max element -> T[s:] 297 T[:] -> all items 298 299 """ 300 if self.is_empty(): 301 return 302 303 if startkey is None: 304 # no lower bound 305 can_go_left = lambda: node.has_left() and visit_left 306 else: 307 # don't visit subtrees without keys in search range 308 can_go_left = lambda: node.key > startkey and node.has_left() and visit_left 309 310 if endkey is None: 311 # no upper bound 312 can_go_right = lambda: node.has_right() 313 else: 314 # don't visit subtrees without keys in search range 315 can_go_right = lambda: node.key < endkey and node.has_right() 316 317 if (startkey, endkey) == (None, None): 318 key_in_range = lambda: True 319 elif startkey is None: 320 key_in_range = lambda: node.key < endkey 321 elif endkey is None: 322 key_in_range = lambda: node.key >= startkey 323 else: 324 key_in_range = lambda: startkey <= node.key < endkey 325 326 node = self.get_walker() 327 visit_left = True 328 while True: 329 if can_go_left(): 330 node.push() 331 node.go_left() 332 else: 333 if key_in_range(): 334 yield node.item 335 if can_go_right(): 336 node.go_right() 337 visit_left = True 338 else: 339 if node.stack_is_empty(): 340 return 341 node.pop() 342 # left side is already done 343 visit_left = False 344 345 def valueslice(self, startkey, endkey): 346 """ T.valueslice(startkey, endkey) -> value iterator: 347 startkey <= key < endkey. 348 """ 349 return ( item[1] for item in self.itemslice(startkey, endkey) ) 350 351 def get_value(self, key): 352 node = self.root 353 while node is not None: 354 if key == node.key: 355 return node.value 356 elif key < node.key: 357 node = node.left 358 else: 359 node = node.right 360 raise KeyError(str(key)) 361 362 def __getstate__(self): 363 return dict(self.items()) 364 365 def __setstate__(self, state): 366 # note for myself: this is called like __init__, so don't use clear() 367 # to remove existing data! 368 self._root = None 369 self._count = 0 370 self.update(state) 371 372 def setdefault(self, key, default=None): 373 """ T.setdefault(k[,d]) -> T.get(k,d), also set T[k]=d if k not in T """ 374 try: 375 return self.get_value(key) 376 except KeyError: 377 self.insert(key, default) 378 return default 379 380 def update(self, *args): 381 """ T.update(E) -> None. Update T from E : for (k, v) in E: T[k] = v """ 382 for items in args: 383 try: 384 generator = items.items() 385 except AttributeError: 386 generator = iter(items) 387 388 for key, value in generator: 389 self.insert(key, value) 390 391 @classmethod 392 def fromkeys(cls, iterable, value=None): 393 """ T.fromkeys(S[,v]) -> New tree with keys from S and values equal to v. 394 395 v defaults to None. 396 """ 397 tree = cls() 398 for key in iterable: 399 tree.insert(key, value) 400 return tree 401 402 def get(self, key, default=None): 403 """ T.get(k[,d]) -> T[k] if k in T, else d. d defaults to None. """ 404 try: 405 return self.get_value(key) 406 except KeyError: 407 return default 408 409 def pop(self, key, *args): 410 """ T.pop(k[,d]) -> v, remove specified key and return the corresponding value 411 If key is not found, d is returned if given, otherwise KeyError is raised 412 """ 413 if len(args) > 1: 414 raise TypeError("pop expected at most 2 arguments, got %d" % (1 + len(args))) 415 try: 416 value = self.get_value(key) 417 self.remove(key) 418 return value 419 except KeyError: 420 if len(args) == 0: 421 raise 422 else: 423 return args[0] 424 425 def popitem(self): 426 """ T.popitem() -> (k, v), remove and return some (key, value) pair as a 427 2-tuple; but raise KeyError if T is empty 428 """ 429 if self.is_empty(): 430 raise KeyError("popitem(): tree is empty") 431 walker = self.get_walker() 432 walker.goto_leaf() 433 result = walker.item 434 self.remove(walker.key) 435 return result 436 437 def foreach(self, func, order=0): 438 """ Visit all tree nodes and process key, value. 439 440 parm func: function(key, value) 441 param int order: inorder = 0, preorder = -1, postorder = +1 442 443 """ 444 def _traverse(): 445 if order == -1: 446 func(node.key, node.value) 447 if node.has_left(): 448 node.push() 449 node.go_left() 450 _traverse() 451 node.pop() 452 if order == 0: 453 func(node.key, node.value) 454 if node.has_right(): 455 node.push() 456 node.go_right() 457 _traverse() 458 node.pop() 459 if order == +1: 460 func(node.key, node.value) 461 462 node = self.get_walker() 463 _traverse() 464 465 def min_item(self): 466 """ Get item with min key of tree, raises ValueError if tree is empty. """ 467 if self.count == 0: 468 raise ValueError("Tree is empty") 469 node = self._root 470 while node.left is not None: 471 node = node.left 472 return (node.key, node.value) 473 474 def pop_min(self): 475 """ T.pop_min() -> (k, v), remove item with minimum key, raise ValueError 476 if T is empty. 477 """ 478 item = self.min_item() 479 self.remove(item[0]) 480 return item 481 482 def min_key(self): 483 """ Get min key of tree, raises ValueError if tree is empty. """ 484 key, value = self.min_item() 485 return key 486 487 def prev_item(self, key): 488 """ Get predecessor (k,v) pair of key, raises KeyError if key is min key 489 or key does not exist. 490 """ 491 if self.count == 0: 492 raise KeyError("Tree is empty") 493 walker = self.get_walker() 494 return walker.prev_item(key) 495 496 def succ_item(self, key): 497 """ Get successor (k,v) pair of key, raises KeyError if key is max key 498 or key does not exist. 499 """ 500 if self.count == 0: 501 raise KeyError("Tree is empty") 502 walker = self.get_walker() 503 return walker.succ_item(key) 504 505 def prev_key(self, key): 506 """ Get predecessor to key, raises KeyError if key is min key 507 or key does not exist. 508 """ 509 key, value = self.prev_item(key) 510 return key 511 512 def succ_key(self, key): 513 """ Get successor to key, raises KeyError if key is max key 514 or key does not exist. 515 """ 516 key, value = self.succ_item(key) 517 return key 518 519 def floor_item(self, key): 520 """ Get the element (k,v) pair associated with the greatest key less 521 than or equal to the given key, raises KeyError if there is no such key. 522 """ 523 if self.count == 0: 524 raise KeyError("Tree is empty") 525 walker = self.get_walker() 526 return walker.floor_item(key) 527 528 def floor_key(self, key): 529 """ Get the greatest key less than or equal to the given key, raises 530 KeyError if there is no such key. 531 """ 532 key, value = self.floor_item(key) 533 return key 534 535 def ceiling_item(self, key): 536 """ Get the element (k,v) pair associated with the smallest key greater 537 than or equal to the given key, raises KeyError if there is no such key. 538 """ 539 if self.count == 0: 540 raise KeyError("Tree is empty") 541 walker = self.get_walker() 542 return walker.ceiling_item(key) 543 544 def ceiling_key(self, key): 545 """ Get the smallest key greater than or equal to the given key, raises 546 KeyError if there is no such key. 547 """ 548 key, value = self.ceiling_item(key) 549 return key 550 551 def max_item(self): 552 """ Get item with max key of tree, raises ValueError if tree is empty. """ 553 if self.count == 0: 554 raise ValueError("Tree is empty") 555 node = self._root 556 while node.right is not None: 557 node = node.right 558 return (node.key, node.value) 559 560 def pop_max(self): 561 """ T.pop_max() -> (k, v), remove item with maximum key, raise ValueError 562 if T is empty. 563 """ 564 item = self.max_item() 565 self.remove(item[0]) 566 return item 567 568 def max_key(self): 569 """ Get max key of tree, raises ValueError if tree is empty. """ 570 key, value = self.max_item() 571 return key 572 573 def nsmallest(self, n, pop=False): 574 """ T.nsmallest(n) -> get list of n smallest items (k, v). 575 If pop is True, remove items from T. 576 """ 577 if pop: 578 return [self.pop_min() for _ in range(min(len(self), n))] 579 else: 580 items = self.items() 581 return [ next(items) for _ in range(min(len(self), n)) ] 582 583 def nlargest(self, n, pop=False): 584 """ T.nlargest(n) -> get list of n largest items (k, v). 585 If pop is True remove items from T. 586 """ 587 if pop: 588 return [self.pop_max() for _ in range(min(len(self), n))] 589 else: 590 items = self.items(reverse=True) 591 return [ next(items) for _ in range(min(len(self), n)) ] 592 593 def intersection(self, *trees): 594 """ x.intersection(t1, t2, ...) -> Tree, with keys *common* to all trees 595 """ 596 thiskeys = frozenset(self.keys()) 597 sets = _build_sets(trees) 598 rkeys = thiskeys.intersection(*sets) 599 return self.__class__( ((key, self.get(key)) for key in rkeys) ) 600 601 def union(self, *trees): 602 """ x.union(t1, t2, ...) -> Tree with keys from *either* trees 603 """ 604 thiskeys = frozenset(self.keys()) 605 rkeys = thiskeys.union(*_build_sets(trees)) 606 return self.__class__( ((key, self.get(key)) for key in rkeys) ) 607 608 def difference(self, *trees): 609 """ x.difference(t1, t2, ...) -> Tree with keys in T but not any of t1, 610 t2, ... 611 """ 612 thiskeys = frozenset(self.keys()) 613 rkeys = thiskeys.difference(*_build_sets(trees)) 614 return self.__class__( ((key, self.get(key)) for key in rkeys) ) 615 616 def symmetric_difference(self, tree): 617 """ x.symmetric_difference(t1) -> Tree with keys in either T and t1 but 618 not both 619 """ 620 thiskeys = frozenset(self.keys()) 621 rkeys = thiskeys.symmetric_difference(frozenset(tree.keys())) 622 return self.__class__( ((key, self.get(key)) for key in rkeys) ) 623 624 def issubset(self, tree): 625 """ x.issubset(tree) -> True if every element in x is in tree """ 626 thiskeys = frozenset(self.keys()) 627 return thiskeys.issubset(frozenset(tree.keys())) 628 629 def issuperset(self, tree): 630 """ x.issubset(tree) -> True if every element in tree is in x """ 631 thiskeys = frozenset(self.keys()) 632 return thiskeys.issuperset(frozenset(tree.keys())) 633 634 def isdisjoint(self, tree): 635 """ x.isdisjoint(S) -> True if x has a null intersection with tree """ 636 thiskeys = frozenset(self.keys()) 637 return thiskeys.isdisjoint(frozenset(tree.keys())) 638 639 640def _build_sets(trees): 641 return [ frozenset(tree.keys()) for tree in trees ] 642 643