BST Sets and Maps
Using BSTs to implement Set and Map data structures. Understand TreeSet and TreeMap implementations.
Binary Search Trees can implement Set and Map (Dictionary) data structures. Unlike hash-based implementations, BST-based collections maintain elements in sorted order, enabling efficient range queries and ordered iteration.
TreeSet: Ordered Set
A Set stores unique elements. A TreeSet uses a BST to maintain sorted order.
class TreeSet:
def __init__(self):
self.root = None
self.size = 0
def add(self, val):
if self.contains(val):
return False # Already exists
self.root = self._insert(self.root, val)
self.size += 1
return True
def _insert(self, node, val):
if node is None:
return TreeNode(val)
if val < node.val:
node.left = self._insert(node.left, val)
else:
node.right = self._insert(node.right, val)
return node
def remove(self, val):
if not self.contains(val):
return False
self.root = self._delete(self.root, val)
self.size -= 1
return True
def _delete(self, node, val):
if node is None:
return None
if val < node.val:
node.left = self._delete(node.left, val)
elif val > node.val:
node.right = self._delete(node.right, val)
else:
if node.left is None:
return node.right
if node.right is None:
return node.left
successor = node.right
while successor.left:
successor = successor.left
node.val = successor.val
node.right = self._delete(node.right, successor.val)
return node
def contains(self, val):
node = self.root
while node:
if val == node.val:
return True
elif val < node.val:
node = node.left
else:
node = node.right
return False
def __len__(self):
return self.size
TreeSet: Ordered Operations
What makes TreeSet special are the ordered operations:
class TreeSet:
# ... previous methods ...
def first(self):
"""Get minimum element."""
if self.root is None:
return None
node = self.root
while node.left:
node = node.left
return node.val
def last(self):
"""Get maximum element."""
if self.root is None:
return None
node = self.root
while node.right:
node = node.right
return node.val
def floor(self, val):
"""Largest element <= val."""
result = None
node = self.root
while node:
if val == node.val:
return node.val
elif val < node.val:
node = node.left
else:
result = node.val
node = node.right
return result
def ceiling(self, val):
"""Smallest element >= val."""
result = None
node = self.root
while node:
if val == node.val:
return node.val
elif val > node.val:
node = node.right
else:
result = node.val
node = node.left
return result
def range_query(self, low, high):
"""Elements in [low, high]."""
result = []
def inorder(node):
if node is None:
return
if node.val > low:
inorder(node.left)
if low <= node.val <= high:
result.append(node.val)
if node.val < high:
inorder(node.right)
inorder(self.root)
return result
def to_list(self):
"""Get all elements in sorted order."""
result = []
self._inorder(self.root, result)
return result
def _inorder(self, node, result):
if node:
self._inorder(node.left, result)
result.append(node.val)
self._inorder(node.right, result)
Usage:
tree_set = TreeSet()
for val in [5, 3, 7, 1, 4, 6, 8]:
tree_set.add(val)
print(tree_set.first()) # 1
print(tree_set.last()) # 8
print(tree_set.floor(4.5)) # 4
print(tree_set.ceiling(4.5)) # 5
print(tree_set.range_query(3, 6)) # [3, 4, 5, 6]
print(tree_set.to_list()) # [1, 3, 4, 5, 6, 7, 8]
TreeSet vs HashSet
HashSet offers O(1) add/remove/contains but no ordering. TreeSet offers O(log n) operations but supports min, max, floor, ceiling, and range queries. Choose based on whether you need ordered operations.
TreeMap: Ordered Dictionary
A Map stores key-value pairs. A TreeMap maintains keys in sorted order.
class TreeMapNode:
def __init__(self, key, value):
self.key = key
self.value = value
self.left = None
self.right = None
class TreeMap:
def __init__(self):
self.root = None
self.size = 0
def put(self, key, value):
self.root = self._put(self.root, key, value)
def _put(self, node, key, value):
if node is None:
self.size += 1
return TreeMapNode(key, value)
if key < node.key:
node.left = self._put(node.left, key, value)
elif key > node.key:
node.right = self._put(node.right, key, value)
else:
node.value = value # Update existing
return node
def get(self, key):
node = self.root
while node:
if key == node.key:
return node.value
elif key < node.key:
node = node.left
else:
node = node.right
return None
def contains_key(self, key):
return self.get(key) is not None
def remove(self, key):
if not self.contains_key(key):
return None
old_value = self.get(key)
self.root = self._delete(self.root, key)
self.size -= 1
return old_value
def _delete(self, node, key):
if node is None:
return None
if key < node.key:
node.left = self._delete(node.left, key)
elif key > node.key:
node.right = self._delete(node.right, key)
else:
if node.left is None:
return node.right
if node.right is None:
return node.left
successor = node.right
while successor.left:
successor = successor.left
node.key = successor.key
node.value = successor.value
node.right = self._delete(node.right, successor.key)
return node
def first_key(self):
if self.root is None:
return None
node = self.root
while node.left:
node = node.left
return node.key
def last_key(self):
if self.root is None:
return None
node = self.root
while node.right:
node = node.right
return node.key
def floor_key(self, key):
result = None
node = self.root
while node:
if key == node.key:
return node.key
elif key < node.key:
node = node.left
else:
result = node.key
node = node.right
return result
def ceiling_key(self, key):
result = None
node = self.root
while node:
if key == node.key:
return node.key
elif key > node.key:
node = node.right
else:
result = node.key
node = node.left
return result
def keys(self):
result = []
self._inorder_keys(self.root, result)
return result
def _inorder_keys(self, node, result):
if node:
self._inorder_keys(node.left, result)
result.append(node.key)
self._inorder_keys(node.right, result)
def items(self):
result = []
self._inorder_items(self.root, result)
return result
def _inorder_items(self, node, result):
if node:
self._inorder_items(node.left, result)
result.append((node.key, node.value))
self._inorder_items(node.right, result)
def __len__(self):
return self.size
Usage:
tree_map = TreeMap()
tree_map.put("charlie", 30)
tree_map.put("alice", 25)
tree_map.put("bob", 28)
tree_map.put("david", 35)
print(tree_map.get("alice")) # 25
print(tree_map.first_key()) # "alice"
print(tree_map.last_key()) # "david"
print(tree_map.floor_key("cat")) # "charlie"
print(tree_map.keys()) # ["alice", "bob", "charlie", "david"]
print(tree_map.items()) # [("alice", 25), ("bob", 28), ...]
Comparison with Hash-Based Collections
| Operation | TreeSet/TreeMap | HashSet/HashMap | |-----------|-----------------|-----------------| | add/put | O(log n) | O(1)* | | remove | O(log n) | O(1)* | | contains/get | O(log n) | O(1)* | | min/max | O(log n) | O(n) | | floor/ceiling | O(log n) | O(n) | | range query | O(log n + k) | O(n) | | iterate in order | O(n) | O(n log n)** |
*Amortized **Requires sorting
Choose Based on Requirements
Use Hash collections for fastest access when order doesn't matter. Use Tree collections when you need sorted iteration, range queries, or floor/ceiling operations.
Real-World Applications
Event Scheduling
class EventScheduler:
def __init__(self):
self.events = TreeMap() # time -> event
def add_event(self, time, event):
self.events.put(time, event)
def next_event_after(self, time):
ceiling_time = self.events.ceiling_key(time)
if ceiling_time:
return (ceiling_time, self.events.get(ceiling_time))
return None
def events_in_range(self, start, end):
# Get all events between start and end
result = []
for time, event in self.events.items():
if start <= time <= end:
result.append((time, event))
return result
Stock Price Tracker
class StockTracker:
def __init__(self):
self.prices = TreeMap() # price -> count
def add_price(self, price):
count = self.prices.get(price) or 0
self.prices.put(price, count + 1)
def get_max_price(self):
return self.prices.last_key()
def get_min_price(self):
return self.prices.first_key()
def count_in_range(self, low, high):
total = 0
for price, count in self.prices.items():
if low <= price <= high:
total += count
return total
Language Standard Libraries
Most languages provide tree-based collections:
| Language | TreeSet | TreeMap | |----------|---------|---------| | Java | TreeSet<T> | TreeMap<K,V> | | C++ | std::set<T> | std::map<K,V> | | Python | sortedcontainers.SortedSet | sortedcontainers.SortedDict | | C# | SortedSet<T> | SortedDictionary<K,V> | | Rust | BTreeSet<T> | BTreeMap<K,V> |
Key Takeaways
- TreeSet/TreeMap use BSTs to maintain sorted order
- O(log n) for add, remove, contains (vs O(1) for hash)
- Support floor, ceiling, min, max in O(log n)
- Enable efficient range queries
- Iterate elements in sorted order
- Use when ordered operations are needed
BST-based collections bridge the gap between arrays (sorted but slow insertion) and hash tables (fast but unordered). When your problem requires both dynamic updates and ordered access, TreeSet and TreeMap are the right choice.
