Hash Usage
Count word frequencies in a book in O(n) time. Detect whether two strings are anagrams in O(n). Solve the two-sum problem in a single pass. All of these become trivial with a hash map.
This chapter is about the patterns — the recurring ways that hash maps and sets appear in real problems.
Pattern 1: Frequency Counter
Count how many times each element appears. The canonical use is word counting, but the same pattern applies to characters, numbers, or any hashable value.
from collections import Counter

text = "the quick brown fox jumps over the lazy dog the fox"

# Manual approach: one dict entry per distinct word, bumped on every sighting.
freq = {}
for w in text.split():
    freq[w] = freq.get(w, 0) + 1

print("Manual frequency count:")
# reverse=True keeps the sort stable, so ties stay in insertion order —
# same ordering as sorting on the negated count.
for w, c in sorted(freq.items(), key=lambda item: item[1], reverse=True):
    print(f" {w}: {c}")

# Python shortcut: Counter does the whole tally in one call.
counter = Counter(text.split())
print("\nTop 3 words:", counter.most_common(3))
Real-world use: search engines count term frequencies to rank pages. Spam filters count word patterns to detect junk email.
Pattern 2: Two-Sum (Complement Lookup)
Given an array and a target, find two numbers that add up to the target. The naive approach is O(n²) — check every pair. The hash map approach is O(n).
def two_sum(nums, target):
    """Return indices of two numbers that add to target. O(n) time, O(n) space.

    Returns [] when no pair sums to target (including for empty input).
    The earlier index always comes first in the returned pair.
    """
    seen = {}  # value -> index where that value was first seen
    for i, num in enumerate(nums):
        complement = target - num
        if complement in seen:
            # The partner appeared earlier in the scan, so its index leads.
            return [seen[complement], i]
        seen[num] = i
    return []

print(two_sum([2, 7, 11, 15], 9))  # [0, 1] (2 + 7 = 9)
print(two_sum([3, 2, 4], 6))       # [1, 2] (2 + 4 = 6)
print(two_sum([1, 5, 3, 7], 10))   # [2, 3] (3 + 7 = 10 — the 3 at index 2 pairs with 7)
# Trace the two-sum scan one step at a time to show the state of `seen`.
print("\nStep-by-step for [2, 7, 11, 15], target=9:")
nums, target = [2, 7, 11, 15], 9
seen = {}
for idx, value in enumerate(nums):
    needed = target - value
    # `seen` is printed BEFORE inserting the current value, so the "found"
    # column reflects only strictly-earlier elements.
    print(f" i={idx} num={value} need {needed} seen={seen} found={needed in seen}")
    seen[value] = idx
The key insight: for each number x, instead of scanning the array for target - x, you look it up in the hash map in O(1).
Pattern 3: Anagram Detection
Two strings are anagrams if they contain the same characters with the same frequencies.
from collections import Counter

def is_anagram(s, t):
    """Return True when s and t contain the same characters with the
    same frequencies. O(n) time — one Counter pass over each string."""
    counts_s = Counter(s)
    counts_t = Counter(t)
    return counts_s == counts_t
def group_anagrams(words):
    """Group a list of words by their anagram signature.

    O(n * k log k) where k = max word length, because each word is
    sorted to build its canonical key. (A character-count key would
    give O(n * k), but sorted-characters is simpler and fine here.)
    Groups appear in first-seen order; words keep their input order.
    """
    groups = {}
    for word in words:
        key = tuple(sorted(word))  # canonical form: sorted characters
        groups.setdefault(key, []).append(word)
    return list(groups.values())
print(is_anagram("listen", "silent")) # True
print(is_anagram("hello", "world")) # False
word_list = ["eat", "tea", "tan", "ate", "nat", "bat"]
print("\nAnagram groups:", group_anagrams(word_list))
Pattern 4: Duplicate Detection and Uniqueness
A hash set answers “have I seen this before?” in O(1).
def contains_duplicate(nums):
    """Return True when any value appears more than once.

    A set collapses duplicates, so a length mismatch means a repeat.
    """
    unique = set(nums)
    return len(unique) != len(nums)
def first_duplicate(nums):
    """Return the first value whose repeat is encountered scanning left
    to right, or None when all values are distinct."""
    visited = set()
    for value in nums:
        if value in visited:
            return value
        visited.add(value)
    return None
def longest_unique_substring(s):
    """Length of the longest substring of s with no repeated character.

    Sliding window backed by a set: each index enters and leaves the
    window at most once, so the whole scan is O(n).
    """
    window = set()
    start = 0
    longest = 0
    for end, ch in enumerate(s):
        # Shrink from the left until the incoming character is unique.
        while ch in window:
            window.discard(s[start])
            start += 1
        window.add(ch)
        longest = max(longest, end - start + 1)
    return longest
# Demo: the three uniqueness helpers on small inputs.
for sample in ([1, 2, 3, 1], [1, 2, 3, 4]):
    print(contains_duplicate(sample))  # True, then False
print(first_duplicate([3, 1, 4, 1, 5, 9]))  # 1
for snippet in ("abcabcbb", "pwwkew"):
    print(longest_unique_substring(snippet))  # 3 ("abc"), then 3 ("wke")
Hash vs List: A Speed Comparison
import time
import random

# Benchmark: identical membership queries against a list vs. a set.
# n items, 10k lookups drawn from 0..2n so roughly half the queries miss
# (misses are the worst case for the list: a full scan).
n = 100_000
data = list(range(n))
data_set = set(data)
lookups = [random.randint(0, n * 2) for _ in range(10_000)]
# List: O(n) per lookup — `in` scans the list front to back.
start = time.perf_counter()
for val in lookups:
    _ = val in data
list_time = time.perf_counter() - start
# Set: O(1) average per lookup — one hash probe, independent of n.
start = time.perf_counter()
for val in lookups:
    _ = val in data_set
set_time = time.perf_counter() - start
print(f"List lookup (10k queries in {n} items): {list_time * 1000:.1f} ms")
print(f"Set lookup (10k queries in {n} items): {set_time * 1000:.1f} ms")
print(f"Speedup: {list_time / set_time:.0f}x faster with a set")
Pattern 5: Memoization (Caching)
Store the results of expensive function calls in a dict so you never compute the same input twice.
import time
# Without memoization — exponential time: the same subproblems are
# recomputed over and over.
def fib_slow(n):
    """Naive recursive Fibonacci; exponential in n."""
    return n if n <= 1 else fib_slow(n - 2) + fib_slow(n - 1)
# With memoization — O(n) time.
# A module-level cache replaces the original mutable-default-argument
# trick (`cache={}`), a classic Python pitfall; behavior is the same:
# results persist across calls.
_FIB_CACHE = {}

def fib_fast(n, cache=None):
    """Memoized Fibonacci — O(n) time, O(n) space.

    By default results are stored in a shared module-level dict so
    repeated calls are O(1). Pass your own dict as *cache* to keep the
    memo private; recursive calls thread it through (the original
    version silently ignored a caller-supplied cache in the recursion).
    """
    if cache is None:
        cache = _FIB_CACHE
    if n in cache:
        return cache[n]
    if n <= 1:
        return n
    cache[n] = fib_fast(n - 1, cache) + fib_fast(n - 2, cache)
    return cache[n]
# Python provides @functools.lru_cache as a decorator — memoization
# without writing any cache bookkeeping yourself.
from functools import lru_cache

@lru_cache(maxsize=None)
def fib(n):
    """Fibonacci with stdlib memoization — O(n) distinct computations."""
    return n if n <= 1 else fib(n - 1) + fib(n - 2)
def _timed(fn, arg):
    """Return (result, elapsed_seconds) for a single call fn(arg)."""
    t0 = time.perf_counter()
    out = fn(arg)
    return out, time.perf_counter() - t0

# Same input, wildly different cost: exponential vs. memoized.
result_slow, slow_time = _timed(fib_slow, 35)
result_fast, fast_time = _timed(fib, 35)
print(f"fib(35) = {result_slow} (slow: {slow_time*1000:.0f} ms)")
print(f"fib(35) = {result_fast} (fast: {fast_time*1000:.2f} ms)")
print(f"Speedup: {slow_time / fast_time:.0f}x")
Common Hash Map Operations at a Glance
d = {}
d["key"] = "value"                 # insert / update
# d["key"] raises KeyError for a missing key; .get() is the safe lookup
val = d.get("key", "default")
print("get:", val)
print("in:", "key" in d)           # membership test is O(1)
d["temp"] = 42
d.pop("temp")                      # remove a key (del d["temp"] works too)
# Iteration: a dict yields its keys; .values() / .items() for the rest
d = {"a": 1, "b": 2, "c": 3}
print("keys: ", list(d))
print("values:", list(d.values()))
print("items: ", list(d.items()))
d.setdefault("d", 0)               # insert only when the key is absent
d["d"] += 1
print("setdefault:", d)
# defaultdict builds the default value ([] here) the first time a missing
# key is touched — ideal for adjacency-list graphs.
from collections import defaultdict

graph = defaultdict(list)
for neighbour in ("B", "C"):
    graph["A"].append(neighbour)
print("defaultdict graph:", dict(graph))