Skip to content

Chapter 11: Search and String Matching

Vol 2: Algorithm Forest Adventure · Station 11


Metadata Card

AttributeValue
Difficulty(Advanced)
PrerequisitesBST (Chapter 5), Recursion & Divide & Conquer
KeywordsBinary Search Boundary Conditions Rotated Array Rabin-Karp Rolling Hash KMP PMT Trie

Breakthrough · Origins

python
def binary_search(arr, target):
    left, right = 0, len(arr) - 1
    while left <= right:
        mid = left + (right - left) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
    return -1
python
def lower_bound(arr, target):
    """First index with value >= target"""
    left, right = 0, len(arr)
    while left < right:
        mid = left + (right - left) // 2
        if arr[mid] < target:
            left = mid + 1
        else:
            right = mid
    return left

def upper_bound(arr, target):
    """First index with value > target"""
    left, right = 0, len(arr)
    while left < right:
        mid = left + (right - left) // 2
        if arr[mid] <= target:
            left = mid + 1
        else:
            right = mid
    return left

Search in Rotated Array

python
def search_rotated(arr, target):
    left, right = 0, len(arr) - 1
    while left <= right:
        mid = left + (right - left) // 2
        if arr[mid] == target:
            return mid
        if arr[left] <= arr[mid]:
            if arr[left] <= target < arr[mid]:
                right = mid - 1
            else:
                left = mid + 1
        else:
            if arr[mid] < target <= arr[right]:
                left = mid + 1
            else:
                right = mid - 1
    return -1

Rabin-Karp String Matching

python
def rabin_karp(text, pattern):
    n, m = len(text), len(pattern)
    if m > n or m == 0:
        return []
    base, mod = 256, 10**9 + 7
    pat_hash = 0
    for ch in pattern:
        pat_hash = (pat_hash * base + ord(ch)) % mod
    txt_hash = 0
    for ch in text[:m]:
        txt_hash = (txt_hash * base + ord(ch)) % mod
    power = pow(base, m - 1, mod)
    result = []
    for i in range(n - m + 1):
        if pat_hash == txt_hash:
            if text[i:i+m] == pattern:
                result.append(i)
        if i < n - m:
            txt_hash = (txt_hash - ord(text[i]) * power) % mod
            txt_hash = (txt_hash * base + ord(text[i + m])) % mod
    return result

Trie (Prefix Tree)

python
class TrieNode:
    def __init__(self):
        self.children = {}
        self.is_end = False

class Trie:
    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        node = self.root
        for ch in word:
            if ch not in node.children:
                node.children[ch] = TrieNode()
            node = node.children[ch]
        node.is_end = True

    def search(self, word):
        node = self._find(word)
        return node is not None and node.is_end

    def starts_with(self, prefix):
        return self._find(prefix) is not None

    def _find(self, prefix):
        node = self.root
        for ch in prefix:
            if ch not in node.children:
                return None
            node = node.children[ch]
        return node

KMP (Knuth-Morris-Pratt)

python
def build_pmt(pattern):
    m = len(pattern)
    pmt = [0] * m
    j = 0
    for i in range(1, m):
        while j > 0 and pattern[i] != pattern[j]:
            j = pmt[j - 1]
        if pattern[i] == pattern[j]:
            j += 1
        pmt[i] = j
    return pmt

def kmp_search(text, pattern):
    n, m = len(text), len(pattern)
    if m == 0 or m > n:
        return []
    pmt = build_pmt(pattern)
    result = []
    j = 0
    for i in range(n):
        while j > 0 and text[i] != pattern[j]:
            j = pmt[j - 1]
        if text[i] == pattern[j]:
            j += 1
        if j == m:
            result.append(i - m + 1)
            j = pmt[j - 1]
    return result

Verification Checklist

  • [ ] Can hand-write binary search (3 variants: exist, leftmost, rightmost)
  • [ ] Can explain rotated array binary search
  • [ ] Can implement Trie (insert, search, prefix match)
  • [ ] Understands KMP — text pointer never goes backward

Traveler's Notes

  • Binary search: 90% of people get it wrong on the first try
  • Rotated array: demonstrates that "sorted" is a relative concept
  • KMP: algorithmic beauty — precompute + reuse historical information
  • Trie: space-time tradeoff at its most extreme

Next Stop Preview

Chapter 12: Divide & Conquer and Backtracking

Built with VitePress | Software Systems Atlas