Reading from a Leaf Node

Insertion alone is not useful without retrieval. We need two read operations:

Point lookup — find the cell with a specific key, or determine that it does not exist.
Ordered scan — iterate through all cells in key order, starting from an arbitrary position.

Both operations depend on cell-level binary search, which we already wrote as leaf_node_find(). This section extends that function into a complete read API and refactors the Cursor class to navigate leaf nodes instead of raw page slots.

Point Lookup

A point lookup returns the deserialized Row for a given key, or None if the key is absent:

from row import Row

def leaf_node_search(page: memoryview, key: int) -> Row | None:
    """Search for `key` in a leaf node. Return the Row if found, None otherwise."""
    num_cells = leaf_node_num_cells(page)
    index = leaf_node_find(page, key)

    if index >= num_cells:
        return None  # key is greater than all existing keys

    if leaf_node_key(page, index) != key:
        return None  # key does not exist

    value = leaf_node_value(page, index)
    return Row.deserialize(bytes(value))

Three lines of logic, no loops. The binary search in leaf_node_find does the heavy lifting. The bytes(value) call copies the memoryview into an immutable bytes object for deserialization — necessary because Row.deserialize uses struct.unpack, which requires a bytes-like object.

Refactoring the Cursor

The Cursor from Chapter 3 navigated pages by row slot index: “page N, slot M.” That model assumed rows were stored in insertion order. With B-Tree leaf nodes, cells are stored in key-sorted order within each node, and the concept of “slot” is replaced by “cell index.”

Here is the updated Cursor for leaf node traversal:

from dataclasses import dataclass
from pager import Pager


@dataclass
class Cursor:
    """Points to a specific cell within a leaf node."""
    pager: Pager
    page_num: int       # which leaf node (page) we're in
    cell_num: int       # which cell within that leaf node
    end_of_table: bool  # True if positioned past the last cell

    @classmethod
    def table_start(cls, pager: Pager, root_page_num: int) -> "Cursor":
        """Create a cursor pointing to cell 0 of the root leaf node."""
        page = pager.get_page(root_page_num)
        num_cells = leaf_node_num_cells(page)
        return cls(
            pager=pager,
            page_num=root_page_num,
            cell_num=0,
            end_of_table=(num_cells == 0),
        )

    @classmethod
    def find(cls, pager: Pager, root_page_num: int, key: int) -> "Cursor":
        """Create a cursor pointing to the cell with `key`, or where it would go."""
        page = pager.get_page(root_page_num)
        num_cells = leaf_node_num_cells(page)
        index = leaf_node_find(page, key)
        return cls(
            pager=pager,
            page_num=root_page_num,
            cell_num=index,
            end_of_table=(index >= num_cells),
        )

    def value(self) -> memoryview:
        """Return a memoryview of the current cell's value (serialized Row)."""
        page = self.pager.get_page(self.page_num)
        return leaf_node_value(page, self.cell_num)

    def key(self) -> int:
        """Return the key of the current cell."""
        page = self.pager.get_page(self.page_num)
        return leaf_node_key(page, self.cell_num)

    def advance(self) -> None:
        """Move the cursor to the next cell."""
        self.cell_num += 1
        page = self.pager.get_page(self.page_num)
        if self.cell_num >= leaf_node_num_cells(page):
            self.end_of_table = True

What changed from Chapter 3:

Aspect	Old Cursor (CH3)	New Cursor (CH4)
Position	`(page_num, row_slot)`	`(page_num, cell_num)`
Start	Slot 0 of page 0	Cell 0 of root leaf node
Find	Sequential scan	Binary search via `leaf_node_find`
Advance	Next slot, may cross page	Next cell within leaf
Value access	Raw page offset	`leaf_node_value()`

Note that advance() does not yet cross node boundaries. When we have multiple leaf nodes connected through a parent (Chapter 5), the cursor will need to follow sibling pointers. For now, with a single root leaf node, this implementation is complete.

Integrating with the Virtual Machine

The VirtualMachine from Chapter 3 used the old cursor API. Here are the updated execute_select and execute_insert methods:

def execute_insert(self, key: int, row: Row) -> ExecuteResult:
    """Insert a row into the B-Tree."""
    page = self.pager.get_page(self.root_page_num)
    result = leaf_node_insert(page, key, row.serialize())

    if result == InsertResult.SUCCESS:
        return ExecuteResult.SUCCESS
    elif result == InsertResult.DUPLICATE_KEY:
        return ExecuteResult.DUPLICATE_KEY
    else:
        return ExecuteResult.TABLE_FULL


def execute_select(self) -> list[Row]:
    """Read all rows by scanning the leaf node in order."""
    cursor = Cursor.table_start(self.pager, self.root_page_num)
    rows: list[Row] = []

    while not cursor.end_of_table:
        value = cursor.value()
        rows.append(Row.deserialize(bytes(value)))
        cursor.advance()

    return rows

The execute_select method produces rows in ascending key order automatically — no sorting needed — because cells are stored sorted within the leaf node.

Verifying $O(\log k)$ Search Performance

With at most 102 cells per leaf, binary search performs at most $\lceil \log_2(102) \rceil = 7$ key comparisons. We can verify this empirically:

def test_search_performance():
    """Verify that binary search makes O(log k) comparisons."""
    page = memoryview(bytearray(PAGE_SIZE))
    initialize_leaf_node(page, is_root=True)

    # Fill the node to capacity
    for k in range(LEAF_NODE_MAX_CELLS):
        row = Row(id=k * 10, username=f"user_{k}")
        leaf_node_insert(page, k * 10, row.serialize())

    # Point lookups — every key should be found
    for k in range(LEAF_NODE_MAX_CELLS):
        result = leaf_node_search(page, k * 10)
        assert result is not None
        assert result.id == k * 10

    # Missing key — should return None
    assert leaf_node_search(page, 999999) is None

    # Scan via cursor — should produce sorted output
    from pager import Pager
    pager = Pager("test_btree.db")
    pager.pages[0] = page
    pager.num_pages = 1

    cursor = Cursor.table_start(pager, root_page_num=0)
    prev_key = -1
    count = 0
    while not cursor.end_of_table:
        current_key = cursor.key()
        assert current_key > prev_key, "Keys must be strictly ascending"
        prev_key = current_key
        count += 1
        cursor.advance()

    assert count == LEAF_NODE_MAX_CELLS
    print(f"All {count} keys retrieved in sorted order. Search verified.")


test_search_performance()

Where We Stand

Capability	Status
Serialize/deserialize rows	CH1
Read/write pages to disk	CH2
Sequential scan via cursor	CH3
Sorted storage in leaf node	CH4
Point lookup via binary search	CH4
Ordered scan via B-Tree cursor	CH4
Handle overflow (splitting)	Next: CH5
Multi-level tree traversal	Next: CH5

We have a single leaf node that stores 102 rows in sorted order, supports $O(\log k)$ point lookups, and scans in key order. The glaring limitation: when row 103 arrives, we cannot accommodate it. The next chapter introduces internal nodes and the splitting algorithm that transforms our single-node index into a true multi-level B-Tree.