Expand-Contract Pattern and Backward-Compatible Migrations
Expand-Contract Pattern and Backward-Compatible Migrations
The Failure
The inventory team needed to change the quantity column from INTEGER to DECIMAL(10,2) to support fractional units (liquids measured in liters). They ran ALTER TABLE products ALTER COLUMN quantity TYPE DECIMAL(10,2). PostgreSQL rewrote the entire table (2.3 million rows). The table was locked for 47 seconds. During those 47 seconds, every inventory check, every reservation, and every stock update failed. The checkout service returned errors for 47 seconds during peak traffic.
An expand-contract approach avoids the table lock entirely. Add a new column, backfill in batches, migrate reads, drop the old column. Zero downtime.
The Mechanism
Three Common Schema Changes and Their Expand-Contract Patterns
| Change | Naive Approach | Expand-Contract Approach | Deployments |
|---|---|---|---|
| Column rename | ALTER TABLE RENAME COLUMN | Add new column, sync trigger, migrate code, drop old | 3 |
| Type change | ALTER TABLE ALTER COLUMN TYPE | Add new column with new type, backfill, migrate code, drop old | 3 |
| Table split | Create new tables, migrate data, drop old | Add new tables, dual-write, migrate reads, drop old writes, drop old table | 4 |
The Implementation
Pattern 1: Column Type Change (INTEGER → DECIMAL)
Deployment 1: Expand
-- V001__expand_quantity_decimal.sql
-- Add new column with correct type
ALTER TABLE products ADD COLUMN quantity_decimal DECIMAL(10,2);
-- Backfill in batches to avoid long-running transactions
DO $$
DECLARE
batch_size INT := 10000;
total_rows INT;
processed INT := 0;
BEGIN
SELECT count(*) INTO total_rows FROM products WHERE quantity_decimal IS NULL;
WHILE processed < total_rows LOOP
UPDATE products
SET quantity_decimal = quantity::DECIMAL(10,2)
WHERE id IN (
SELECT id FROM products
WHERE quantity_decimal IS NULL
LIMIT batch_size
);
processed := processed + batch_size;
RAISE NOTICE 'Processed % of % rows', processed, total_rows;
COMMIT;
END LOOP;
END $$;
-- Sync trigger: any write to quantity also writes to quantity_decimal
CREATE OR REPLACE FUNCTION sync_quantity()
RETURNS TRIGGER AS $$
BEGIN
IF NEW.quantity IS NOT NULL THEN
NEW.quantity_decimal := NEW.quantity::DECIMAL(10,2);
END IF;
IF NEW.quantity_decimal IS NOT NULL THEN
NEW.quantity := ROUND(NEW.quantity_decimal)::INTEGER;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trg_sync_quantity
BEFORE INSERT OR UPDATE ON products
FOR EACH ROW EXECUTE FUNCTION sync_quantity();
Deployment 1: Code change
// HARDENED: Write to both columns, read from old
// inventory-service/src/main/java/com/acme/inventory/ProductRepository.java
@Query("UPDATE products SET quantity = :qty, quantity_decimal = :qtyDecimal WHERE id = :id")
void updateQuantity(@Param("id") Long id,
@Param("qty") int qty,
@Param("qtyDecimal") BigDecimal qtyDecimal);
// Still reading from 'quantity' — old code compatibility
@Query("SELECT quantity FROM products WHERE id = :id")
int getQuantity(@Param("id") Long id);
Deployment 2: Migrate reads
// HARDENED: Read from new column, write to both
@Query("SELECT quantity_decimal FROM products WHERE id = :id")
BigDecimal getQuantity(@Param("id") Long id);
// Still writing to both — rollback safety
@Query("UPDATE products SET quantity = ROUND(:qtyDecimal), quantity_decimal = :qtyDecimal WHERE id = :id")
void updateQuantity(@Param("id") Long id,
@Param("qtyDecimal") BigDecimal qtyDecimal);
Deployment 3: Contract
-- V003__contract_quantity_decimal.sql
-- Remove sync trigger
DROP TRIGGER trg_sync_quantity ON products;
DROP FUNCTION sync_quantity();
-- Remove old column
ALTER TABLE products DROP COLUMN quantity;
-- Rename new column
ALTER TABLE products RENAME COLUMN quantity_decimal TO quantity;
-- Add NOT NULL constraint
ALTER TABLE products ALTER COLUMN quantity SET NOT NULL;
Pattern 2: Table Split (orders → orders + order_items)
Deployment 1: Expand
-- V010__expand_order_items.sql
CREATE TABLE order_items (
id BIGSERIAL PRIMARY KEY,
order_id BIGINT NOT NULL REFERENCES orders(id),
product_id BIGINT NOT NULL,
quantity DECIMAL(10,2) NOT NULL,
unit_price DECIMAL(10,2) NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_order_items_order_id ON order_items(order_id);
-- Backfill from JSON column in orders table
INSERT INTO order_items (order_id, product_id, quantity, unit_price)
SELECT
o.id,
(item->>'productId')::BIGINT,
(item->>'quantity')::DECIMAL(10,2),
(item->>'unitPrice')::DECIMAL(10,2)
FROM orders o,
jsonb_array_elements(o.items) AS item
WHERE o.items IS NOT NULL;
Deployment 1: Code change — dual write
// HARDENED: Dual-write to both orders.items JSON and order_items table
func (r *OrderRepo) CreateOrder(ctx context.Context, order Order) error {
tx, err := r.db.BeginTx(ctx, nil)
if err != nil {
return err
}
defer tx.Rollback()
// Write to orders table (includes items JSON for backward compat)
_, err = tx.ExecContext(ctx,
`INSERT INTO orders (id, customer_id, total, items, status)
VALUES ($1, $2, $3, $4, $5)`,
order.ID, order.CustomerID, order.Total,
order.ItemsJSON(), order.Status)
if err != nil {
return err
}
// Also write to order_items table (new normalized structure)
for _, item := range order.Items {
_, err = tx.ExecContext(ctx,
`INSERT INTO order_items (order_id, product_id, quantity, unit_price)
VALUES ($1, $2, $3, $4)`,
order.ID, item.ProductID, item.Quantity, item.UnitPrice)
if err != nil {
return err
}
}
return tx.Commit()
}
Deployment 2: Migrate reads
// HARDENED: Read from order_items, still dual-write
func (r *OrderRepo) GetOrderItems(ctx context.Context, orderID string) ([]OrderItem, error) {
rows, err := r.db.QueryContext(ctx,
`SELECT product_id, quantity, unit_price
FROM order_items WHERE order_id = $1`, orderID)
// ... read from normalized table
}
Deployment 3: Stop dual-write, drop old column
-- V012__contract_order_items.sql
ALTER TABLE orders DROP COLUMN items;
The Gate
Each expand migration is tested in CI with the backward-compatibility check described in CH9. The CI pipeline:
- Applies the expand migration to a test database
- Runs the current production code’s tests against the new schema (backward compat)
- Runs the new code’s tests against the new schema (forward compat)
Both must pass before the migration is promoted to staging.
The Recovery
Expand migration fails: The migration transaction rolls back. No schema changes applied. Fix the migration SQL and retry.
Code deployment after expand fails: Roll back the code. The old code still works because the expand migration only added new columns/tables. The old code ignores them.
Contract migration removes a column that is still needed: The contract migration should never run until all code reading the old column has been deployed and verified. Add a CI check that greps the codebase for references to the old column name before allowing the contract migration to run.