diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 158b598..3a34c3d 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -115,7 +115,7 @@ jobs:
         # exercise Flask routes via app.test_client(). Only listed files — not
         # `pytest tests/` — to avoid re-collecting unittest.TestCase classes above.
         # -o addopts= avoids inheriting benchmark-only options from pyproject.toml.
-        run: python -m pytest tests/test_api_search.py tests/test_api_workspaces.py tests/test_api_export.py tests/test_pdf_export.py tests/test_search_helpers.py tests/test_check_benchmark_regression.py -v --tb=short -o addopts=
+        run: python -m pytest tests/test_api_search.py tests/test_api_workspaces.py tests/test_api_export.py tests/test_pdf_export.py tests/test_search_helpers.py tests/test_check_benchmark_regression.py tests/test_reduce_baselines.py -v --tb=short -o addopts=
 
       # ── PyInstaller desktop build (Windows only, once per workflow) ────────
       # Closes #44. Builds the onedir bundle and smoke-tests --help so the
@@ -215,7 +215,7 @@ jobs:
             --redact \
             --exit-code 1
 
-  # ── Performance benchmarks: summary cache (issue #115) ─────────────────────
+  # ── Performance benchmarks: unified suite (issues #115, #110) ──────────────
   benchmarks:
     name: Performance benchmarks (gated)
     needs: [unittest]
@@ -236,7 +236,7 @@ jobs:
           python -m pip install -r requirements-lock.txt
           python -m pip install 'pytest>=8,<9' 'pytest-benchmark==4.0.0'
 
-      - name: Run summary-cache benchmarks
+      - name: Run benchmark suite
         run: >
           python -m pytest tests/benchmarks/
           --benchmark-only
diff --git a/.gitignore b/.gitignore
index f204306..0f8d574 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,3 +46,5 @@ coverage.xml
 .hypothesis/
 benchmark-results.json
 benchmarks/_raw.json
+benchmarks/_merged.json
+benchmarks/_ci/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..599d5a1
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,21 @@
+.PHONY: seed-baselines-local update-baselines check-benchmarks clean-benchmark-artifacts
+
+# WARNING: captures timings on THIS machine. Production baselines must match ubuntu-latest CI.
+# Prefer downloading benchmark-results.json from a CI artifact, then:
+#   python scripts/reduce_baselines.py benchmark-results.json benchmarks/baselines.json --slack 1.5
+seed-baselines-local:
+	@echo "WARNING: seed-baselines-local uses this host's timings; CI gates on ubuntu-latest." >&2
+	python -m pytest tests/benchmarks/ --benchmark-only --benchmark-json=benchmarks/_raw.json -o addopts=
+	python -c "import os, subprocess, sys; \
+	  cmd = [sys.executable, 'scripts/reduce_baselines.py', 'benchmarks/_raw.json', 'benchmarks/baselines.json', '--slack', '1.5', '--source', 'local']; \
+	  (subprocess.run(cmd, check=True), print('Updated benchmarks/baselines.json', file=sys.stderr)) if os.environ.get('FORCE') == '1' else print('Wrote benchmarks/_raw.json only. Set FORCE=1 to overwrite benchmarks/baselines.json.', file=sys.stderr)"
+
+# Deprecated alias — kept for muscle memory; see seed-baselines-local warning above.
+update-baselines: seed-baselines-local
+
+check-benchmarks:
+	python -m pytest tests/benchmarks/ --benchmark-only --benchmark-json=benchmark-results.json -o addopts=
+	python scripts/check_benchmark_regression.py benchmark-results.json benchmarks/baselines.json
+
+clean-benchmark-artifacts:
+	python -c "import pathlib; [p.unlink(missing_ok=True) for p in (pathlib.Path('benchmarks/_raw.json'), pathlib.Path('benchmark-results.json'))]"
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000..e2e0064
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,70 @@
+# Performance benchmarks
+
+Test files live under `tests/benchmarks/`; this directory holds documentation and `baselines.json` for the CI regression gate.
+
+Repeatable local measurements for workspace listing, export, search, and summary-cache hot paths.
+
+## Run locally
+
+```bash
+pip install -r requirements-lock.txt
+pip install 'pytest>=8,<9' 'pytest-benchmark==4.0.0'
+pytest tests/benchmarks/ --benchmark-only -o addopts= -v
+```
+
+## Scenarios
+
+| Group | What |
+|-------|------|
+| parse | `list_workspace_projects(..., nocache=True)` over 10 / 50 / 200 synthetic composers |
+| export | `POST /api/export` (ZIP) over 10 / 50 composer corpora (capped at 50 for CI runtime; parse goes to 200) |
+| search | `GET /api/search` over a 50-composer corpus — **live-scan** (`test_search_full_corpus_live_scan`, `NO_SEARCH_INDEX=1`) and **FTS index** (`test_search_full_corpus_indexed`, pre-built index) |
+| summary-cache | projects lookup (hit/miss), composer-map lookup (hit/miss), fingerprint (10/50/200), round-trip, tab-summary lookup |
+
+Synthetic corpora are built in `tests/benchmarks/conftest.py` — no real Cursor storage dependency.
+
+### Adding a benchmark group
+
+Every `@pytest.mark.benchmark(group="...")` name must appear in `GATED_GROUPS` inside `scripts/reduce_baselines.py`. Otherwise `reduce_baselines.py` fails at refresh time with an unknown-group error. Update both the test marker and `GATED_GROUPS` when introducing a new group.
+
+## CI gate
+
+The `benchmarks` job on **ubuntu-latest** runs the full `tests/benchmarks/` suite (`--benchmark-json=benchmark-results.json`), then `scripts/check_benchmark_regression.py benchmark-results.json benchmarks/baselines.json`.
+
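+- **Fail** when a gated mean exceeds its stored baseline by **>20%** (stored baselines already include the `--slack` multiplier applied by `scripts/reduce_baselines.py`)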
+- **Fail** when a gated mean is **<50%** of baseline (stale — refresh after intentional speedups)
+- **Fail** when a gated baseline name has no current result
+- **Warn** for benchmarks without a baseline entry
+- All benchmarks listed in `baselines.json` are gated unless named in `EXCLUDED_FROM_GATE` in `scripts/check_benchmark_regression.py`
+
+Pinned runner: `ubuntu-latest`, `--benchmark-min-rounds=5`.
+
+Sub-millisecond benches (e.g. `test_summary_cache_lookup`, `test_composer_map_cache_lookup`) can be high-variance on shared runners. If the gate becomes flaky, regenerate baselines with a higher `--slack` (it scales every entry; there is no per-benchmark slack), hand-edit the affected values in `baselines.json`, or add targeted exclusions in `EXCLUDED_FROM_GATE`.
+
+`test_summary_cache_round_trip` is intentionally excluded from the gate: it calls `set_cached_projects` (file write) + `get_cached_projects` (file read) each round, so OS page-cache state on shared runners causes 3–5x variation between consecutive CI runs. The baseline entry is kept for observation only.
+
+## Refresh baselines
+
+After intentional performance work, capture on **ubuntu-latest** (same OS as the gated CI job). Download `benchmark-results.json` from a CI artifact when possible:
+
+```bash
+python scripts/reduce_baselines.py benchmark-results.json benchmarks/baselines.json --slack 1.5 --source ubuntu-latest-ci
+```
+
+For a quick local snapshot only (may not match CI timings):
+
+```bash
+make seed-baselines-local
+# writes benchmarks/_raw.json only; does not overwrite benchmarks/baselines.json
+make seed-baselines-local FORCE=1   # also runs reduce_baselines into benchmarks/baselines.json
+```
+
+`make update-baselines` is a deprecated alias for `seed-baselines-local`. Do not commit baselines from macOS/Windows unless you accept cross-OS gate skew.
+
+## Makefile targets
+
+| Target | Purpose |
+|--------|---------|
+| `make check-benchmarks` | Run suite + regression gate locally |
+| `make seed-baselines-local` | Capture local timings to `benchmarks/_raw.json` (use `FORCE=1` to update `baselines.json`) |
+| `make clean-benchmark-artifacts` | Remove `benchmark-results.json` and `benchmarks/_raw.json` |
diff --git a/benchmarks/baselines.json b/benchmarks/baselines.json
index 131b638..1f3a5c0 100644
--- a/benchmarks/baselines.json
+++ b/benchmarks/baselines.json
@@ -1,15 +1,32 @@
 {
-  "_note": "Gated means from ubuntu-latest CI benchmark-results.json (PR #120, run 28123677675). Refresh after intentional perf changes: download benchmark-results.json from the CI artifacts job, then `python scripts/check_benchmark_regression.py benchmark-results.json benchmarks/baselines.json` (re-seed with reduce_baselines or edit means). Local capture: `pytest tests/benchmarks/ --benchmark-only --benchmark-json=benchmark-results.json -o addopts=` on ubuntu-latest.",
-  "updated": "2026-06-24T19:20:27Z",
+  "_note": "Gated means from ubuntu-latest CI benchmark-results.json. Values multiplied by 1.5x slack at generation time. Excluded from gate (recorded for reference): test_summary_cache_round_trip. Refresh after intentional speedups via reduce_baselines.py.",
+  "updated": "2026-06-25T23:36:11Z",
   "machine": "Linux",
   "groups": {
+    "parse": {
+      "test_list_workspace_projects_nocache[composers-10]": 0.016421750017237738,
+      "test_list_workspace_projects_nocache[composers-50]": 0.07185380692856874,
+      "test_list_workspace_projects_nocache[composers-200]": 0.2388664538571439
+    },
+    "export": {
+      "test_post_export_zip[composers-10]": 0.010621589857140498,
+      "test_post_export_zip[composers-50]": 0.03968703356250458
+    },
+    "search": {
+      "test_search_full_corpus_live_scan": 0.04461661563157736,
+      "test_search_full_corpus_indexed": 0.05512249660713918
+    },
     "summary-cache": {
-      "test_summary_cache_hit": 6.3e-05,
-      "test_summary_cache_miss": 6.3e-05,
-      "test_fingerprint_workspace_entries[10]": 0.001844,
-      "test_fingerprint_workspace_entries[50]": 0.007759,
-      "test_fingerprint_workspace_entries[200]": 0.022231,
-      "test_summary_cache_round_trip": 0.000351
+      "test_summary_cache_lookup[hit]": 7.249851343825762e-05,
+      "test_summary_cache_lookup[miss]": 7.193702095574013e-05,
+      "test_composer_map_cache_lookup[hit]": 7.151645086519804e-05,
+      "test_composer_map_cache_lookup[miss]": 7.112598943352091e-05,
+      "test_fingerprint_workspace_entries[10]": 0.0024127972424549185,
+      "test_fingerprint_workspace_entries[50]": 0.010196820941858245,
+      "test_fingerprint_workspace_entries[200]": 0.029070524094341035,
+      "test_summary_cache_round_trip": 0.0004703680658560554,
+      "test_tab_summary_cache_lookup[hit]": 7.844850562859133e-05,
+      "test_tab_summary_cache_lookup[miss]": 7.843399021512e-05
     }
   }
 }
diff --git a/scripts/check_benchmark_regression.py b/scripts/check_benchmark_regression.py
index d2fc79c..6655460 100644
--- a/scripts/check_benchmark_regression.py
+++ b/scripts/check_benchmark_regression.py
@@ -4,10 +4,24 @@
 
 import argparse
 import json
+import math
 import sys
 from pathlib import Path
 
 THRESHOLD = 1.20
+STALE_FLOOR = 0.50
+
+# Benchmarks recorded in baselines.json but excluded from the regression gate.
+# Use sparingly — only for benches whose timing is inherently noisy across CI runs
+# (e.g. file I/O operations that depend on OS page-cache state).
+EXCLUDED_FROM_GATE: frozenset[str] = frozenset(
+    {
+        # round_trip calls set_cached_projects (file write) + get_cached_projects (file read)
+        # each round. OS page-cache state on shared runners causes 3–5x variation between
+        # consecutive CI runs, making this ungatable with any reasonable slack.
+        "test_summary_cache_round_trip",
+    }
+)
 
 
 class BenchmarkDataError(ValueError):
@@ -97,19 +111,35 @@ def load_baseline_means(baselines_path: str | Path) -> dict[str, float]:
     return means
 
 
+def _validate_gate_ratios(threshold: float, stale_floor: float) -> None:
+    """Raise BenchmarkDataError unless threshold is finite and > 1 and stale_floor is finite in (0, 1)."""
+    # threshold is checked first, so its error wins when both ratios are bad.
+    if not (math.isfinite(threshold) and threshold > 1):
+        reason = "greater than 1" if math.isfinite(threshold) else "finite"
+        raise BenchmarkDataError(f"threshold must be {reason}")
+    if not (math.isfinite(stale_floor) and 0 < stale_floor < 1):
+        reason = "between 0 and 1 (exclusive)" if math.isfinite(stale_floor) else "finite"
+        raise BenchmarkDataError(f"stale_floor must be {reason}")
+
+
 def check_regression(
     results_path: str | Path,
     baselines_path: str | Path,
     *,
     threshold: float = THRESHOLD,
+    stale_floor: float = STALE_FLOOR,
 ) -> int:
-    """Return 0 when within threshold; 1 when any gated benchmark regresses."""
+    """Return 0 when within threshold; 1 when any gated benchmark regresses or is stale."""
+    _validate_gate_ratios(threshold, stale_floor)
     flat = load_results(results_path)
     baseline_means = load_baseline_means(baselines_path)
 
     failures: list[str] = []
+    stale: list[str] = []
     missing: list[str] = []
     for name, base in baseline_means.items():
+        if name in EXCLUDED_FROM_GATE:
+            continue
         cur = flat.get(name)
         if cur is None:
             print(f"FAIL: no current result for gated baseline {name!r}")
@@ -119,20 +149,32 @@ def check_regression(
             print(f"WARN: baseline for {name!r} is zero; skipping ratio check")
             continue
         ratio = cur / base
-        tag = "FAIL" if ratio > threshold else "ok"
-        print(f"[{tag}] {name}: {cur:.6f}s vs {base:.6f}s ({ratio:.2f}x)")
         if ratio > threshold:
+            tag = "FAIL"
             failures.append(name)
+        elif ratio < stale_floor:
+            tag = "STALE"
+            stale.append(name)
+        else:
+            tag = "ok"
+        print(f"[{tag}] {name}: {cur:.6f}s vs {base:.6f}s ({ratio:.2f}x)")
 
     for name in flat:
+        if name in EXCLUDED_FROM_GATE:
+            continue
         if name not in baseline_means:
             print(f"WARN: {name!r} has no baseline yet; not gated")
 
     if failures:
         print(f"\nREGRESSION: {len(failures)} benchmark(s) exceeded {threshold:.0%}")
+    if stale:
+        print(
+            f"\nSTALE: {len(stale)} benchmark(s) are faster than {stale_floor:.0%} of baseline "
+            "(refresh baselines after intentional speedups)"
+        )
     if missing:
         print(f"\nMISSING: {len(missing)} gated benchmark(s) absent from current results")
-    if failures or missing:
+    if failures or stale or missing:
         return 1
     return 0
 
@@ -147,12 +189,19 @@ def main(argv: list[str] | None = None) -> int:
         default=THRESHOLD,
         help="fail when current mean exceeds baseline by more than this ratio (default: 1.20)",
     )
+    parser.add_argument(
+        "--stale-floor",
+        type=float,
+        default=STALE_FLOOR,
+        help="fail when current mean is below this fraction of baseline (default: 0.50)",
+    )
     args = parser.parse_args(argv)
     try:
         return check_regression(
             args.results_path,
             args.baselines_path,
             threshold=args.threshold,
+            stale_floor=args.stale_floor,
         )
     except BenchmarkDataError as exc:
         print(f"ERROR: {exc}", file=sys.stderr)
diff --git a/scripts/reduce_baselines.py b/scripts/reduce_baselines.py
new file mode 100644
index 0000000..78bfbd1
--- /dev/null
+++ b/scripts/reduce_baselines.py
@@ -0,0 +1,142 @@
+"""Reduce pytest-benchmark JSON into benchmarks/baselines.json."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import math
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))
+
+from scripts.check_benchmark_regression import (
+    EXCLUDED_FROM_GATE,
+    BenchmarkDataError,
+    normalize_benchmark_name,
+)
+
+GATED_GROUPS = ("parse", "export", "search", "summary-cache")
+
+
+def _positive_float(value: str) -> float:
+    """argparse ``type=`` hook for ``--slack``: accept only finite floats greater than zero."""
+    number = float(value)  # ValueError for non-numeric text is not caught here
+    if math.isfinite(number) and number > 0:
+        return number
+    complaint = "greater than zero" if math.isfinite(number) else "a finite number"
+    raise argparse.ArgumentTypeError(f"slack must be {complaint}")
+
+
+def reduce_baselines(
+    raw_path: str | Path,
+    out_path: str | Path,
+    *,
+    slack: float = 1.0,
+    source: str = "local",
+) -> dict[str, object]:
+    """Reduce pytest-benchmark JSON at *raw_path* into a baselines file at *out_path*.
+
+    Stores ``stats.mean * slack`` per benchmark under its ``group`` and returns the written mapping;
+    raises ``BenchmarkDataError`` for bad input (including non-finite/negative means) or I/O failure.
+    """
+    path = Path(raw_path)
+    try:
+        raw = json.loads(path.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as exc:
+        raise BenchmarkDataError(f"invalid JSON in {path}: {exc}") from exc
+    except (OSError, UnicodeDecodeError) as exc:
+        raise BenchmarkDataError(f"cannot read {path}: {exc}") from exc
+
+    try:
+        entries = raw["benchmarks"]
+    except (KeyError, TypeError) as exc:
+        raise BenchmarkDataError(f"{path} missing top-level 'benchmarks' array") from exc
+    if not isinstance(entries, list):
+        raise BenchmarkDataError(f"{path} 'benchmarks' must be an array")
+
+    groups: dict[str, dict[str, float]] = {group: {} for group in GATED_GROUPS}
+    for index, entry in enumerate(entries):
+        if not isinstance(entry, dict):
+            raise BenchmarkDataError(f"{path} benchmarks[{index}] must be an object")
+        try:
+            raw_name = entry["name"]
+            mean = float(entry["stats"]["mean"])
+        except (KeyError, TypeError, ValueError) as exc:
+            raise BenchmarkDataError(
+                f"{path} benchmarks[{index}] missing 'name' or 'stats.mean'"
+            ) from exc
+        bench_name = normalize_benchmark_name(str(raw_name))
+        if not math.isfinite(mean) or mean < 0:  # a NaN baseline compares False to any threshold
+            bad_mean = f"{path} benchmarks[{index}] ({bench_name!r}) has invalid mean {mean!r}"
+            raise BenchmarkDataError(bad_mean)
+        group = entry.get("group")
+        if group is None:
+            raise BenchmarkDataError(
+                f"{path} benchmarks[{index}] ({bench_name!r}) missing required 'group'"
+            )
+        if group not in GATED_GROUPS:
+            raise BenchmarkDataError(
+                f"{path} benchmarks[{index}] ({bench_name!r}) has unknown group {group!r}; "
+                f"expected one of {GATED_GROUPS}"
+            )
+        if bench_name in groups[group]:
+            raise BenchmarkDataError(
+                f"{path} benchmarks[{index}] ({raw_name!r}) duplicates normalized "
+                f"benchmark {group!r}/{bench_name!r}"
+            )
+        groups[group][bench_name] = mean * slack
+
+    excluded = ", ".join(sorted(EXCLUDED_FROM_GATE))
+    excluded_note = f" Excluded from gate (recorded for reference): {excluded}." if excluded else ""
+    slack_note = f" Values multiplied by {slack}x slack at generation time." if slack != 1.0 else ""
+    machine_info = raw.get("machine_info")
+    machine = machine_info.get("system") if isinstance(machine_info, dict) else None
+    source_label = {
+        "ubuntu-latest-ci": "ubuntu-latest CI benchmark-results.json",
+        "local": "local benchmark-results.json",
+    }.get(source, source)
+    output: dict[str, object] = {
+        "_note": f"Gated means from {source_label}.{slack_note}{excluded_note} "
+        "Refresh after intentional speedups via reduce_baselines.py.",
+        "updated": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+        "machine": machine,
+        "groups": groups,
+    }
+    out = Path(out_path)
+    try:
+        out.write_text(json.dumps(output, indent=2) + "\n", encoding="utf-8")
+    except OSError as exc:
+        raise BenchmarkDataError(f"cannot write {out}: {exc}") from exc
+    return output
+
+
+def main(argv: list[str] | None = None) -> int:
+    """CLI entry point: return 0 once baselines are written, 2 on ``BenchmarkDataError``."""
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument("raw_path", help="pytest-benchmark --benchmark-json output")
+    cli.add_argument("out_path", help="destination baselines.json path")
+    cli.add_argument(
+        "--slack",
+        type=_positive_float,
+        default=1.0,
+        help="multiply means by this factor (must be > 0)",
+    )
+    cli.add_argument(
+        "--source", default="local", help="provenance label for _note (e.g. ubuntu-latest-ci, local)"
+    )
+    opts = cli.parse_args(argv)
+    # Surface data/IO problems as a one-line error instead of a traceback.
+    try:
+        reduce_baselines(opts.raw_path, opts.out_path, slack=opts.slack, source=opts.source)
+    except BenchmarkDataError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        return 2
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py
index e3e17e2..cfc133f 100644
--- a/tests/benchmarks/conftest.py
+++ b/tests/benchmarks/conftest.py
@@ -1,14 +1,20 @@
-"""Synthetic workspace trees for summary-cache performance benchmarks."""
+"""Shared synthetic fixtures for pytest-benchmark hot paths."""
 
 from __future__ import annotations
 
+import contextlib
+import json
+import sqlite3
 from pathlib import Path
 from typing import Any
 
 import pytest
+from flask.testing import FlaskClient
 
+from app import create_app
 from services import summary_cache
 from services.summary_cache import fingerprint_workspace_storage
+from tests.benchmarks.constants import BENCH_SEARCH_TERM
 
 
 def make_workspace_entries(workspace_root: Path, count: int) -> list[dict[str, Any]]:
@@ -30,13 +36,123 @@ def make_workspace_entries(workspace_root: Path, count: int) -> list[dict[str, A
     return entries
 
 
+def _composer_ids(count: int) -> list[tuple[str, str, str]]:  # (workspace_id, composer_id, bubble_id)
+    return [("ws_" + tag, "cmp_" + tag, "bub_" + tag) for tag in map("{:04d}".format, range(count))]
+
+
+def build_bench_storage(root: Path, composer_count: int) -> dict[str, str]:
+    """Create workspaceStorage, globalStorage, cli_chats and projects trees for *composer_count* composers."""
+    ws_root = root / "workspaceStorage"
+    global_root = root / "globalStorage"
+    cli_root = root / "cli_chats"
+    projects_root = root / "projects"
+    ws_root.mkdir(parents=True)  # no exist_ok: raises if this tree already exists under *root*
+    global_root.mkdir(parents=True)
+    cli_root.mkdir(parents=True)
+    projects_root.mkdir(parents=True)
+
+    global_db_path = global_root / "state.vscdb"
+    with contextlib.closing(sqlite3.connect(global_db_path)) as conn:  # closing() so the handle is released
+        conn.execute("CREATE TABLE cursorDiskKV ([key] TEXT PRIMARY KEY, value TEXT)")
+        base_ts = 1_715_000_000_000  # presumably epoch milliseconds — TODO confirm
+        for i, (workspace_id, composer_id, bubble_id) in enumerate(_composer_ids(composer_count)):
+            project_folder = projects_root / f"proj_{i:04d}"
+            project_folder.mkdir(parents=True, exist_ok=True)
+
+            ws_dir = ws_root / workspace_id
+            ws_dir.mkdir(parents=True, exist_ok=True)
+            (ws_dir / "workspace.json").write_text(
+                json.dumps({"folder": str(project_folder)}),
+                encoding="utf-8",
+            )
+            with contextlib.closing(sqlite3.connect(ws_dir / "state.vscdb")) as ws_conn:
+                ws_conn.execute("CREATE TABLE ItemTable ([key] TEXT PRIMARY KEY, value TEXT)")
+                ws_conn.execute(
+                    "INSERT INTO ItemTable ([key], value) VALUES (?, ?)",
+                    (
+                        "composer.composerData",
+                        json.dumps({"allComposers": [{"composerId": composer_id}]}),
+                    ),
+                )
+                ws_conn.commit()
+
+            created_at = base_ts + i * 1_000  # each composer is offset by 1000 from the previous one
+            conn.execute(
+                "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)",
+                (
+                    f"composerData:{composer_id}",
+                    json.dumps(
+                        {
+                            "name": f"Bench chat {i:04d}",
+                            "createdAt": created_at,
+                            "lastUpdatedAt": created_at + 500,
+                            "fullConversationHeadersOnly": [
+                                {"bubbleId": bubble_id, "type": 1},
+                            ],
+                            "modelConfig": {"modelName": "gpt-4o"},
+                        }
+                    ),
+                ),
+            )
+            conn.execute(
+                "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)",
+                (
+                    f"bubbleId:{composer_id}:{bubble_id}",
+                    json.dumps(
+                        {
+                            "text": f"find {BENCH_SEARCH_TERM} in composer {i:04d}",
+                            "type": "user",
+                            "createdAt": created_at + 400,
+                        }
+                    ),
+                ),
+            )
+        conn.commit()  # single commit for all global-DB rows
+
+    return {
+        "workspace_path": str(ws_root),
+        "cli_chats_path": str(cli_root),
+        "storage_root": str(root),
+    }
+
+
+def _make_bench_flask_client(
+    storage: dict[str, str],
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+    *,
+    state_subdir: str = ".cursor-chat-browser",
+    live_scan_search: bool = False,
+) -> FlaskClient:
+    """Flask test client with env + export state patched for synthetic storage.
+
+    When *live_scan_search* is True, set ``CURSOR_CHAT_BROWSER_NO_SEARCH_INDEX=1`` so
+    ``/api/search`` measures the live-scan fallback. Otherwise the FTS index path
+    from #113 may be used when an index is built (see indexed search fixtures).
+    """
+    monkeypatch.setenv("WORKSPACE_PATH", storage["workspace_path"])
+    monkeypatch.setenv("CLI_CHATS_PATH", storage["cli_chats_path"])
+    if live_scan_search:
+        monkeypatch.setenv("CURSOR_CHAT_BROWSER_NO_SEARCH_INDEX", "1")
+    else:  # NOTE(review): NOCACHE is cleared only on this branch; confirm live-scan should inherit it
+        monkeypatch.delenv("CURSOR_CHAT_BROWSER_NO_SEARCH_INDEX", raising=False)
+        monkeypatch.delenv("CURSOR_CHAT_BROWSER_NOCACHE", raising=False)
+    state_dir = tmp_path / state_subdir
+    state_dir.mkdir()  # no exist_ok: raises if state_subdir already exists under tmp_path
+    monkeypatch.setattr("api.export_api._get_state_dir", lambda: str(state_dir))
+    app = create_app()
+    app.config["TESTING"] = True
+    app.config["EXCLUSION_RULES"] = []
+    return app.test_client()
+
+
 @pytest.fixture
 def summary_cache_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
     """Redirect summary-cache files to an isolated temp directory.
 
-    Patches ``CACHE_DIR`` (also used by tab-summary paths via ``_tab_summaries_path``)
-    plus the projects/composer-map file constants used by current benchmarks.
-    Tab-summary cache benchmarks are deferred to issue #110 (unified benchmark suite).
+    Tab-summary files use ``CACHE_DIR`` + hashed filenames only (see
+    ``summary_cache._tab_summaries_path``); they do not use
+    ``PROJECTS_CACHE_FILE`` or ``COMPOSER_MAP_CACHE_FILE``.
     """
     cache_dir = tmp_path / "cache"
     cache_dir.mkdir()
@@ -87,3 +203,78 @@ def workspace_fingerprint(synthetic_workspace: tuple[str, list[dict[str, Any]]])
 def stale_fingerprint(workspace_fingerprint: dict[str, Any]) -> dict[str, Any]:
     """Return a fingerprint guaranteed to differ from the stored one."""
     return {**workspace_fingerprint, "rules_digest": "deadbeefdeadbeef"}
+
+
+@pytest.fixture
+def bench_storage(tmp_path: Path, request: pytest.FixtureRequest) -> dict[str, str]:
+    """On-disk Cursor layout with N composers (N = indirect ``bench_storage`` param; 10 when unparametrized)."""
+    count = getattr(request, "param", 10)
+    return build_bench_storage(tmp_path / "storage", count)
+
+
+@pytest.fixture
+def bench_env(
+    bench_storage: dict[str, str],
+    monkeypatch: pytest.MonkeyPatch,
+) -> dict[str, str]:
+    """Point WORKSPACE_PATH / CLI_CHATS_PATH at the synthetic tree and set CURSOR_CHAT_BROWSER_NO_SEARCH_INDEX=1."""
+    monkeypatch.setenv("WORKSPACE_PATH", bench_storage["workspace_path"])
+    monkeypatch.setenv("CLI_CHATS_PATH", bench_storage["cli_chats_path"])
+    monkeypatch.setenv("CURSOR_CHAT_BROWSER_NO_SEARCH_INDEX", "1")
+    return bench_storage
+
+
+@pytest.fixture
+def bench_client(bench_env: dict[str, str], tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> FlaskClient:
+    """Flask test client bound to synthetic bench storage (created with ``live_scan_search=True``)."""
+    return _make_bench_flask_client(bench_env, tmp_path, monkeypatch, live_scan_search=True)
+
+
+@pytest.fixture
+def bench_client_search_corpus(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> FlaskClient:
+    """Flask client over a fixed 50-composer corpus (live-scan search path)."""
+    storage = build_bench_storage(tmp_path / "search_storage", 50)  # own subtree, not bench_storage's "storage"
+    return _make_bench_flask_client(
+        storage,
+        tmp_path,
+        monkeypatch,
+        state_subdir=".cursor-chat-browser-search",
+        live_scan_search=True,
+    )
+
+
+@pytest.fixture
+def bench_client_search_corpus_indexed(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> FlaskClient:
+    """Flask client with FTS index built for the 50-composer search corpus."""
+    from services.search_index import build_search_index
+
+    monkeypatch.delenv("CURSOR_CHAT_BROWSER_NO_SEARCH_INDEX", raising=False)  # cleared before the index build
+    monkeypatch.delenv("CURSOR_CHAT_BROWSER_NOCACHE", raising=False)  # _make_bench_flask_client repeats both
+
+    storage = build_bench_storage(tmp_path / "search_indexed_storage", 50)
+    cache_dir = tmp_path / "search_index_cache"
+    cache_dir.mkdir()
+    monkeypatch.setattr("services.search_index.CACHE_DIR", cache_dir)  # redirect index files into tmp_path
+    monkeypatch.setattr(
+        "services.search_index.SEARCH_INDEX_POINTER_FILE",
+        cache_dir / "search_index.active",
+    )
+    monkeypatch.setattr(
+        "services.search_index.SEARCH_INDEX_FILE",
+        cache_dir / "search_index.sqlite",
+    )
+    built = build_search_index(storage["workspace_path"], [], force=True)
+    assert built is True  # abort fixture setup unless build_search_index reports True
+    return _make_bench_flask_client(
+        storage,
+        tmp_path,
+        monkeypatch,
+        state_subdir=".cursor-chat-browser-search-indexed",
+        live_scan_search=False,
+    )
diff --git a/tests/benchmarks/constants.py b/tests/benchmarks/constants.py
new file mode 100644
index 0000000..ab682d3
--- /dev/null
+++ b/tests/benchmarks/constants.py
@@ -0,0 +1,3 @@
+"""Shared constants for benchmark corpora (importable outside conftest)."""
+
+BENCH_SEARCH_TERM = "bench-search-token"
diff --git a/tests/benchmarks/test_export_bench.py b/tests/benchmarks/test_export_bench.py
new file mode 100644
index 0000000..c5e3051
--- /dev/null
+++ b/tests/benchmarks/test_export_bench.py
@@ -0,0 +1,30 @@
+"""Benchmark POST /api/export (ZIP) over synthetic workspace + global DB."""
+
+from __future__ import annotations
+
+import pytest
+from flask.testing import FlaskClient
+
+
+@pytest.mark.benchmark(group="export")
+@pytest.mark.parametrize(
+    "bench_storage",
+    [10, 50],
+    indirect=True,  # each value reaches the bench_storage fixture as request.param
+    ids=["composers-10", "composers-50"],
+)
+def test_post_export_zip(
+    benchmark,
+    bench_client: FlaskClient,
+) -> None:
+    def _run() -> object:
+        return bench_client.post(
+            "/api/export",
+            json={},
+            content_type="application/json",
+        )
+
+    response = benchmark(_run)  # timed call; the asserts below inspect what _run returned
+    assert response.status_code == 200
+    assert response.content_type.startswith("application/zip")
+    assert int(response.headers.get("X-Export-Count", "0")) >= 1
diff --git a/tests/benchmarks/test_parse_bench.py b/tests/benchmarks/test_parse_bench.py
new file mode 100644
index 0000000..9f23872
--- /dev/null
+++ b/tests/benchmarks/test_parse_bench.py
@@ -0,0 +1,28 @@
+"""Benchmark list_workspace_projects (nocache) over synthetic composer corpora."""
+
+from __future__ import annotations
+
+import pytest
+
+from services.workspace_listing import list_workspace_projects
+
+
+@pytest.mark.benchmark(group="parse")
+@pytest.mark.parametrize(
+    "bench_storage",
+    [10, 50, 200],
+    indirect=True,  # each value reaches the bench_storage fixture as request.param
+    ids=["composers-10", "composers-50", "composers-200"],
+)
+def test_list_workspace_projects_nocache(
+    benchmark,
+    bench_env: dict[str, str],
+) -> None:
+    workspace_path = bench_env["workspace_path"]
+
+    def _run() -> object:
+        return list_workspace_projects(workspace_path, [], nocache=True)
+
+    projects, warnings = benchmark(_run)  # unpacks the (projects, warnings) pair returned by _run
+    assert isinstance(projects, list) and len(projects) > 0
+    assert warnings == []
diff --git a/tests/benchmarks/test_search_bench.py b/tests/benchmarks/test_search_bench.py
new file mode 100644
index 0000000..33eee5b
--- /dev/null
+++ b/tests/benchmarks/test_search_bench.py
@@ -0,0 +1,48 @@
+"""Benchmark GET /api/search over a 50-composer synthetic corpus."""
+
+from __future__ import annotations
+
+import pytest
+from flask.testing import FlaskClient
+
+from tests.benchmarks.constants import BENCH_SEARCH_TERM
+
+
+def _search_url() -> str:  # one query shared by the live-scan and indexed benchmarks
+    return "/api/search?q={}&all_history=1".format(BENCH_SEARCH_TERM)
+
+
+def _assert_search_response(response: object) -> None:
+    """Require HTTP 200 and a JSON object carrying a non-empty ``results`` list."""
+    assert response.status_code == 200  # type: ignore[attr-defined]
+    payload = response.get_json()  # type: ignore[attr-defined]
+    assert isinstance(payload, dict)
+    assert isinstance(hits := payload.get("results"), list) and len(hits) > 0
+
+
+@pytest.mark.benchmark(group="search")
+def test_search_full_corpus_live_scan(
+    benchmark,
+    bench_client_search_corpus: FlaskClient,
+) -> None:
+    """Time the live-scan fallback (``CURSOR_CHAT_BROWSER_NO_SEARCH_INDEX=1``)."""
+
+    def _scan_request() -> object:
+        # The URL is rebuilt on every call, so that cost is part of the timing.
+        return bench_client_search_corpus.get(_search_url())
+
+    _assert_search_response(benchmark(_scan_request))
+
+
+@pytest.mark.benchmark(group="search")
+def test_search_full_corpus_indexed(
+    benchmark,
+    bench_client_search_corpus_indexed: FlaskClient,
+) -> None:
+    """Time the FTS index path (#113) against a pre-built ``search_index.sqlite``."""
+
+    def _indexed_request() -> object:
+        # The URL is rebuilt on every call, so that cost is part of the timing.
+        return bench_client_search_corpus_indexed.get(_search_url())
+
+    _assert_search_response(benchmark(_indexed_request))
diff --git a/tests/benchmarks/test_summary_cache_bench.py b/tests/benchmarks/test_summary_cache_bench.py
index dad4a15..16552d2 100644
--- a/tests/benchmarks/test_summary_cache_bench.py
+++ b/tests/benchmarks/test_summary_cache_bench.py
@@ -1,44 +1,64 @@
-"""pytest-benchmark coverage for services/summary_cache.py hot paths.
-
-``test_summary_cache_hit`` and ``test_summary_cache_miss`` both time ``get_cached_projects``
-only. Miss means fingerprint mismatch (cache not used), not a full cache rebuild.
-"""
+"""pytest-benchmark coverage for services/summary_cache.py hot paths."""
 
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Any
+from typing import Any, Literal
 
 import pytest
 
 from services.summary_cache import (
     fingerprint_workspace_storage,
+    get_cached_composer_id_to_ws,
     get_cached_projects,
+    get_cached_tab_summaries,
+    set_cached_composer_id_to_ws,
     set_cached_projects,
+    set_cached_tab_summaries,
 )
 
 
 @pytest.mark.benchmark(group="summary-cache")
-def test_summary_cache_hit(
+@pytest.mark.parametrize("mode", ["hit", "miss"], ids=["hit", "miss"])
+def test_summary_cache_lookup(
     benchmark,
+    mode: Literal["hit", "miss"],
     summary_cache_dir: Path,
     workspace_fingerprint: dict[str, Any],
+    stale_fingerprint: dict[str, Any],
     sample_projects: list[dict[str, Any]],
 ) -> None:
+    """Time ``get_cached_projects`` only; miss = fingerprint mismatch, not rebuild."""
     set_cached_projects(workspace_fingerprint, sample_projects, [])
-    benchmark(get_cached_projects, workspace_fingerprint)
+    probe_fp = workspace_fingerprint if mode == "hit" else stale_fingerprint
+    cached = benchmark(get_cached_projects, probe_fp)
+    if mode != "hit":
+        assert cached is None
+        return
+    assert cached is not None
+    hit_projects, hit_warnings = cached
+    assert hit_projects == sample_projects
+    assert hit_warnings == []
 
 
 @pytest.mark.benchmark(group="summary-cache")
-def test_summary_cache_miss(
+@pytest.mark.parametrize("mode", ["hit", "miss"], ids=["hit", "miss"])
+def test_composer_map_cache_lookup(
     benchmark,
+    mode: Literal["hit", "miss"],
     summary_cache_dir: Path,
     workspace_fingerprint: dict[str, Any],
     stale_fingerprint: dict[str, Any],
-    sample_projects: list[dict[str, Any]],
 ) -> None:
-    set_cached_projects(workspace_fingerprint, sample_projects, [])
-    benchmark(get_cached_projects, stale_fingerprint)
+    """Time ``get_cached_composer_id_to_ws`` hit/miss (fingerprint mismatch on miss)."""
+    seeded = {"cmp_0000": "ws_0000"}
+    set_cached_composer_id_to_ws(workspace_fingerprint, seeded)
+    probe_fp = workspace_fingerprint if mode == "hit" else stale_fingerprint
+    cached = benchmark(get_cached_composer_id_to_ws, probe_fp)
+    if mode != "hit":
+        assert cached is None
+        return
+    assert cached == seeded
 
 
 @pytest.mark.benchmark(group="summary-cache")
@@ -76,3 +96,31 @@ def _run() -> None:
         get_cached_projects(fp)
 
     benchmark(_run)
+    cached = get_cached_projects(fp)
+    assert cached is not None
+    cached_projects, cached_warnings = cached
+    assert cached_projects == projects
+    assert cached_warnings == []
+
+
+@pytest.mark.benchmark(group="summary-cache")
+@pytest.mark.parametrize("mode", ["hit", "miss"], ids=["hit", "miss"])
+def test_tab_summary_cache_lookup(
+    benchmark,
+    mode: Literal["hit", "miss"],
+    summary_cache_dir: Path,
+    workspace_fingerprint: dict[str, Any],
+    stale_fingerprint: dict[str, Any],
+) -> None:
+    """Time ``get_cached_tab_summaries`` hit/miss (fingerprint mismatch on miss)."""
+    ws_id = "ws_0000"
+    seeded = {"tabs": [{"id": "cmp_0000", "title": "Bench"}]}
+    set_cached_tab_summaries(workspace_fingerprint, ws_id, seeded, 200)
+    probe_fp = workspace_fingerprint if mode == "hit" else stale_fingerprint
+    cached = benchmark(get_cached_tab_summaries, probe_fp, ws_id)
+    if mode != "hit":
+        assert cached is None
+        return
+    assert cached is not None
+    cached_body, cached_status = cached
+    assert cached_status == 200 and cached_body == seeded
diff --git a/tests/test_check_benchmark_regression.py b/tests/test_check_benchmark_regression.py
index 8de10a8..873d68a 100644
--- a/tests/test_check_benchmark_regression.py
+++ b/tests/test_check_benchmark_regression.py
@@ -14,7 +14,7 @@
     normalize_benchmark_name,
 )
 
-GATED_BENCH = "test_summary_cache_hit"
+GATED_BENCH = "test_summary_cache_lookup[hit]"  # id of the mode="hit" parametrized case
 
 
 def _write_results(path, benchmarks: list[dict]) -> None:
@@ -32,9 +32,9 @@ def _write_baselines(path, groups: dict[str, dict[str, float]]) -> None:
 
 
 def test_normalize_benchmark_name_strips_module_prefix() -> None:
-    full = "tests/benchmarks/test_summary_cache_bench.py::test_summary_cache_hit"
-    assert normalize_benchmark_name(full) == "test_summary_cache_hit"
-    assert normalize_benchmark_name("test_summary_cache_hit") == "test_summary_cache_hit"
+    bare = "test_summary_cache_lookup[hit]"
+    assert normalize_benchmark_name(f"tests/benchmarks/test_summary_cache_bench.py::{bare}") == bare
+    assert normalize_benchmark_name(bare) == bare
 
 
 def test_normalize_benchmark_name_preserves_colons_in_param_values() -> None:
@@ -50,13 +50,13 @@ def test_load_results_normalizes_full_node_id(tmp_path) -> None:
         path,
         [
             {
-                "name": "tests/benchmarks/test_summary_cache_bench.py::test_summary_cache_hit",
+                "name": "tests/benchmarks/test_summary_cache_bench.py::test_summary_cache_lookup[hit]",
                 "stats": {"mean": 0.0001},
             }
         ],
     )
 
-    assert load_results(path)["test_summary_cache_hit"] == pytest.approx(0.0001)
+    assert load_results(path)["test_summary_cache_lookup[hit]"] == pytest.approx(0.0001)
 
 
 def test_missing_baseline_warns_without_failing(
@@ -213,3 +213,76 @@ def test_load_baseline_means_rejects_non_dict_group(tmp_path) -> None:
 
     with pytest.raises(BenchmarkDataError, match="must be an object"):
         load_baseline_means(baselines)
+
+
+def test_stale_baseline_fails(tmp_path, capsys: pytest.CaptureFixture[str]) -> None:
+    """A mean well under its baseline is flagged STALE and returns exit code 1.
+
+    The measured mean (0.00005) is a quarter of the recorded baseline (0.0002).
+    """
+    results_file = tmp_path / "results.json"
+    baselines_file = tmp_path / "baselines.json"
+    _write_results(results_file, [{"name": GATED_BENCH, "stats": {"mean": 0.00005}}])
+    _write_baselines(baselines_file, {"summary-cache": {GATED_BENCH: 0.0002}})
+
+    exit_code = check_regression(results_file, baselines_file)
+    assert exit_code == 1
+
+    stdout = capsys.readouterr().out
+    assert "STALE" in stdout
+
+
+def test_main_rejects_invalid_threshold(tmp_path, capsys: pytest.CaptureFixture[str]) -> None:
+    """``--threshold 1.0`` makes ``main`` return 2 and explain why on stderr."""
+    from scripts.check_benchmark_regression import main
+
+    results_file = tmp_path / "results.json"
+    baselines_file = tmp_path / "baselines.json"
+    _write_results(results_file, [{"name": GATED_BENCH, "stats": {"mean": 0.0001}}])
+    _write_baselines(baselines_file, {"summary-cache": {GATED_BENCH: 0.0002}})
+    assert main([str(results_file), str(baselines_file), "--threshold", "1.0"]) == 2
+    assert "threshold must be greater than 1" in capsys.readouterr().err
+
+
+def test_main_rejects_invalid_stale_floor(tmp_path, capsys: pytest.CaptureFixture[str]) -> None:
+    """``--stale-floor 1.5`` makes ``main`` return 2 and explain why on stderr."""
+    from scripts.check_benchmark_regression import main
+
+    results_file = tmp_path / "results.json"
+    baselines_file = tmp_path / "baselines.json"
+    _write_results(results_file, [{"name": GATED_BENCH, "stats": {"mean": 0.0001}}])
+    _write_baselines(baselines_file, {"summary-cache": {GATED_BENCH: 0.0002}})
+    assert main([str(results_file), str(baselines_file), "--stale-floor", "1.5"]) == 2
+    assert "stale_floor must be between 0 and 1" in capsys.readouterr().err
+
+
+def test_check_regression_rejects_invalid_threshold(tmp_path) -> None:
+    """``threshold=1.0`` is rejected with ``BenchmarkDataError``."""
+    results_file = tmp_path / "results.json"
+    baselines_file = tmp_path / "baselines.json"
+    _write_results(results_file, [{"name": GATED_BENCH, "stats": {"mean": 0.0001}}])
+    _write_baselines(baselines_file, {"summary-cache": {GATED_BENCH: 0.0002}})
+    with pytest.raises(BenchmarkDataError, match="threshold must be greater than 1"):
+        check_regression(results_file, baselines_file, threshold=1.0)
+
+
+def test_check_regression_rejects_non_finite_threshold(tmp_path) -> None:
+    """A NaN ``threshold`` is rejected with ``BenchmarkDataError``."""
+    results_file = tmp_path / "results.json"
+    baselines_file = tmp_path / "baselines.json"
+    _write_results(results_file, [{"name": GATED_BENCH, "stats": {"mean": 0.0001}}])
+    _write_baselines(baselines_file, {"summary-cache": {GATED_BENCH: 0.0002}})
+    with pytest.raises(BenchmarkDataError, match="threshold must be finite"):
+        check_regression(results_file, baselines_file, threshold=float("nan"))
+
+
+def test_main_rejects_non_finite_threshold(tmp_path, capsys: pytest.CaptureFixture[str]) -> None:
+    """``--threshold inf`` makes ``main`` return 2 and report it on stderr."""
+    from scripts.check_benchmark_regression import main
+
+    results_file = tmp_path / "results.json"
+    baselines_file = tmp_path / "baselines.json"
+    _write_results(results_file, [{"name": GATED_BENCH, "stats": {"mean": 0.0001}}])
+    _write_baselines(baselines_file, {"summary-cache": {GATED_BENCH: 0.0002}})
+    assert main([str(results_file), str(baselines_file), "--threshold", "inf"]) == 2
+    assert "threshold must be finite" in capsys.readouterr().err
diff --git a/tests/test_reduce_baselines.py b/tests/test_reduce_baselines.py
new file mode 100644
index 0000000..9cc24e4
--- /dev/null
+++ b/tests/test_reduce_baselines.py
@@ -0,0 +1,156 @@
+"""Tests for scripts/reduce_baselines.py."""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from scripts.reduce_baselines import reduce_baselines
+from scripts.check_benchmark_regression import BenchmarkDataError
+
+
+def _write_raw(path, benchmarks: list[dict], *, machine: str = "Linux") -> None:
+    """Write a minimal raw benchmark-results document to ``path``.
+
+    Only two fields are emitted: ``machine_info.system`` (from ``machine``) and
+    the ``benchmarks`` list, serialized as indented UTF-8 JSON.
+    """
+    document = {
+        "machine_info": {"system": machine},
+        "benchmarks": benchmarks,
+    }
+    path.write_text(json.dumps(document, indent=2), encoding="utf-8")
+
+
+def test_reduce_baselines_groups_and_slack(tmp_path) -> None:
+    """Each raw mean is expected to be scaled by ``slack`` and filed under its group.
+
+    One benchmark per group is reduced with ``slack=1.5``; the written file, its
+    note, and the returned mapping are all checked.
+    """
+    raw_file = tmp_path / "raw.json"
+    baselines_file = tmp_path / "baselines.json"
+    # (group, benchmark name, raw mean, expected baseline at 1.5x slack)
+    cases = [
+        ("parse", "test_list_workspace_projects_nocache[composers-50]", 0.05, 0.075),
+        ("export", "test_post_export_zip[composers-10]", 0.01, 0.015),
+        ("search", "test_search_full_corpus", 0.04, 0.06),
+        ("summary-cache", "test_summary_cache_lookup[hit]", 0.0001, 0.00015),
+    ]
+    _write_raw(
+        raw_file,
+        [
+            {"name": name, "group": group, "stats": {"mean": mean}}
+            for group, name, mean, _expected in cases
+        ],
+    )
+
+    returned = reduce_baselines(
+        raw_file,
+        baselines_file,
+        slack=1.5,
+        source="ubuntu-latest-ci",
+    )
+    written = json.loads(baselines_file.read_text(encoding="utf-8"))
+    groups = written["groups"]
+
+    for group, name, _mean, expected in cases:
+        assert groups[group][name] == pytest.approx(expected)
+
+    assert written["machine"] == "Linux"
+    # The note names the CI source and the slack factor that was applied.
+    assert "ubuntu-latest CI benchmark-results.json" in written["_note"]
+    assert "1.5x slack" in written["_note"]
+    # The return value carries the same groups that were written to disk.
+    assert returned["groups"] == groups
+
+
+def test_reduce_baselines_local_source_note(tmp_path) -> None:
+    """``source="local"`` is reflected in the wording of the note.
+
+    ``machine`` is expected to echo the raw file's ``machine_info.system``.
+    """
+    raw_file = tmp_path / "raw.json"
+    baselines_file = tmp_path / "baselines.json"
+    entry = {
+        "name": "test_summary_cache_lookup[hit]",
+        "group": "summary-cache",
+        "stats": {"mean": 0.0001},
+    }
+    _write_raw(raw_file, [entry], machine="Windows")
+
+    reduce_baselines(raw_file, baselines_file, source="local")
+
+    written = json.loads(baselines_file.read_text(encoding="utf-8"))
+    assert "local benchmark-results.json" in written["_note"]
+    assert written["machine"] == "Windows"
+
+
+def test_reduce_baselines_rejects_unknown_group(tmp_path) -> None:
+    """An unrecognised group name raises ``BenchmarkDataError``.
+
+    ``"cache"`` serves as the rejected group name.
+    """
+    raw_file = tmp_path / "raw.json"
+    baselines_file = tmp_path / "baselines.json"
+    entry = {
+        "name": "test_cache_only",
+        "group": "cache",
+        "stats": {"mean": 0.001},
+    }
+    _write_raw(raw_file, [entry])
+
+    with pytest.raises(BenchmarkDataError, match="unknown group 'cache'"):
+        reduce_baselines(raw_file, baselines_file)
+
+
+def test_reduce_baselines_rejects_missing_group(tmp_path) -> None:
+    """A benchmark entry without a ``group`` key raises ``BenchmarkDataError``.
+
+    The entry carries only ``name`` and ``stats``.
+    """
+    raw_file = tmp_path / "raw.json"
+    baselines_file = tmp_path / "baselines.json"
+    entry = {
+        "name": "test_no_group",
+        "stats": {"mean": 0.001},
+    }
+    _write_raw(raw_file, [entry])
+
+    with pytest.raises(BenchmarkDataError, match="missing required 'group'"):
+        reduce_baselines(raw_file, baselines_file)
+
+
+def test_reduce_baselines_rejects_duplicate_normalized_name(tmp_path) -> None:
+    """Two entries that normalize to the same name raise ``BenchmarkDataError``.
+
+    The same benchmark appears once as a bare name and once as a full node id.
+    """
+    raw_file = tmp_path / "raw.json"
+    baselines_file = tmp_path / "baselines.json"
+
+    short_name = "test_summary_cache_lookup[hit]"
+    node_id = f"tests/benchmarks/test_summary_cache_bench.py::{short_name}"
+    # Both entries share a group; they differ in name spelling and mean.
+    _write_raw(
+        raw_file,
+        [
+            {"name": short_name, "group": "summary-cache", "stats": {"mean": 0.0001}},
+            {"name": node_id, "group": "summary-cache", "stats": {"mean": 0.0002}},
+        ],
+    )
+
+    with pytest.raises(BenchmarkDataError, match="duplicates normalized"):
+        reduce_baselines(raw_file, baselines_file)
+
+
+def test_positive_float_rejects_non_finite() -> None:
+    """``_positive_float`` refuses ``nan`` and ``inf`` with ``ArgumentTypeError``."""
+    import argparse
+
+    from scripts.reduce_baselines import _positive_float
+
+    for literal in ("nan", "inf"):
+        with pytest.raises(argparse.ArgumentTypeError, match="finite"):
+            _positive_float(literal)