Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ jobs:
run: |
python -m pip install --upgrade pip
python -m pip install -r requirements-lock.txt
python -m pip install 'pytest>=8,<9' 'hypothesis>=6.100,<7'
python -m pip install 'pytest>=8,<9' 'pytest-benchmark==4.0.0' 'hypothesis>=6.100,<7'

- name: Run unittest suite
run: python -m unittest discover tests -v
Expand All @@ -114,7 +114,8 @@ jobs:
# Pytest fixtures (tests/conftest.py) build a temp workspaceStorage and
# exercise Flask routes via app.test_client(). Only listed files — not
# `pytest tests/` — to avoid re-collecting unittest.TestCase classes above.
run: python -m pytest tests/test_api_search.py tests/test_api_workspaces.py tests/test_api_export.py tests/test_pdf_export.py tests/test_search_helpers.py -v --tb=short
# -o addopts= avoids inheriting benchmark-only options from pyproject.toml.
run: python -m pytest tests/test_api_search.py tests/test_api_workspaces.py tests/test_api_export.py tests/test_pdf_export.py tests/test_search_helpers.py tests/test_check_benchmark_regression.py -v --tb=short -o addopts=

# ── PyInstaller desktop build (Windows only, once per workflow) ────────
# Closes #44. Builds the onedir bundle and smoke-tests --help so the
Expand Down Expand Up @@ -213,3 +214,42 @@ jobs:
--verbose \
--redact \
--exit-code 1

# ── Performance benchmarks: summary cache (issue #115) ─────────────────────
benchmarks:
name: Performance benchmarks (gated)
needs: [unittest]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false

- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.12"

- name: Install runtime + benchmark dependencies
run: |
python -m pip install --upgrade pip
python -m pip install -r requirements-lock.txt
python -m pip install 'pytest>=8,<9' 'pytest-benchmark==4.0.0'

- name: Run summary-cache benchmarks
run: >
python -m pytest tests/benchmarks/
--benchmark-only
--benchmark-min-rounds=5
--benchmark-json=benchmark-results.json
--benchmark-columns=min,max,mean,stddev,rounds
-o addopts=
Comment thread
clean6378-max-it marked this conversation as resolved.

- name: Regression gate
run: python scripts/check_benchmark_regression.py benchmark-results.json benchmarks/baselines.json

- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
if: always()
with:
name: benchmark-results
path: benchmark-results.json
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,5 @@ Thumbs.db
htmlcov/
coverage.xml
.hypothesis/
benchmark-results.json
benchmarks/_raw.json
15 changes: 15 additions & 0 deletions benchmarks/baselines.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"_note": "Gated means from ubuntu-latest CI benchmark-results.json (PR #120, run 28123677675). Refresh after intentional perf changes: download benchmark-results.json from the CI artifacts job, then `python scripts/check_benchmark_regression.py benchmark-results.json benchmarks/baselines.json` (re-seed with reduce_baselines or edit means). Local capture: `pytest tests/benchmarks/ --benchmark-only --benchmark-json=benchmark-results.json -o addopts=` on ubuntu-latest.",
"updated": "2026-06-24T19:20:27Z",
"machine": "Linux",
"groups": {
"summary-cache": {
"test_summary_cache_hit": 6.3e-05,
"test_summary_cache_miss": 6.3e-05,
"test_fingerprint_workspace_entries[10]": 0.001844,
"test_fingerprint_workspace_entries[50]": 0.007759,
"test_fingerprint_workspace_entries[200]": 0.022231,
"test_summary_cache_round_trip": 0.000351
}
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,20 @@ desktop = ["pywebview>=5.0,<6"]
# Development tooling: testing + type checking.
dev = [
"pytest>=8,<9",
"pytest-benchmark>=4,<5",
"mypy>=1.10,<2",
"hypothesis>=6.100,<7",
]

[tool.pytest.ini_options]
# Requires pytest-benchmark (pip install -e ".[dev]") for default `pytest` invocations.
pythonpath = ["."]
addopts = "--benchmark-skip"
testpaths = ["tests"]
Comment thread
clean6378-max-it marked this conversation as resolved.
markers = [
"benchmark: performance benchmarks (pytest-benchmark)",
]
Comment thread
coderabbitai[bot] marked this conversation as resolved.

[project.scripts]
# Primary CLI: export Cursor chat histories to Markdown / zip.
# Usage: cursor-chat-export [--since all|last] [--out DIR] [--no-zip] [--help]
Expand Down
2 changes: 1 addition & 1 deletion requirements-lock.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Lock is generated on Linux (CI / update-lock.yml). Windows-only transitives (e.g.
# colorama via click) are omitted — pip still installs them on Windows when needed.
blinker==1.9.0 # via flask
click==8.4.1 # via flask
click==8.4.2 # via flask
defusedxml==0.7.1 # via fpdf2
flask==3.1.3 # via -r requirements.txt
fonttools==4.63.0 # via fpdf2
Expand Down
163 changes: 163 additions & 0 deletions scripts/check_benchmark_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""Compare pytest-benchmark JSON output against stored baselines."""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

THRESHOLD = 1.20


class BenchmarkDataError(ValueError):
"""Raised when benchmark JSON input is malformed or missing required fields."""


def normalize_benchmark_name(name: str) -> str:
"""Strip pytest file node prefix so baselines match short or full benchmark names."""
text = str(name)
if "::" not in text:
return text
prefix, _, suffix = text.partition("::")
# Only strip module paths (…/test_foo.py::test_name); leave "::" inside [param::value] intact.
if prefix.endswith(".py"):
return suffix
return text


def load_results(results_path: str | Path) -> dict[str, float]:
path = Path(results_path)
try:
data = json.loads(path.read_text(encoding="utf-8"))
except OSError as exc:
raise BenchmarkDataError(f"cannot read {path}: {exc}") from exc
except json.JSONDecodeError as exc:
raise BenchmarkDataError(f"invalid JSON in {path}: {exc}") from exc
try:
benchmarks = data["benchmarks"]
except (KeyError, TypeError) as exc:
raise BenchmarkDataError(f"{path} missing top-level 'benchmarks' array") from exc
if not isinstance(benchmarks, list):
raise BenchmarkDataError(f"{path} 'benchmarks' must be an array")

results: dict[str, float] = {}
for index, entry in enumerate(benchmarks):
if not isinstance(entry, dict):
raise BenchmarkDataError(f"{path} benchmarks[{index}] must be an object")
try:
raw_name = entry["name"]
mean = float(entry["stats"]["mean"])
except (KeyError, TypeError, ValueError) as exc:
raise BenchmarkDataError(
f"{path} benchmarks[{index}] missing 'name' or 'stats.mean'"
) from exc
name = normalize_benchmark_name(str(raw_name))
if name in results:
raise BenchmarkDataError(f"{path} duplicate benchmark name {name!r}")
results[name] = mean
return results


def load_baseline_means(baselines_path: str | Path) -> dict[str, float]:
path = Path(baselines_path)
try:
data = json.loads(path.read_text(encoding="utf-8"))
except OSError as exc:
raise BenchmarkDataError(f"cannot read {path}: {exc}") from exc
except json.JSONDecodeError as exc:
raise BenchmarkDataError(f"invalid JSON in {path}: {exc}") from exc
if not isinstance(data, dict):
raise BenchmarkDataError(f"{path} root value must be an object")

if "groups" not in data:
raise BenchmarkDataError(f"{path} missing required 'groups' key")
groups = data["groups"]
if not isinstance(groups, dict):
raise BenchmarkDataError(f"{path} 'groups' must be an object")

means: dict[str, float] = {}
for group_name, value in groups.items():
if not isinstance(value, dict):
raise BenchmarkDataError(
f"{path} groups[{group_name!r}] must be an object of benchmark means"
)
for name, mean in value.items():
bench_name = normalize_benchmark_name(str(name))
if bench_name in means:
raise BenchmarkDataError(
f"{path} duplicate benchmark name {bench_name!r} across groups"
)
try:
means[bench_name] = float(mean)
except (TypeError, ValueError) as exc:
raise BenchmarkDataError(
f"{path} groups[{group_name!r}][{name!r}] is not a numeric mean"
) from exc
return means


def check_regression(
results_path: str | Path,
baselines_path: str | Path,
*,
threshold: float = THRESHOLD,
) -> int:
"""Return 0 when within threshold; 1 when any gated benchmark regresses."""
flat = load_results(results_path)
baseline_means = load_baseline_means(baselines_path)

failures: list[str] = []
missing: list[str] = []
for name, base in baseline_means.items():
cur = flat.get(name)
if cur is None:
print(f"FAIL: no current result for gated baseline {name!r}")
missing.append(name)
continue
if base == 0:
print(f"WARN: baseline for {name!r} is zero; skipping ratio check")
continue
ratio = cur / base
tag = "FAIL" if ratio > threshold else "ok"
print(f"[{tag}] {name}: {cur:.6f}s vs {base:.6f}s ({ratio:.2f}x)")
if ratio > threshold:
failures.append(name)

Comment thread
clean6378-max-it marked this conversation as resolved.
for name in flat:
if name not in baseline_means:
print(f"WARN: {name!r} has no baseline yet; not gated")

if failures:
print(f"\nREGRESSION: {len(failures)} benchmark(s) exceeded {threshold:.0%}")
if missing:
print(f"\nMISSING: {len(missing)} gated benchmark(s) absent from current results")
if failures or missing:
return 1
return 0


def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("results_path", help="pytest-benchmark --benchmark-json output")
parser.add_argument("baselines_path", help="path to benchmarks/baselines.json")
parser.add_argument(
"--threshold",
type=float,
default=THRESHOLD,
help="fail when current mean exceeds baseline by more than this ratio (default: 1.20)",
)
args = parser.parse_args(argv)
try:
return check_regression(
args.results_path,
args.baselines_path,
threshold=args.threshold,
)
except BenchmarkDataError as exc:
print(f"ERROR: {exc}", file=sys.stderr)
return 2


if __name__ == "__main__":
sys.exit(main())
Empty file added tests/benchmarks/__init__.py
Empty file.
89 changes: 89 additions & 0 deletions tests/benchmarks/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Synthetic workspace trees for summary-cache performance benchmarks."""

from __future__ import annotations

from pathlib import Path
from typing import Any

import pytest

from services import summary_cache
from services.summary_cache import fingerprint_workspace_storage


def make_workspace_entries(workspace_root: Path, count: int) -> list[dict[str, Any]]:
"""Build *count* synthetic workspace entries with on-disk state files."""
entries: list[dict[str, Any]] = []
for i in range(count):
name = f"ws_{i:04d}"
entry_dir = workspace_root / name
entry_dir.mkdir(parents=True, exist_ok=True)
(entry_dir / "state.vscdb").write_bytes(b"bench")
workspace_json = entry_dir / "workspace.json"
workspace_json.write_text('{"folder": "/bench"}', encoding="utf-8")
entries.append(
{
"name": name,
"workspaceJsonPath": str(workspace_json),
}
)
return entries


@pytest.fixture
def summary_cache_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
"""Redirect summary-cache files to an isolated temp directory.

Patches ``CACHE_DIR`` (also used by tab-summary paths via ``_tab_summaries_path``)
plus the projects/composer-map file constants used by current benchmarks.
Tab-summary cache benchmarks are deferred to issue #110 (unified benchmark suite).
"""
cache_dir = tmp_path / "cache"
cache_dir.mkdir()
monkeypatch.setattr(summary_cache, "CACHE_DIR", cache_dir)
monkeypatch.setattr(summary_cache, "PROJECTS_CACHE_FILE", cache_dir / "projects.json")
monkeypatch.setattr(
summary_cache,
"COMPOSER_MAP_CACHE_FILE",
cache_dir / "composer-id-to-ws.json",
)
Comment thread
clean6378-max-it marked this conversation as resolved.
return cache_dir


@pytest.fixture
def sample_projects() -> list[dict[str, Any]]:
return [
{
"id": "ws_0000",
"name": "Bench Project",
"conversationCount": 3,
"lastModified": "2026-06-24T00:00:00Z",
}
]


@pytest.fixture
def synthetic_workspace(tmp_path: Path, request: pytest.FixtureRequest) -> tuple[str, list[dict[str, Any]]]:
"""Workspace path + entries. Parametrize via indirect ``workspace_entry_count``."""
count = getattr(request, "param", 10)
workspace_root = tmp_path / "workspaceStorage"
workspace_root.mkdir()
entries = make_workspace_entries(workspace_root, count)
return str(workspace_root), entries


@pytest.fixture
def workspace_fingerprint(synthetic_workspace: tuple[str, list[dict[str, Any]]]) -> dict[str, Any]:
workspace_path, entries = synthetic_workspace
return fingerprint_workspace_storage(
workspace_path,
entries,
global_db_path=None,
rules=[],
)


@pytest.fixture
def stale_fingerprint(workspace_fingerprint: dict[str, Any]) -> dict[str, Any]:
"""Return a fingerprint guaranteed to differ from the stored one."""
return {**workspace_fingerprint, "rules_digest": "deadbeefdeadbeef"}
Loading
Loading