diff --git a/.github/workflows/memory-benchmark.yml b/.github/workflows/memory-benchmark.yml new file mode 100644 index 00000000..5232e1ee --- /dev/null +++ b/.github/workflows/memory-benchmark.yml @@ -0,0 +1,40 @@ +name: Python SDK memray memory benchmark + +on: + pull_request: + types: + - opened + - reopened + - synchronize + - labeled + +permissions: + contents: read + +jobs: + memory-benchmark: + name: Python SDK memray memory benchmark + # Needs to match the arch the baseline was generated on. + runs-on: ubuntu-24.04-arm + if: | + contains(github.event.pull_request.labels.*.name, 'check-memory-benchmark') && + ( + github.event.pull_request.author_association == 'COLLABORATOR' || + github.event.pull_request.author_association == 'MEMBER' || + github.event.pull_request.author_association == 'OWNER' + ) + steps: + - uses: actions/checkout@v4 + + # Uses the Dockerfile environment for repeatable runs. + - name: Run memray memory benchmark + run: make memory-use-bench + + # Upload all three flamegraph views per scenario (peak/leaks/temporary). + - name: Upload flamegraph reports + if: always() + uses: actions/upload-artifact@v4 + with: + name: memray-flamegraphs + path: tests/perf/reports/*.html + if-no-files-found: warn diff --git a/Makefile b/Makefile index 06afe998..8a192083 100644 --- a/Makefile +++ b/Makefile @@ -148,10 +148,12 @@ MEMRAY_ITERATIONS ?= 100 MEMRAY_THRESHOLD ?= 1.1 SCENARIO ?= SCENARIO_ARG := $(if $(SCENARIO),--scenario $(SCENARIO),) +# In CI, use env vars to write the report to the job run summary +GH_SUMMARY_MOUNT := $(if $(GITHUB_STEP_SUMMARY),-v $(GITHUB_STEP_SUMMARY):$(GITHUB_STEP_SUMMARY),) .PHONY: memory-use-bench memory-use-bench: docker build -f tests/perf/Dockerfiles/$(PERF_ENV)-perf-Dockerfile -t c2pa-memray-$(PERF_ENV) . 
- docker run --rm -v $(PWD):/workspace -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(SCENARIO_ARG) $(PERF_ARGS) + docker run --rm -v $(PWD):/workspace $(GH_SUMMARY_MOUNT) -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) -e GITHUB_TOKEN -e GITHUB_STEP_SUMMARY c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(SCENARIO_ARG) $(PERF_ARGS) @echo "" @echo "Reports written to tests/perf/reports/" @echo "Open tests/perf/reports/-{peak,leaks,temporary}.html in a browser" diff --git a/tests/perf/README.md b/tests/perf/README.md index cccc60de..1e2baf41 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -67,6 +67,16 @@ The trailing `VAR=value` arguments (e.g. `PERF_ENV=ubuntu-24.04`, `PERF_ARGS=--u Reports are written to `tests/perf/reports/` on the local machine. Three HTML files per scenario, one per suffix (described below). Open any in a browser. After a run, the run also reports if the scenarios were or were not all within baseline threshold (baseline +10% memory use tolerance). +## Running in CI + +The `.github/workflows/memory-benchmark.yml` workflow runs the Docker-based benchmarks on a PR, but only when the PR has the `check-memory-benchmark` label. This runs `make memory-use-bench`, so: + +- A regression (peak or leaked > baseline +10%) makes the benchmark job exit non-zero. +- A values report table is written to the job's Step Summary. +- All three flamegraph HTML views per scenario are uploaded as the `memray-flamegraphs` artifact. + +The gate only acts as a regression test once a `tests/perf/baseline.json` is committed on the branch. Without one, `run_profile.py` treats the run as baseline creation (exits 0, no gating). 
def _delta_pct(current: int, base: int) -> str:
    """Format the signed percentage change of ``current`` relative to ``base``.

    Args:
        current: Value measured in this run.
        base: Baseline value to compare against. A falsy value (``0`` or
            ``None``) means there is no usable baseline.

    Returns:
        A signed percentage with one decimal place (e.g. ``"+12.5%"`` or
        ``"-3.0%"``), or ``"-"`` when ``base`` is falsy. The early return
        also guards the division against a zero baseline.
    """
    if not base:
        return "-"
    return f"{(current - base) / base * 100:+.1f}%"


def _write_github_summary(results: dict, baseline: dict) -> None:
    """Append a Markdown values table to ``$GITHUB_STEP_SUMMARY`` in CI.

    Does nothing when the ``GITHUB_STEP_SUMMARY`` environment variable is
    unset or empty, or when ``results`` is empty. Writing the summary is
    best-effort: an ``OSError`` while opening or writing the file is reported
    on stderr and swallowed, so a reporting problem cannot abort the run
    before the benchmark outcome itself has been reported.

    Args:
        results: Mapping of scenario name to its metrics. Each metrics
            mapping is read for ``peak_bytes``, ``leaked_bytes`` and
            ``total_allocations``.
        baseline: Mapping of scenario name to baseline metrics (read for
            ``peak_bytes`` and ``leaked_bytes``), or a falsy value when no
            baseline is available.
    """
    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_path or not results:
        return

    env_note = f" · env: {PERF_ENV}" if PERF_ENV else ""
    lines = [
        "## Memory benchmark (memray)",
        "",
        f"Iterations: {ITERATIONS} · threshold: "
        f"+{(THRESHOLD - 1) * 100:.0f}%{env_note}",
        "",
        "| scenario | peak | leaked | allocs | peak Δ% | leaked Δ% | status |",
        "|----------|------|--------|--------|---------|-----------|--------|",
    ]
    for name, m in results.items():
        b = baseline.get(name, {}) if baseline else {}
        peak_base = b.get("peak_bytes", 0)
        leaked_base = b.get("leaked_bytes", 0)
        # A missing or zero baseline value is treated as "nothing to compare
        # against" and therefore never marks the scenario as regressed.
        regressed = bool(
            (peak_base and m["peak_bytes"] > peak_base * THRESHOLD)
            or (leaked_base and m["leaked_bytes"] > leaked_base * THRESHOLD)
        )
        status = "REGRESSED" if regressed else "ok"
        # A literal pipe in a scenario name would otherwise split the cell
        # and corrupt the whole table row.
        cell_name = str(name).replace("|", "\\|")
        lines.append(
            f"| {cell_name} | {_fmt(m['peak_bytes'])} "
            f"| {_fmt(m['leaked_bytes'])} "
            f"| {m['total_allocations']} "
            f"| {_delta_pct(m['peak_bytes'], peak_base)} "
            f"| {_delta_pct(m['leaked_bytes'], leaked_base)} | {status} |"
        )
    lines.append("")

    try:
        # Append: other steps of the same job may already have written to
        # the summary file.
        with open(summary_path, "a", encoding="utf-8") as fh:
            fh.write("\n".join(lines) + "\n")
    except OSError as exc:
        print(
            f"warning: could not write GitHub step summary to "
            f"{summary_path}: {exc}",
            file=sys.stderr,
        )
memory profiler") parser.add_argument( @@ -319,6 +362,9 @@ def main() -> None: verb = "Updated" if prior_baseline else "Created" print(f"\n{verb} baseline: {BASELINE_FILE}") + # Emit the report table to the PR's Step Summary in CI. + _write_github_summary(results, baseline) + if render_failures: print("\nFLAMEGRAPH RENDERS FAILED (capture + metrics still recorded):", file=sys.stderr) for r in render_failures: