diff --git a/.github/workflows/memory-benchmark.yml b/.github/workflows/memory-benchmark.yml
new file mode 100644
index 00000000..5232e1ee
--- /dev/null
+++ b/.github/workflows/memory-benchmark.yml
@@ -0,0 +1,40 @@
+name: Python SDK memray memory benchmark
+
+on:
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - labeled  # lets adding the opt-in label start a run
+
+permissions:
+  contents: read  # read-only access to repository contents
+
+jobs:
+  memory-benchmark:  # opt-in via PR label and author association; see `if` below
+    name: Python SDK memray memory benchmark
+    # Runner architecture must match the one the baseline was generated on.
+    runs-on: ubuntu-24.04-arm
+    if: |
+      contains(github.event.pull_request.labels.*.name, 'check-memory-benchmark') &&
+      (
+        github.event.pull_request.author_association == 'COLLABORATOR' ||
+        github.event.pull_request.author_association == 'MEMBER' ||
+        github.event.pull_request.author_association == 'OWNER'
+      )
+    steps:
+      - uses: actions/checkout@v4
+
+      # The make target builds a Docker image and runs the benchmark in it (repeatable runs).
+      - name: Run memray memory benchmark
+        run: make memory-use-bench
+
+      # Upload the flamegraph HTML reports (peak/leaks/temporary views per scenario).
+      - name: Upload flamegraph reports
+        if: always()  # upload even when the benchmark step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: memray-flamegraphs
+          path: tests/perf/reports/*.html
+          if-no-files-found: warn  # missing reports produce a warning, not a failure
diff --git a/Makefile b/Makefile
index 06afe998..8a192083 100644
--- a/Makefile
+++ b/Makefile
@@ -148,10 +148,12 @@ MEMRAY_ITERATIONS ?= 100
 MEMRAY_THRESHOLD ?= 1.1
 SCENARIO ?=
 SCENARIO_ARG := $(if $(SCENARIO),--scenario $(SCENARIO),)
+# In CI, use env vars to write the report to the job's Step Summary
+GH_SUMMARY_MOUNT := $(if $(GITHUB_STEP_SUMMARY),-v $(GITHUB_STEP_SUMMARY):$(GITHUB_STEP_SUMMARY),)
 .PHONY: memory-use-bench
 memory-use-bench:
 	docker build -f tests/perf/Dockerfiles/$(PERF_ENV)-perf-Dockerfile -t c2pa-memray-$(PERF_ENV) .
-	docker run --rm -v $(PWD):/workspace -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(SCENARIO_ARG) $(PERF_ARGS)
+	docker run --rm -v $(PWD):/workspace $(GH_SUMMARY_MOUNT) -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) -e GITHUB_TOKEN -e GITHUB_STEP_SUMMARY c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(SCENARIO_ARG) $(PERF_ARGS)
 	@echo ""
 	@echo "Reports written to tests/perf/reports/"
 	@echo "Open tests/perf/reports/<scenario>-{peak,leaks,temporary}.html in a browser"
diff --git a/tests/perf/README.md b/tests/perf/README.md
index cccc60de..1e2baf41 100644
--- a/tests/perf/README.md
+++ b/tests/perf/README.md
@@ -67,6 +67,16 @@ The trailing `VAR=value` arguments (e.g. `PERF_ENV=ubuntu-24.04`, `PERF_ARGS=--u
 
 Reports are written to `tests/perf/reports/` on the local machine. Three HTML files per scenario, one per suffix (described below). Open any in a browser. After a run, the run also reports if the scenarios were or were not all within baseline threshold (baseline +10% memory use tolerance).
 
+## Running in CI
+
+The `.github/workflows/memory-benchmark.yml` workflow runs the Docker-based benchmarks on a PR, but only when the PR has the `check-memory-benchmark` label. This runs `make memory-use-bench`, so:
+
+- A regression (peak or leaked > baseline +10%) makes the benchmark job exit non-zero.
+- A values report table is written to the job's Step Summary.
+- All three flamegraph HTML views per scenario are uploaded as the `memray-flamegraphs` artifact.
+
+The gate only acts as a regression test once a `tests/perf/baseline.json` is committed on the branch. Without one, `run_profile.py` treats the run as baseline creation (exits 0, no gating).
+
 ## Report views
 
 Each scenario produces three [memray flamegraphs](https://bloomberg.github.io/memray/flamegraph.html). All three are flamegraphs of the same run. They differ only in which allocations they count.
diff --git a/tests/perf/run_profile.py b/tests/perf/run_profile.py
index 6c0edc93..afeb4f26 100644
--- a/tests/perf/run_profile.py
+++ b/tests/perf/run_profile.py
@@ -177,6 +177,49 @@ def _fmt(n: int) -> str:
     return f"{n} B"
 
 
+def _delta_pct(current: int, base: int) -> str:
+    """Return the signed percent change of `current` vs `base` (one decimal place), or '-' when `base` is falsy."""
+    if not base:  # no baseline value; also avoids dividing by zero below
+        return "-"
+    return f"{(current - base) / base * 100:+.1f}%"
+
+
+def _write_github_summary(results: dict, baseline: dict) -> None:
+    """Append a Markdown results table to the file named by $GITHUB_STEP_SUMMARY.
+    Does nothing when that variable is unset/empty or `results` is empty."""
+    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
+    if not summary_path or not results:  # variable unset/empty, or nothing to report
+        return
+
+    lines = [
+        "## Memory benchmark (memray)",
+        "",
+        f"Iterations: {ITERATIONS} · threshold: +{(THRESHOLD - 1) * 100:.0f}%"
+        f"{f' · env: {PERF_ENV}' if PERF_ENV else ''}",
+        "",
+        "| scenario | peak | leaked | allocs | peak Δ% | leaked Δ% | status |",
+        "|----------|------|--------|--------|---------|-----------|--------|",
+    ]
+    for name, m in results.items():  # m is read for peak_bytes, leaked_bytes, total_allocations
+        b = baseline.get(name, {}) if baseline else {}  # falsy baseline or unknown scenario -> {}
+        peak_base = b.get("peak_bytes", 0)
+        leaked_base = b.get("leaked_bytes", 0)
+        regressed = (  # a metric counts only when its baseline is non-zero and it exceeds base * THRESHOLD
+            (peak_base and m["peak_bytes"] > peak_base * THRESHOLD)
+            or (leaked_base and m["leaked_bytes"] > leaked_base * THRESHOLD)
+        )
+        status = "REGRESSED" if regressed else "ok"  # scenarios without baseline values also show "ok"
+        lines.append(
+            f"| {name} | {_fmt(m['peak_bytes'])} | {_fmt(m['leaked_bytes'])} "
+            f"| {m['total_allocations']} | {_delta_pct(m['peak_bytes'], peak_base)} "
+            f"| {_delta_pct(m['leaked_bytes'], leaked_base)} | {status} |"
+        )
+    lines.append("")  # blank line after the table
+
+    with open(summary_path, "a", encoding="utf-8") as fh:  # append mode keeps existing summary content
+        fh.write("\n".join(lines) + "\n")
+
+
 def main() -> None:
     parser = argparse.ArgumentParser(description="c2pa-python memory profiler")
     parser.add_argument(
@@ -319,6 +362,9 @@ def main() -> None:
         verb = "Updated" if prior_baseline else "Created"
         print(f"\n{verb} baseline: {BASELINE_FILE}")
 
+    # Append the report table to the job's Step Summary when running in CI.
+    _write_github_summary(results, baseline)
+
     if render_failures:
         print("\nFLAMEGRAPH RENDERS FAILED (capture + metrics still recorded):", file=sys.stderr)
         for r in render_failures: