contentauth · tmathern · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026 · gpeacock
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "c2pa-python"
-version = "0.35.1"
+version = "0.35.2"
 requires-python = ">=3.10"
 description = "Python bindings for the C2PA Content Authenticity Initiative (CAI) library"
 readme = { file = "README.md", content-type = "text/markdown" }

diff --git a/src/c2pa/c2pa.py b/src/c2pa/c2pa.py
@@ -2263,6 +2263,9 @@ def _init_from_context(self, context, format_or_path,
         if not context.is_valid:
             raise C2paError("Context is not valid")
 
+        if manifest_data is not None and not isinstance(manifest_data, bytes):
+            raise TypeError(Reader._ERROR_MESSAGES['manifest_error'])
+
         # Determine format and open stream
         supported = Reader.get_supported_mime_types()
 
@@ -2303,10 +2306,6 @@ def _init_from_context(self, context, format_or_path,
                 raise
 
             if manifest_data is not None:
-                if not isinstance(manifest_data, bytes):
-                    raise TypeError(
-                        Reader._ERROR_MESSAGES[
-                            'manifest_error'])
                 manifest_array = (
                     ctypes.c_ubyte *
                     len(manifest_data)).from_buffer_copy(manifest_data)
@@ -2322,14 +2321,15 @@ def _init_from_context(self, context, format_or_path,
                         len(manifest_data),
                     )
                 )
-                # reader_ptr has been invalidated(consumed)
             else:
                 # Consume reader with stream
                 new_ptr = _lib.c2pa_reader_with_stream(
                     reader_ptr, format_bytes,
                     self._own_stream._stream,
                 )
-                # reader_ptr has been invalidated(consumed)
+
+            # reader_ptr has been consumed by the FFI call.
+            reader_ptr = None
 
             self._handle = new_ptr
 

diff --git a/tests/perf/baseline.json b/tests/perf/baseline.json
@@ -186,5 +186,10 @@
     "peak_bytes": 3382176,
     "leaked_bytes": 3210242,
     "total_allocations": 111383
+  },
+  "reader_manifest_data_context": {
+    "peak_bytes": 7575548,
+    "leaked_bytes": 3399851,
+    "total_allocations": 1414270
   }
 }
diff --git a/tests/perf/run_profile.py b/tests/perf/run_profile.py
@@ -14,7 +14,10 @@
   <name>-leaks.html (--leaks), <name>-temporary.html (--temporary-allocations)
 - Reads peak_bytes and leaked_bytes from the .bin via memray.FileReader
 - Compares against baseline.json (creates it on first run)
-- Exits non-zero if any metric exceeds baseline * threshold
+- Exits non-zero only if leaked_bytes exceeds baseline * threshold. peak_bytes
+  is reported (and any over-threshold drift noted) but never fails the run: it
+  is a high-water mark that swings with allocation timing on alloc-heavy
+  scenarios, so it is informational, not a gate.
 
 Usage:
     python -m tests.perf.run_profile [--update-baseline]
@@ -194,9 +197,15 @@ def main() -> None:
 
     REPORTS_DIR.mkdir(parents=True, exist_ok=True)
 
-    baseline: dict = {}
-    if BASELINE_FILE.exists() and not args.update_baseline:
-        baseline = json.loads(BASELINE_FILE.read_text())
+    # prior_baseline: the existing file, always loaded so a single-scenario
+    # update can preserve the other scenarios' entries when it rewrites the file.
+    prior_baseline: dict = {}
+
+    # baseline: the subset used for the regression comparison below, which is
+    # suppressed when --update-baseline is set (because we are re-baselining).
+    if BASELINE_FILE.exists():
+        prior_baseline = json.loads(BASELINE_FILE.read_text())
+    baseline: dict = {} if args.update_baseline else prior_baseline
 
     results: dict = {}
     failures: list[str] = []
@@ -242,16 +251,25 @@ def main() -> None:
 
             if baseline and name in baseline:
                 b = baseline[name]
+                # Only leaked_bytes gates the run. It is the leak signal and is
+                # stable run-to-run. peak_bytes is a high-water mark that swings
+                # with transient-allocation timing on alloc-heavy scenarios,
+                # so it is reported for visibility but doesn't fail the run.
                 for metric in ("peak_bytes", "leaked_bytes"):
                     current = metrics[metric]
                     base = b.get(metric, 0)
                     limit = base * THRESHOLD
-                    if current > limit:
-                        diff_pct = (current - base) / base * 100 if base else float("inf")
-                        failures.append(
-                            f"{name}.{metric}: {_fmt(current)} > baseline {_fmt(base)}"
-                            f" (+{diff_pct:.1f}%, threshold {(THRESHOLD-1)*100:.0f}%)"
-                        )
+                    if current <= limit:
+                        continue
+                    diff_pct = (current - base) / base * 100 if base else float("inf")
+                    msg = (
+                        f"{name}.{metric}: {_fmt(current)} > baseline {_fmt(base)}"
+                        f" (+{diff_pct:.1f}%, threshold {(THRESHOLD-1)*100:.0f}%)"
+                    )
+                    if metric == "leaked_bytes":
+                        failures.append(msg)
+                    else:
+                        print(f"  note (informational): {msg}", flush=True)
         finally:
             if scenario_render_failed:
                 # Keep the capture so the failed view can be re-rendered
@@ -265,18 +283,40 @@ def main() -> None:
             else:
                 bin_path.unlink(missing_ok=True)
 
-    if args.update_baseline or not baseline:
+    if args.update_baseline or not prior_baseline:
         # When running a single scenario, merge its result into the existing
         # baseline so the other scenarios' entries are preserved. A full run
         # replaces the file wholesale.
-        if args.scenario and baseline:
-            output = dict(baseline)
+        if args.scenario and prior_baseline:
+            output = dict(prior_baseline)
         else:
             output = {}
-        output["_meta"] = _build_meta()
+        new_meta = _build_meta()
+        # On a single-scenario merge the new entry must come from the same
+        # toolchain as the entries it is being merged next to, or the numbers
+        # are not comparable. Warn if _meta would change (e.g. wrong PERF_ENV,
+        # iteration count, or native version) instead of silently overwriting it.
+        if args.scenario and prior_baseline:
+            old_meta = prior_baseline.get("_meta", {})
+            if old_meta and old_meta != new_meta:
+                diffs = sorted(
+                    set(old_meta) | set(new_meta),
+                    key=str,
+                )
+                changed = [
+                    f"{k}: {old_meta.get(k)!r} -> {new_meta.get(k)!r}"
+                    for k in diffs if old_meta.get(k) != new_meta.get(k)
+                ]
+                print(
+                    "\nWARNING: this run's environment differs from the existing "
+                    "baseline's _meta; the merged entry will NOT be comparable to "
+                    "the other scenarios:\n  " + "\n  ".join(changed),
+                    file=sys.stderr,
+                )
+        output["_meta"] = new_meta
         output.update(results)
         BASELINE_FILE.write_text(json.dumps(output, indent=2))
-        verb = "Updated" if baseline else "Created"
+        verb = "Updated" if prior_baseline else "Created"
         print(f"\n{verb} baseline: {BASELINE_FILE}")
 
     if render_failures:
@@ -288,12 +328,14 @@ def main() -> None:
               "--temporary-allocations --temporary-allocation-threshold=10 --force", file=sys.stderr)
 
     if failures:
-        print("\nREGRESSIONS DETECTED:", file=sys.stderr)
+        print("\nLEAK REGRESSIONS DETECTED (leaked_bytes over baseline):",
+              file=sys.stderr)
         for f in failures:
             print(f"  {f}", file=sys.stderr)
         sys.exit(1)
 
-    print("\nAll scenarios within baseline thresholds.")
+    print("\nAll scenarios within baseline leaked_bytes thresholds "
+          "(peak_bytes is informational only).")
 
 
 if __name__ == "__main__":

diff --git a/tests/perf/scenarios.py b/tests/perf/scenarios.py
@@ -693,6 +693,32 @@ def scenario_reader_jpeg_with_context(iterations: int = 100) -> None:
     _read_file_context(SIGNED_JPEG, "image/jpeg", iterations)
 
 
+def scenario_reader_manifest_data_context(iterations: int = 100) -> None:
+    """Reader over a detached (sidecar) manifest with a Context.
+
+    Exercises c2pa_reader_with_manifest_data_and_stream, the consume-and-swap
+    FFI path (reader_from_context handle is consumed and replaced each call).
+    The manifest is signed once outside the loop; each iteration re-reads the
+    same asset + detached manifest, so flat RSS confirms no per-iteration leak
+    in the consume-and-swap path.
+    """
+    source_bytes = SOURCE_JPEG.read_bytes()
+    signer = _make_signer()
+    builder = Builder({**MANIFEST_BASE, "format": "image/jpeg"})
+    builder.set_no_embed()
+    manifest_bytes = builder.sign(
+        signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO())
+    builder.close()
+    signer.close()
+
+    context = Context()
+    for _ in _iterate(iterations):
+        reader = Reader("image/jpeg", io.BytesIO(source_bytes),
+                        manifest_data=manifest_bytes, context=context)
+        reader.json()
+        reader.close()
+
+
 # Parallel signing variants: one shared Context across 10 threads.
 # {split, full} x {pool, barrier} x {jpeg, png}.
 
@@ -892,6 +918,7 @@ def scenario_fork_stream_cleanup(iterations: int = 100) -> None:
 SCENARIOS = {
     "reader_jpeg_legacy": scenario_reader_jpeg_legacy,
     "reader_jpeg_with_context": scenario_reader_jpeg_with_context,
+    "reader_manifest_data_context": scenario_reader_manifest_data_context,
     "reader_mp4": scenario_reader_mp4,
     "reader_wav": scenario_reader_wav,
     "builder_sign_jpeg_legacy": scenario_builder_sign_jpeg_legacy,

diff --git a/tests/test_unit_tests.py b/tests/test_unit_tests.py
@@ -6041,6 +6041,119 @@ def test_with_fragment_with_dash_fixtures(self):
         context.close()
 
 
+class TestSidecarReader(TestContextAPIs):
+    """Reader over a detached (sidecar) manifest with a Context"""
+
+    def _make_detached_manifest(self):
+        """Sign DEFAULT_TEST_FILE with no-embed, return (asset_bytes,
+        manifest_bytes) where manifest_bytes is the "detached" sidecar
+        C2PA manifest."""
+
+        signer = self._ctx_make_signer()
+        with open(DEFAULT_TEST_FILE, "rb") as f:
+            asset_bytes = f.read()
+        builder = Builder(self.test_manifest)
+        builder.set_no_embed()
+        # Output is discarded: with no-embed the asset is unchanged and the
+        # manifest is returned by sign().
+        manifest_bytes = builder.sign(
+            signer, "image/jpeg", io.BytesIO(asset_bytes), io.BytesIO())
+        builder.close()
+        signer.close()
+        self.assertIsInstance(manifest_bytes, bytes)
+        self.assertGreater(len(manifest_bytes), 0)
+        return asset_bytes, manifest_bytes
+
+    def test_reader_with_manifest_data_and_context(self):
+        asset_bytes, manifest_bytes = self._make_detached_manifest()
+        context = Context()
+        reader = Reader(
+            "image/jpeg",
+            io.BytesIO(asset_bytes),
+            manifest_bytes,
+            context=context,
+        )
+        try:
+            data = reader.json()
+            self.assertTrue(data)
+            self.assertFalse(reader.is_embedded())
+            self.assertIn("manifests", json.loads(data))
+        finally:
+            reader.close()
+            context.close()
+
+    def test_reader_manifest_data_context_invalid_manifest_raises(self):
+        with open(DEFAULT_TEST_FILE, "rb") as f:
+            asset_bytes = f.read()
+        context = Context()
+        reader = None
+        try:
+            with self.assertRaises(Error):
+                reader = Reader(
+                    "image/jpeg",
+                    io.BytesIO(asset_bytes),
+                    b"not a real manifest",
+                    context=context,
+                )
+            # The consumed-pointer error branch must leave no usable handle.
+            if reader is not None:
+                self.assertEqual(
+                    reader._lifecycle_state, LifecycleState.CLOSED)
+                self.assertIsNone(reader._handle)
+        finally:
+            if reader is not None:
+                reader.close()
+            context.close()
+
+    def test_reader_manifest_data_context_wrong_type_raises(self):
+        with open(DEFAULT_TEST_FILE, "rb") as f:
+            asset_bytes = f.read()
+        context = Context()
+        try:
+            # Non-bytes manifest_data must raise TypeError before any native
+            # reader handle is allocated (no leak on this path).
+            with self.assertRaises(TypeError):
+                Reader(
+                    "image/jpeg",
+                    io.BytesIO(asset_bytes),
+                    "manifest as str, not bytes",
+                    context=context,
+                )
+        finally:
+            context.close()
+
+    def test_reader_manifest_data_context_use_after_close_raises(self):
+        asset_bytes, manifest_bytes = self._make_detached_manifest()
+        context = Context()
+        reader = Reader(
+            "image/jpeg",
+            io.BytesIO(asset_bytes),
+            manifest_bytes,
+            context=context,
+        )
+        reader.close()
+        self.assertEqual(reader._lifecycle_state, LifecycleState.CLOSED)
+        with self.assertRaises(Error):
+            reader.json()
+        # Idempotent close after use-after-close attempt.
+        reader.close()
+        context.close()
+
+    def test_reader_manifest_data_context_as_context_manager(self):
+        asset_bytes, manifest_bytes = self._make_detached_manifest()
+        context = Context()
+        with Reader(
+            "image/jpeg",
+            io.BytesIO(asset_bytes),
+            manifest_bytes,
+            context=context,
+        ) as reader:
+            self.assertEqual(reader._lifecycle_state, LifecycleState.ACTIVE)
+            self.assertTrue(reader.json())
+        self.assertEqual(reader._lifecycle_state, LifecycleState.CLOSED)
+        context.close()
+
+
 class TestBuilderWithContext(TestContextAPIs):
 
     def test_contextual_builder_with_default_context(self):