diff --git a/pyproject.toml b/pyproject.toml index 3929c759..6cf54fd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "c2pa-python" -version = "0.35.1" +version = "0.35.2" requires-python = ">=3.10" description = "Python bindings for the C2PA Content Authenticity Initiative (CAI) library" readme = { file = "README.md", content-type = "text/markdown" } diff --git a/src/c2pa/c2pa.py b/src/c2pa/c2pa.py index 8b252bdc..07f12ed4 100644 --- a/src/c2pa/c2pa.py +++ b/src/c2pa/c2pa.py @@ -2263,6 +2263,9 @@ def _init_from_context(self, context, format_or_path, if not context.is_valid: raise C2paError("Context is not valid") + if manifest_data is not None and not isinstance(manifest_data, bytes): + raise TypeError(Reader._ERROR_MESSAGES['manifest_error']) + # Determine format and open stream supported = Reader.get_supported_mime_types() @@ -2303,10 +2306,6 @@ def _init_from_context(self, context, format_or_path, raise if manifest_data is not None: - if not isinstance(manifest_data, bytes): - raise TypeError( - Reader._ERROR_MESSAGES[ - 'manifest_error']) manifest_array = ( ctypes.c_ubyte * len(manifest_data)).from_buffer_copy(manifest_data) @@ -2322,14 +2321,15 @@ def _init_from_context(self, context, format_or_path, len(manifest_data), ) ) - # reader_ptr has been invalidated(consumed) else: # Consume reader with stream new_ptr = _lib.c2pa_reader_with_stream( reader_ptr, format_bytes, self._own_stream._stream, ) - # reader_ptr has been invalidated(consumed) + + # reader_ptr has been consumed by the FFI call. + reader_ptr = None self._handle = new_ptr diff --git a/tests/perf/baseline.json b/tests/perf/baseline.json index db034858..6908e7da 100644 --- a/tests/perf/baseline.json +++ b/tests/perf/baseline.json @@ -186,5 +186,10 @@ "peak_bytes": 3382176, "leaked_bytes": 3210242, "total_allocations": 111383 + }, + "reader_manifest_data_context": { + "peak_bytes": 7575548, + "leaked_bytes": 3399851, + "total_allocations": 1414270 } } \ No newline at end of file diff --git a/tests/perf/run_profile.py b/tests/perf/run_profile.py index 9b8d4651..6c0edc93 100644 --- a/tests/perf/run_profile.py +++ b/tests/perf/run_profile.py @@ -14,7 +14,10 @@ -leaks.html (--leaks), -temporary.html (--temporary-allocations) - Reads peak_bytes and leaked_bytes from the .bin via memray.FileReader - Compares against baseline.json (creates it on first run) -- Exits non-zero if any metric exceeds baseline * threshold +- Exits non-zero only if leaked_bytes exceeds baseline * threshold. peak_bytes + is reported (and any over-threshold drift noted) but never fails the run: it + is a high-water mark that swings with allocation timing on alloc-heavy + scenarios, so it is informational, not a gate. Usage: python -m tests.perf.run_profile [--update-baseline] @@ -194,9 +197,15 @@ def main() -> None: REPORTS_DIR.mkdir(parents=True, exist_ok=True) - baseline: dict = {} - if BASELINE_FILE.exists() and not args.update_baseline: - baseline = json.loads(BASELINE_FILE.read_text()) + # prior_baseline: the existing file, always loaded so a single-scenario + # update can preserve the other scenarios' entries when it rewrites the file. + prior_baseline: dict = {} + + # baseline: the subset used for the regression comparison below, which is + # suppressed when --update-baseline is set (because we are re-baselining). + if BASELINE_FILE.exists(): + prior_baseline = json.loads(BASELINE_FILE.read_text()) + baseline: dict = {} if args.update_baseline else prior_baseline results: dict = {} failures: list[str] = [] @@ -242,16 +251,25 @@ def main() -> None: if baseline and name in baseline: b = baseline[name] + # Only leaked_bytes gates the run. It is the leak signal and is + # stable run-to-run. peak_bytes is a high-water mark that swings + # with transient-allocation timing on alloc-heavy scenarios, + # so it is reported for visibility but doesn't fail the run. for metric in ("peak_bytes", "leaked_bytes"): current = metrics[metric] base = b.get(metric, 0) limit = base * THRESHOLD - if current > limit: - diff_pct = (current - base) / base * 100 if base else float("inf") - failures.append( - f"{name}.{metric}: {_fmt(current)} > baseline {_fmt(base)}" - f" (+{diff_pct:.1f}%, threshold {(THRESHOLD-1)*100:.0f}%)" - ) + if current <= limit: + continue + diff_pct = (current - base) / base * 100 if base else float("inf") + msg = ( + f"{name}.{metric}: {_fmt(current)} > baseline {_fmt(base)}" + f" (+{diff_pct:.1f}%, threshold {(THRESHOLD-1)*100:.0f}%)" + ) + if metric == "leaked_bytes": + failures.append(msg) + else: + print(f" note (informational): {msg}", flush=True) finally: if scenario_render_failed: # Keep the capture so the failed view can be re-rendered @@ -265,18 +283,40 @@ def main() -> None: else: bin_path.unlink(missing_ok=True) - if args.update_baseline or not baseline: + if args.update_baseline or not prior_baseline: # When running a single scenario, merge its result into the existing # baseline so the other scenarios' entries are preserved. A full run # replaces the file wholesale. - if args.scenario and baseline: - output = dict(baseline) + if args.scenario and prior_baseline: + output = dict(prior_baseline) else: output = {} - output["_meta"] = _build_meta() + new_meta = _build_meta() + # On a single-scenario merge the new entry must come from the same + # toolchain as the entries it is being merged next to, or the numbers + # are not comparable. Warn if _meta would change (e.g. wrong PERF_ENV, + # iteration count, or native version) instead of silently overwriting it. + if args.scenario and prior_baseline: + old_meta = prior_baseline.get("_meta", {}) + if old_meta and old_meta != new_meta: + diffs = sorted( + set(old_meta) | set(new_meta), + key=str, + ) + changed = [ + f"{k}: {old_meta.get(k)!r} -> {new_meta.get(k)!r}" + for k in diffs if old_meta.get(k) != new_meta.get(k) + ] + print( + "\nWARNING: this run's environment differs from the existing " + "baseline's _meta; the merged entry will NOT be comparable to " + "the other scenarios:\n " + "\n ".join(changed), + file=sys.stderr, + ) + output["_meta"] = new_meta output.update(results) BASELINE_FILE.write_text(json.dumps(output, indent=2)) - verb = "Updated" if baseline else "Created" + verb = "Updated" if prior_baseline else "Created" print(f"\n{verb} baseline: {BASELINE_FILE}") if render_failures: @@ -288,12 +328,14 @@ def main() -> None: "--temporary-allocations --temporary-allocation-threshold=10 --force", file=sys.stderr) if failures: - print("\nREGRESSIONS DETECTED:", file=sys.stderr) + print("\nLEAK REGRESSIONS DETECTED (leaked_bytes over baseline):", + file=sys.stderr) for f in failures: print(f" {f}", file=sys.stderr) sys.exit(1) - print("\nAll scenarios within baseline thresholds.") + print("\nAll scenarios within baseline leaked_bytes thresholds " + "(peak_bytes is informational only).") if __name__ == "__main__": diff --git a/tests/perf/scenarios.py b/tests/perf/scenarios.py index d2baae19..0e367fb0 100644 --- a/tests/perf/scenarios.py +++ b/tests/perf/scenarios.py @@ -693,6 +693,32 @@ def scenario_reader_jpeg_with_context(iterations: int = 100) -> None: _read_file_context(SIGNED_JPEG, "image/jpeg", iterations) +def scenario_reader_manifest_data_context(iterations: int = 100) -> None: + """Reader over a detached (sidecar) manifest with a Context. + + Exercises c2pa_reader_with_manifest_data_and_stream, the consume-and-swap + FFI path (reader_from_context handle is consumed and replaced each call). + The manifest is signed once outside the loop; each iteration re-reads the + same asset + detached manifest, so flat RSS confirms no per-iteration leak + in the consume-and-swap path. + """ + source_bytes = SOURCE_JPEG.read_bytes() + signer = _make_signer() + builder = Builder({**MANIFEST_BASE, "format": "image/jpeg"}) + builder.set_no_embed() + manifest_bytes = builder.sign( + signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + builder.close() + signer.close() + + context = Context() + for _ in _iterate(iterations): + reader = Reader("image/jpeg", io.BytesIO(source_bytes), + manifest_data=manifest_bytes, context=context) + reader.json() + reader.close() + + # Parallel signing variants: one shared Context across 10 threads. # {split, full} x {pool, barrier} x {jpeg, png}. @@ -892,6 +918,7 @@ def scenario_fork_stream_cleanup(iterations: int = 100) -> None: SCENARIOS = { "reader_jpeg_legacy": scenario_reader_jpeg_legacy, "reader_jpeg_with_context": scenario_reader_jpeg_with_context, + "reader_manifest_data_context": scenario_reader_manifest_data_context, "reader_mp4": scenario_reader_mp4, "reader_wav": scenario_reader_wav, "builder_sign_jpeg_legacy": scenario_builder_sign_jpeg_legacy, diff --git a/tests/test_unit_tests.py b/tests/test_unit_tests.py index 0bb62451..e0b3289c 100644 --- a/tests/test_unit_tests.py +++ b/tests/test_unit_tests.py @@ -6041,6 +6041,119 @@ def test_with_fragment_with_dash_fixtures(self): context.close() +class TestSidecarReader(TestContextAPIs): + """Reader over a detached (sidecar) manifest with a Context""" + + def _make_detached_manifest(self): + """Sign DEFAULT_TEST_FILE with no-embed, return (asset_bytes, + manifest_bytes) where manifest_bytes is the "detached" sidecar + C2PA manifest.""" + + signer = self._ctx_make_signer() + with open(DEFAULT_TEST_FILE, "rb") as f: + asset_bytes = f.read() + builder = Builder(self.test_manifest) + builder.set_no_embed() + # Output is discarded: with no-embed the asset is unchanged and the + # manifest is returned by sign(). + manifest_bytes = builder.sign( + signer, "image/jpeg", io.BytesIO(asset_bytes), io.BytesIO()) + builder.close() + signer.close() + self.assertIsInstance(manifest_bytes, bytes) + self.assertGreater(len(manifest_bytes), 0) + return asset_bytes, manifest_bytes + + def test_reader_with_manifest_data_and_context(self): + asset_bytes, manifest_bytes = self._make_detached_manifest() + context = Context() + reader = Reader( + "image/jpeg", + io.BytesIO(asset_bytes), + manifest_bytes, + context=context, + ) + try: + data = reader.json() + self.assertTrue(data) + self.assertFalse(reader.is_embedded()) + self.assertIn("manifests", json.loads(data)) + finally: + reader.close() + context.close() + + def test_reader_manifest_data_context_invalid_manifest_raises(self): + with open(DEFAULT_TEST_FILE, "rb") as f: + asset_bytes = f.read() + context = Context() + reader = None + try: + with self.assertRaises(Error): + reader = Reader( + "image/jpeg", + io.BytesIO(asset_bytes), + b"not a real manifest", + context=context, + ) + # The consumed-pointer error branch must leave no usable handle. + if reader is not None: + self.assertEqual( + reader._lifecycle_state, LifecycleState.CLOSED) + self.assertIsNone(reader._handle) + finally: + if reader is not None: + reader.close() + context.close() + + def test_reader_manifest_data_context_wrong_type_raises(self): + with open(DEFAULT_TEST_FILE, "rb") as f: + asset_bytes = f.read() + context = Context() + try: + # Non-bytes manifest_data must raise TypeError before any native + # reader handle is allocated (no leak on this path). + with self.assertRaises(TypeError): + Reader( + "image/jpeg", + io.BytesIO(asset_bytes), + "manifest as str, not bytes", + context=context, + ) + finally: + context.close() + + def test_reader_manifest_data_context_use_after_close_raises(self): + asset_bytes, manifest_bytes = self._make_detached_manifest() + context = Context() + reader = Reader( + "image/jpeg", + io.BytesIO(asset_bytes), + manifest_bytes, + context=context, + ) + reader.close() + self.assertEqual(reader._lifecycle_state, LifecycleState.CLOSED) + with self.assertRaises(Error): + reader.json() + # Idempotent close after use-after-close attempt. + reader.close() + context.close() + + def test_reader_manifest_data_context_as_context_manager(self): + asset_bytes, manifest_bytes = self._make_detached_manifest() + context = Context() + with Reader( + "image/jpeg", + io.BytesIO(asset_bytes), + manifest_bytes, + context=context, + ) as reader: + self.assertEqual(reader._lifecycle_state, LifecycleState.ACTIVE) + self.assertTrue(reader.json()) + self.assertEqual(reader._lifecycle_state, LifecycleState.CLOSED) + context.close() + + class TestBuilderWithContext(TestContextAPIs): def test_contextual_builder_with_default_context(self):