Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "c2pa-python"
version = "0.35.1"
version = "0.35.2"
requires-python = ">=3.10"
description = "Python bindings for the C2PA Content Authenticity Initiative (CAI) library"
readme = { file = "README.md", content-type = "text/markdown" }
Expand Down
12 changes: 6 additions & 6 deletions src/c2pa/c2pa.py
Original file line number Diff line number Diff line change
Expand Up @@ -2263,6 +2263,9 @@ def _init_from_context(self, context, format_or_path,
if not context.is_valid:
raise C2paError("Context is not valid")

if manifest_data is not None and not isinstance(manifest_data, bytes):
raise TypeError(Reader._ERROR_MESSAGES['manifest_error'])

# Determine format and open stream
supported = Reader.get_supported_mime_types()

Expand Down Expand Up @@ -2303,10 +2306,6 @@ def _init_from_context(self, context, format_or_path,
raise

if manifest_data is not None:
if not isinstance(manifest_data, bytes):
raise TypeError(
Reader._ERROR_MESSAGES[
'manifest_error'])
manifest_array = (
ctypes.c_ubyte *
len(manifest_data)).from_buffer_copy(manifest_data)
Expand All @@ -2322,14 +2321,15 @@ def _init_from_context(self, context, format_or_path,
len(manifest_data),
)
)
# reader_ptr has been invalidated(consumed)
else:
# Consume reader with stream
new_ptr = _lib.c2pa_reader_with_stream(
reader_ptr, format_bytes,
self._own_stream._stream,
)
# reader_ptr has been invalidated(consumed)

# reader_ptr has been consumed by the FFI call.
reader_ptr = None

self._handle = new_ptr

Expand Down
5 changes: 5 additions & 0 deletions tests/perf/baseline.json
Original file line number Diff line number Diff line change
Expand Up @@ -186,5 +186,10 @@
"peak_bytes": 3382176,
"leaked_bytes": 3210242,
"total_allocations": 111383
},
"reader_manifest_data_context": {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

quite a leap here in peak_bytes and total_allocations, do you know why?

"peak_bytes": 7575548,
"leaked_bytes": 3399851,
"total_allocations": 1414270
}
}
76 changes: 59 additions & 17 deletions tests/perf/run_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
<name>-leaks.html (--leaks), <name>-temporary.html (--temporary-allocations)
- Reads peak_bytes and leaked_bytes from the .bin via memray.FileReader
- Compares against baseline.json (creates it on first run)
- Exits non-zero if any metric exceeds baseline * threshold
- Exits non-zero only if leaked_bytes exceeds baseline * threshold. peak_bytes
is reported (and any over-threshold drift noted) but never fails the run: it
is a high-water mark that swings with allocation timing on alloc-heavy
scenarios, so it is informational, not a gate.

Usage:
python -m tests.perf.run_profile [--update-baseline]
Expand Down Expand Up @@ -194,9 +197,15 @@ def main() -> None:

REPORTS_DIR.mkdir(parents=True, exist_ok=True)

baseline: dict = {}
if BASELINE_FILE.exists() and not args.update_baseline:
baseline = json.loads(BASELINE_FILE.read_text())
# prior_baseline: the existing file, always loaded so a single-scenario
# update can preserve the other scenarios' entries when it rewrites the file.
prior_baseline: dict = {}

# baseline: the subset used for the regression comparison below, which is
# suppressed when --update-baseline is set (because we are re-baselining).
if BASELINE_FILE.exists():
prior_baseline = json.loads(BASELINE_FILE.read_text())
baseline: dict = {} if args.update_baseline else prior_baseline

results: dict = {}
failures: list[str] = []
Expand Down Expand Up @@ -242,16 +251,25 @@ def main() -> None:

if baseline and name in baseline:
b = baseline[name]
# Only leaked_bytes gates the run. It is the leak signal and is

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this makes me wonder if we should expose an API for c_ffi leaked bytes. But I'm not sure exactly how that would work.

# stable run-to-run. peak_bytes is a high-water mark that swings
# with transient-allocation timing on alloc-heavy scenarios,
# so it is reported for visibility but doesn't fail the run.
for metric in ("peak_bytes", "leaked_bytes"):
current = metrics[metric]
base = b.get(metric, 0)
limit = base * THRESHOLD
if current > limit:
diff_pct = (current - base) / base * 100 if base else float("inf")
failures.append(
f"{name}.{metric}: {_fmt(current)} > baseline {_fmt(base)}"
f" (+{diff_pct:.1f}%, threshold {(THRESHOLD-1)*100:.0f}%)"
)
if current <= limit:
continue
diff_pct = (current - base) / base * 100 if base else float("inf")
msg = (
f"{name}.{metric}: {_fmt(current)} > baseline {_fmt(base)}"
f" (+{diff_pct:.1f}%, threshold {(THRESHOLD-1)*100:.0f}%)"
)
if metric == "leaked_bytes":
failures.append(msg)
else:
print(f" note (informational): {msg}", flush=True)
finally:
if scenario_render_failed:
# Keep the capture so the failed view can be re-rendered
Expand All @@ -265,18 +283,40 @@ def main() -> None:
else:
bin_path.unlink(missing_ok=True)

if args.update_baseline or not baseline:
if args.update_baseline or not prior_baseline:
# When running a single scenario, merge its result into the existing
# baseline so the other scenarios' entries are preserved. A full run
# replaces the file wholesale.
if args.scenario and baseline:
output = dict(baseline)
if args.scenario and prior_baseline:
output = dict(prior_baseline)
else:
output = {}
output["_meta"] = _build_meta()
new_meta = _build_meta()
# On a single-scenario merge the new entry must come from the same
# toolchain as the entries it is being merged next to, or the numbers
# are not comparable. Warn if _meta would change (e.g. wrong PERF_ENV,
# iteration count, or native version) instead of silently overwriting it.
if args.scenario and prior_baseline:
old_meta = prior_baseline.get("_meta", {})
if old_meta and old_meta != new_meta:
diffs = sorted(
set(old_meta) | set(new_meta),
key=str,
)
changed = [
f"{k}: {old_meta.get(k)!r} -> {new_meta.get(k)!r}"
for k in diffs if old_meta.get(k) != new_meta.get(k)
]
print(
"\nWARNING: this run's environment differs from the existing "
"baseline's _meta; the merged entry will NOT be comparable to "
"the other scenarios:\n " + "\n ".join(changed),
file=sys.stderr,
)
output["_meta"] = new_meta
output.update(results)
BASELINE_FILE.write_text(json.dumps(output, indent=2))
verb = "Updated" if baseline else "Created"
verb = "Updated" if prior_baseline else "Created"
print(f"\n{verb} baseline: {BASELINE_FILE}")

if render_failures:
Expand All @@ -288,12 +328,14 @@ def main() -> None:
"--temporary-allocations --temporary-allocation-threshold=10 --force", file=sys.stderr)

if failures:
print("\nREGRESSIONS DETECTED:", file=sys.stderr)
print("\nLEAK REGRESSIONS DETECTED (leaked_bytes over baseline):",
file=sys.stderr)
for f in failures:
print(f" {f}", file=sys.stderr)
sys.exit(1)

print("\nAll scenarios within baseline thresholds.")
print("\nAll scenarios within baseline leaked_bytes thresholds "
"(peak_bytes is informational only).")


if __name__ == "__main__":
Expand Down
27 changes: 27 additions & 0 deletions tests/perf/scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,32 @@ def scenario_reader_jpeg_with_context(iterations: int = 100) -> None:
_read_file_context(SIGNED_JPEG, "image/jpeg", iterations)


def scenario_reader_manifest_data_context(iterations: int = 100) -> None:
    """Read an asset plus its detached (sidecar) manifest through a Context.

    Targets c2pa_reader_with_manifest_data_and_stream, the consume-and-swap
    FFI path in which the handle obtained from the context is consumed and
    replaced on every call.

    The sidecar manifest is produced a single time before the loop. Each
    iteration then builds a fresh Reader over the same asset bytes and the
    same manifest bytes, reusing one Context, so flat RSS across iterations
    indicates that the consume-and-swap path does not leak per iteration.

    Args:
        iterations: Number of read cycles to run.
    """
    mime_type = "image/jpeg"
    asset = SOURCE_JPEG.read_bytes()

    # One-off setup: sign with no-embed so sign() hands back the manifest
    # bytes; the output stream passed to sign() is throwaway.
    sidecar_signer = _make_signer()
    sidecar_builder = Builder({**MANIFEST_BASE, "format": mime_type})
    sidecar_builder.set_no_embed()
    sidecar = sidecar_builder.sign(
        sidecar_signer,
        mime_type,
        io.BytesIO(asset),
        io.BytesIO(),
    )
    sidecar_builder.close()
    sidecar_signer.close()

    # A single Context is shared by every Reader created below.
    shared_context = Context()
    for _ in _iterate(iterations):
        reader = Reader(
            mime_type,
            io.BytesIO(asset),
            manifest_data=sidecar,
            context=shared_context,
        )
        reader.json()
        reader.close()


# Parallel signing variants: one shared Context across 10 threads.
# {split, full} x {pool, barrier} x {jpeg, png}.

Expand Down Expand Up @@ -892,6 +918,7 @@ def scenario_fork_stream_cleanup(iterations: int = 100) -> None:
SCENARIOS = {
"reader_jpeg_legacy": scenario_reader_jpeg_legacy,
"reader_jpeg_with_context": scenario_reader_jpeg_with_context,
"reader_manifest_data_context": scenario_reader_manifest_data_context,
"reader_mp4": scenario_reader_mp4,
"reader_wav": scenario_reader_wav,
"builder_sign_jpeg_legacy": scenario_builder_sign_jpeg_legacy,
Expand Down
113 changes: 113 additions & 0 deletions tests/test_unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -6041,6 +6041,119 @@ def test_with_fragment_with_dash_fixtures(self):
context.close()


class TestSidecarReader(TestContextAPIs):
    """Reader over a detached (sidecar) manifest with a Context.

    Each test builds a Reader from asset bytes, explicit manifest bytes and
    a Context, and releases every native resource it created in a
    ``finally`` block so that a failing assertion cannot leak a handle into
    later tests.
    """

    def _make_detached_manifest(self):
        """Sign DEFAULT_TEST_FILE with no-embed and return the sidecar.

        Returns:
            A ``(asset_bytes, manifest_bytes)`` tuple, where
            ``manifest_bytes`` is the "detached" sidecar C2PA manifest
            returned by ``Builder.sign()``.
        """
        signer = self._ctx_make_signer()
        try:
            with open(DEFAULT_TEST_FILE, "rb") as f:
                asset_bytes = f.read()
            builder = Builder(self.test_manifest)
            try:
                builder.set_no_embed()
                # Output is discarded: with no-embed the asset is unchanged
                # and the manifest is returned by sign().
                manifest_bytes = builder.sign(
                    signer,
                    "image/jpeg",
                    io.BytesIO(asset_bytes),
                    io.BytesIO(),
                )
            finally:
                # Release the builder even when signing raises.
                builder.close()
        finally:
            signer.close()
        self.assertIsInstance(manifest_bytes, bytes)
        self.assertGreater(len(manifest_bytes), 0)
        return asset_bytes, manifest_bytes

    def test_reader_with_manifest_data_and_context(self):
        """A valid sidecar manifest is readable and reported as not embedded."""
        asset_bytes, manifest_bytes = self._make_detached_manifest()
        context = Context()
        try:
            # Construct inside the outer try so the context is still closed
            # if the constructor itself raises.
            reader = Reader(
                "image/jpeg",
                io.BytesIO(asset_bytes),
                manifest_bytes,
                context=context,
            )
            try:
                data = reader.json()
                self.assertTrue(data)
                self.assertFalse(reader.is_embedded())
                self.assertIn("manifests", json.loads(data))
            finally:
                reader.close()
        finally:
            context.close()

    def test_reader_manifest_data_context_invalid_manifest_raises(self):
        """Bytes that are not a manifest make the constructor raise Error."""
        with open(DEFAULT_TEST_FILE, "rb") as f:
            asset_bytes = f.read()
        context = Context()
        reader = None
        try:
            with self.assertRaises(Error):
                reader = Reader(
                    "image/jpeg",
                    io.BytesIO(asset_bytes),
                    b"not a real manifest",
                    context=context,
                )
            # NOTE(review): nothing can be asserted about the failed
            # instance here. When the constructor raises, `reader` is never
            # bound; when it does not raise, assertRaises fails before this
            # point. Checking the post-failure state (closed lifecycle, no
            # handle) needs a different way to obtain the instance.
        finally:
            # Only reachable with a live reader when the constructor
            # unexpectedly succeeded; release it so the failure stays clean.
            if reader is not None:
                reader.close()
            context.close()

    def test_reader_manifest_data_context_wrong_type_raises(self):
        """Non-bytes manifest_data is rejected with TypeError."""
        with open(DEFAULT_TEST_FILE, "rb") as f:
            asset_bytes = f.read()
        context = Context()
        try:
            # Non-bytes manifest_data must raise TypeError before any native
            # reader handle is allocated (no leak on this path).
            with self.assertRaises(TypeError):
                Reader(
                    "image/jpeg",
                    io.BytesIO(asset_bytes),
                    "manifest as str, not bytes",
                    context=context,
                )
        finally:
            context.close()

    def test_reader_manifest_data_context_use_after_close_raises(self):
        """Using a closed reader raises Error and close() stays idempotent."""
        asset_bytes, manifest_bytes = self._make_detached_manifest()
        context = Context()
        try:
            reader = Reader(
                "image/jpeg",
                io.BytesIO(asset_bytes),
                manifest_bytes,
                context=context,
            )
            try:
                reader.close()
                self.assertEqual(
                    reader._lifecycle_state, LifecycleState.CLOSED)
                with self.assertRaises(Error):
                    reader.json()
            finally:
                # Idempotent close after the use-after-close attempt; also
                # the cleanup path when an assertion above fails.
                reader.close()
        finally:
            context.close()

    def test_reader_manifest_data_context_as_context_manager(self):
        """The reader is ACTIVE inside a with-block and CLOSED after it."""
        asset_bytes, manifest_bytes = self._make_detached_manifest()
        context = Context()
        try:
            with Reader(
                "image/jpeg",
                io.BytesIO(asset_bytes),
                manifest_bytes,
                context=context,
            ) as reader:
                self.assertEqual(
                    reader._lifecycle_state, LifecycleState.ACTIVE)
                self.assertTrue(reader.json())
            self.assertEqual(reader._lifecycle_state, LifecycleState.CLOSED)
        finally:
            context.close()


class TestBuilderWithContext(TestContextAPIs):

def test_contextual_builder_with_default_context(self):
Expand Down
Loading