#!/usr/bin/env python3
"""Format the recursion-guest per-function profile as a Markdown PR comment.

`test_recursion_pc_histogram` prints a per-function summary table (cycles folded
over each function's PCs, computed across the *full* histogram) followed by a
per-address detail table. We extract the per-function table — the view that
shows where the cycles actually go — and render it as Markdown.

    Top 25 functions by cycle count (aggregated over their PCs):
      rank    cycles       %    cum %   PCs  function (file:line)
         1   5335072  24.95%   24.95%    72  <...>::visit_seq::<...>

Reads the test's captured output from the positional `log` argument; writes the
Markdown body to the `-o/--out` path (or to stdout when it is omitted).
"""

import argparse
import re
import sys

# A per-function summary row: rank, cycles, pct%, cum%, pcs, function.
# Distinguished from the per-PC detail rows by the absence of a 0x column.
FN_ROW = re.compile(
    r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(\d+)\s+(.*\S)\s*$"
)
FN_TABLE_START = re.compile(r"Top \d+ functions by cycle count")
PC_TABLE_START = re.compile(r"Top \d+ PCs by cycle count")
TOTAL_CYCLES = re.compile(r"Total cycles\s*:\s*(\d+)")
UNIQUE_PCS = re.compile(r"Unique PCs\s*:\s*(\d+)")
EXEC_TIME = re.compile(r"Exec time\s*:\s*(\S+)")


def parse(text):
    """Extract the run statistics and per-function rows from captured output.

    Args:
        text: the full captured test output.

    Returns:
        A ``(total_cycles, unique_pcs, exec_time, rows)`` tuple. The first three
        are ``None`` when the corresponding line is absent; only the first
        occurrence of each is used. ``rows`` is a list of dicts with keys
        ``cycles``/``pcs`` (int) and ``pct``/``cum``/``fn`` (str, percentages
        without the ``%`` sign), in the order they were printed.
    """
    total_cycles = unique_pcs = exec_time = None
    rows = []
    in_fn_table = False
    for line in text.splitlines():
        if total_cycles is None and (m := TOTAL_CYCLES.search(line)):
            total_cycles = int(m.group(1))
        if unique_pcs is None and (m := UNIQUE_PCS.search(line)):
            unique_pcs = int(m.group(1))
        if exec_time is None and (m := EXEC_TIME.search(line)):
            exec_time = m.group(1)
        if FN_TABLE_START.search(line):
            in_fn_table = True
            continue
        if PC_TABLE_START.search(line):
            # The per-PC detail table follows the per-function one; rows seen
            # from here on belong to it and are ignored.
            in_fn_table = False
            continue
        if in_fn_table and (m := FN_ROW.match(line)):
            rows.append(
                {
                    "cycles": int(m.group(1)),
                    "pct": m.group(2),
                    "cum": m.group(3),
                    "pcs": int(m.group(4)),
                    "fn": m.group(5),
                }
            )
    return total_cycles, unique_pcs, exec_time, rows


def short(name, width=90):
    """Return *name* unchanged if it fits in *width* chars, else truncate it.

    Truncated names are exactly *width* characters long and end in ``…``.
    """
    return name if len(name) <= width else name[: width - 1] + "…"


def _table_code_cell(name):
    """Wrap a (shortened) symbol name as an inline-code Markdown table cell.

    A literal ``|`` would terminate the cell even inside a code span, so it is
    backslash-escaped (GFM renders ``\\|`` in a table cell as ``|``).
    """
    return "`" + short(name).replace("|", "\\|") + "`"


def render(total_cycles, unique_pcs, exec_time, rows, title="Recursion guest profile"):
    """Render the parsed profile as a Markdown section.

    Args:
        total_cycles, unique_pcs, exec_time, rows: the values returned by
            :func:`parse`; any of the three statistics may be ``None``.
        title: heading text for the section.

    Returns:
        The Markdown body. When *rows* is empty a short warning notice is
        returned instead of a table.
    """
    if not rows:
        return (
            f"### {title}\n\n"
            "> ⚠️ No per-function rows found in the test output — the run may "
            "have failed before printing the table. Check the workflow logs.\n"
        )

    # Collect whichever statistics are present and join them, so a missing
    # leading field never leaves a dangling " · " separator.
    stats = []
    if total_cycles is not None:
        stats.append(f"**Total cycles:** {total_cycles:,}")
    if unique_pcs is not None:
        stats.append(f"**Unique PCs:** {unique_pcs:,}")
    if exec_time:
        stats.append(f"**Exec time:** {exec_time}")

    parts = [f"### {title}\n\n"]
    if stats:
        parts.append(" · ".join(stats) + "\n\n")

    parts.append(
        f"#### Top {len(rows)} functions by cycles (folded over their PCs)\n\n"
    )
    parts.append("| Rank | Cycles | % | Cum % | PCs | Function |\n")
    parts.append("|-----:|-------:|--:|------:|----:|----------|\n")
    for i, r in enumerate(rows, 1):
        parts.append(
            f"| {i} | {r['cycles']:,} | {r['pct']}% | {r['cum']}% | "
            f"{r['pcs']} | {_table_code_cell(r['fn'])} |\n"
        )

    last_cum = rows[-1]["cum"]
    parts.append(
        f"\nEach function's cycles are summed over all its program counters "
        f"across the full histogram; the top {len(rows)} cover {last_cum}% of total "
        f"cycles. Percentages are of total cycles.\n"
    )
    return "".join(parts)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: argument list to parse; defaults to ``sys.argv[1:]``.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("log", help="captured test output to parse")
    ap.add_argument("-o", "--out", help="write Markdown here instead of stdout")
    ap.add_argument(
        "-t",
        "--title",
        default="Recursion guest profile",
        help="section heading (e.g. the test/config name)",
    )
    args = ap.parse_args(argv)

    try:
        # The log is raw process output; decode it as UTF-8 regardless of the
        # runner's locale and tolerate stray bytes.
        with open(args.log, "r", encoding="utf-8", errors="replace") as f:
            text = f.read()
    except FileNotFoundError:
        # The test step may have failed before producing any log. Fall through
        # with empty input so render() emits its "no rows" notice rather than
        # this script dying with a traceback.
        print(f"warning: log file not found: {args.log}", file=sys.stderr)
        text = ""

    body = render(*parse(text), title=args.title)
    if args.out:
        # The body contains non-ASCII characters (⚠️, ·, …, —), so the encoding
        # must not depend on the locale.
        with open(args.out, "w", encoding="utf-8") as f:
            f.write(body)
    else:
        if hasattr(sys.stdout, "reconfigure"):
            sys.stdout.reconfigure(encoding="utf-8")
        sys.stdout.write(body)


if __name__ == "__main__":
    main()
name: Profile Recursion (PR)

# Runs the recursion-guest PC histogram diagnostics (single-query and
# multi-query, in parallel via a matrix) and posts a combined per-function
# profile as a PR comment. Triggered by a `/profile_recursion` comment from a
# repo member, or manually via workflow_dispatch.

on:
  workflow_dispatch:
  issue_comment:
    types: [created]

permissions:
  contents: read
  pull-requests: write

# This workflow starts for EVERY issue comment (the jobs are then skipped by
# their `if:`). Only a real trigger comment may share the per-PR group;
# anything else gets a unique group so an unrelated comment cannot cancel a
# profile that is still running.
concurrency:
  group: profile-recursion-${{ (github.event_name == 'issue_comment' && startsWith(github.event.comment.body, '/profile_recursion') && contains(fromJSON('["MEMBER","OWNER","COLLABORATOR"]'), github.event.comment.author_association) && github.event.issue.number) || github.run_id }}
  cancel-in-progress: true

jobs:
  # One job per configuration; they run in parallel and each uploads a Markdown
  # fragment artifact. The `comment` job stitches them into one PR comment.
  profile:
    # Skip unless: workflow_dispatch, or "/profile_recursion" comment on a PR by a member.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issue_comment' &&
      github.event.issue.pull_request &&
      startsWith(github.event.comment.body, '/profile_recursion') &&
      contains(fromJSON('["MEMBER","OWNER","COLLABORATOR"]'), github.event.comment.author_association))
    runs-on: [self-hosted, bench]
    timeout-minutes: 90
    strategy:
      fail-fast: false
      matrix:
        include:
          - name: single-query
            test: test_recursion_pc_histogram
            title: "Single query (blowup=2, 1 query)"
          - name: multi-query
            test: test_recursion_pc_histogram_multiquery
            title: "Multi query (blowup=8, 128-bit)"
    steps:
      - name: React to comment
        if: github.event_name == 'issue_comment' && matrix.name == 'single-query'
        uses: actions/github-script@v7
        with:
          script: |
            await github.rest.reactions.createForIssueComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: context.payload.comment.id,
              content: 'eyes'
            });

      - name: Get PR head ref
        id: pr-ref
        if: github.event_name == 'issue_comment'
        env:
          GH_TOKEN: ${{ github.token }}
          PR_NUM: ${{ github.event.issue.number }}
        run: |
          SHA=$(gh pr view "$PR_NUM" --repo "$GITHUB_REPOSITORY" --json headRefOid -q .headRefOid)
          echo "sha=$SHA" >> "$GITHUB_OUTPUT"

      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ steps.pr-ref.outputs.sha || github.sha }}

      - name: Setup Rust Environment
        uses: ./.github/actions/setup-rust

      - name: Add cargo to PATH
        run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

      - name: Run recursion PC histogram (${{ matrix.name }})
        env:
          TEST: ${{ matrix.test }}
        run: |
          # Self-provision the RISC-V sysroot in a user-writable dir (the default
          # /opt path on the bench runner is root-owned); the guest ELF build the
          # test triggers picks this up via the Makefile's `SYSROOT_DIR ?=`.
          export SYSROOT_DIR="$HOME/.lambda-vm-sysroot"
          set -o pipefail
          # The test is #[ignore]d and prints the histogram to stderr.
          # Log goes to the per-job RUNNER_TEMP, not a fixed /tmp path: on a
          # self-hosted runner /tmp survives between runs and is shared by
          # matrix jobs that land on the same host.
          cargo test --package lambda-vm-prover --lib "$TEST" \
            -- --ignored --nocapture 2>&1 | tee "$RUNNER_TEMP/hist.log"

      - name: Aggregate into a per-function fragment
        if: always()
        env:
          TITLE: ${{ matrix.title }}
          FRAGMENT: ${{ runner.temp }}/fragment-${{ matrix.name }}.md
        run: |
          # If the test step never ran there is no log; create an empty one so
          # the script emits its "no rows" notice instead of failing.
          [ -f "$RUNNER_TEMP/hist.log" ] || : > "$RUNNER_TEMP/hist.log"
          python3 .github/scripts/aggregate_recursion_histogram.py \
            "$RUNNER_TEMP/hist.log" --title "$TITLE" --out "$FRAGMENT"
          cat "$FRAGMENT" >> "$GITHUB_STEP_SUMMARY"

      - name: Upload fragment
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: profile-fragment-${{ matrix.name }}
          path: ${{ runner.temp }}/fragment-${{ matrix.name }}.md
          retention-days: 7

  # Stitch the matrix fragments into a single PR comment.
  comment:
    needs: profile
    # `always()` lets this run after a failed profile job so the failure notice
    # is still posted, but it must NOT run when `profile` was skipped —
    # otherwise every ordinary comment on any issue/PR would produce a bot reply.
    if: always() && needs.profile.result != 'skipped' && github.event_name == 'issue_comment'
    runs-on: [self-hosted, bench]
    timeout-minutes: 10
    steps:
      - name: Get PR head ref
        id: pr-ref
        env:
          GH_TOKEN: ${{ github.token }}
          PR_NUM: ${{ github.event.issue.number }}
        run: |
          SHA=$(gh pr view "$PR_NUM" --repo "$GITHUB_REPOSITORY" --json headRefOid -q .headRefOid)
          echo "sha=$SHA" >> "$GITHUB_OUTPUT"

      - name: Download fragments
        uses: actions/download-artifact@v4
        with:
          # RUNNER_TEMP is per-job; a workspace-relative dir would keep stale
          # fragments from earlier runs on this self-hosted runner (no checkout
          # step cleans the workspace in this job).
          path: ${{ runner.temp }}/fragments
          pattern: profile-fragment-*
          merge-multiple: true

      - name: Assemble comment body
        env:
          COMMIT_SHA: ${{ steps.pr-ref.outputs.sha }}
        run: |
          {
            echo "## Recursion guest profile"
            echo
            # Single-query first, then multi-query.
            for frag in "$RUNNER_TEMP/fragments/fragment-single-query.md" \
                        "$RUNNER_TEMP/fragments/fragment-multi-query.md"; do
              if [ -f "$frag" ]; then
                cat "$frag"
                echo
              fi
            done
            echo "Commit: ${COMMIT_SHA:0:8} · Runner: self-hosted bench"
          } > "$RUNNER_TEMP/profile_comment.md"
          cat "$RUNNER_TEMP/profile_comment.md"

      - name: Comment on PR
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const path = require('path');
            const body = fs.readFileSync(
              path.join(process.env.RUNNER_TEMP, 'profile_comment.md'), 'utf8');

            // Fetch ALL comments: a single listComments call returns only the
            // first page, so on a busy PR the existing bot comment would be
            // missed and a duplicate posted on every run.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              per_page: 100,
            });
            // Reuse our own marker comment so repeated /profile_recursion runs update in place.
+ const existing = comments.find(c => + c.user.type === 'Bot' && + c.body.includes('Recursion guest profile') + ); + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body, + }); + } diff --git a/Cargo.lock b/Cargo.lock index da2929c9d..4be069a65 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,28 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "addr2line" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efe1709241908a54ef1925c6018f41d3f523d0cfe174719761eb39e7b7bf086a" +dependencies = [ + "cpp_demangle", + "fallible-iterator", + "gimli", + "memmap2", + "object", + "rustc-demangle", + "smallvec", + "typed-arena", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "ahash" version = "0.8.12" @@ -230,6 +252,15 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "atomic-polyfill" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4" +dependencies = [ + "critical-section", +] + [[package]] name = "atty" version = "0.2.14" @@ -465,7 +496,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", - "half 2.7.1", + "half", ] [[package]] @@ -543,6 +574,15 @@ dependencies = [ "tikv-jemallocator", ] +[[package]] +name = 
"cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror", +] + [[package]] name = "colorchoice" version = "1.0.4" @@ -590,6 +630,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpp_demangle" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0667304c32ea56cb4cd6d2d7c0cfe9a2f8041229db8c033af7f8d69492429def" +dependencies = [ + "cfg-if", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -668,6 +717,12 @@ dependencies = [ "itertools 0.10.5", ] +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + [[package]] name = "crossbeam" version = "0.8.4" @@ -934,6 +989,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + [[package]] name = "enum-ordinalize" version = "4.3.2" @@ -1046,7 +1113,7 @@ dependencies = [ "serde", "serde_json", "sha2", - "thiserror 2.0.17", + "thiserror", "tracing", ] @@ -1069,7 +1136,7 @@ dependencies = [ "ripemd", "secp256k1", "sha2", - "thiserror 2.0.17", + "thiserror", "tiny-keccak", ] @@ -1089,7 +1156,7 @@ dependencies = [ "rkyv", "serde", "serde_with", - "thiserror 2.0.17", + "thiserror", ] [[package]] @@ -1107,7 +1174,7 @@ dependencies = [ "secp256k1", "serde", "serde_with", - 
"thiserror 2.0.17", + "thiserror", "tracing", ] @@ -1126,7 +1193,7 @@ dependencies = [ "rustc-hash", "serde", "strum", - "thiserror 2.0.17", + "thiserror", ] [[package]] @@ -1136,7 +1203,7 @@ source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71 dependencies = [ "bytes", "ethereum-types", - "thiserror 2.0.17", + "thiserror", ] [[package]] @@ -1155,7 +1222,7 @@ dependencies = [ "rkyv", "rustc-hash", "serde", - "thiserror 2.0.17", + "thiserror", ] [[package]] @@ -1173,7 +1240,7 @@ dependencies = [ "rayon", "rustc-hash", "serde", - "thiserror 2.0.17", + "thiserror", "tracing", ] @@ -1183,14 +1250,21 @@ version = "0.1.0" dependencies = [ "ecsm", "ethrex-guest-program", + "hashbrown 0.14.5", "rkyv", "rustc-demangle", "serde", "serde_json", - "thiserror 1.0.69", + "thiserror", "tiny-keccak", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + [[package]] name = "fastrand" version = "2.3.0" @@ -1226,6 +1300,16 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1286,6 +1370,15 @@ dependencies = [ "wasip2", ] +[[package]] +name = "gimli" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1033caf0b349c518623b5396bfb2cf0bddf44f0306d543a250e5743297aafd10" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "group" version = "0.13.0" @@ -1297,12 +1390,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "half" -version = "1.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" - [[package]] name = "half" version = "2.7.1" @@ -1314,12 +1401,30 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hash32" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -1347,6 +1452,20 @@ version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" +[[package]] +name = "heapless" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f" +dependencies = [ + "atomic-polyfill", + "hash32", + "rustc_version", + "serde", + "spin", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.5.0" @@ -1619,14 +1738,17 @@ dependencies = [ name = "lambda-vm-prover" version = "0.1.0" dependencies = [ + "addr2line", "bincode", "criterion 0.5.1", "crypto", "ecsm", "env_logger", "executor", + "hashbrown 0.14.5", "log", "math", + "postcard", "rayon", "serde", "sha3", @@ -1699,6 +1821,15 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" @@ -1814,6 +1945,32 @@ dependencies = [ "libc", ] +[[package]] +name = "minicbor" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b7a5041e12946f8b7d3f5a9d96383a19d694b9335457c522be7815b9abafb02" + +[[package]] +name = "minicbor-serde" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "293c7245401f035e2dcc4b12ebdb5c9d8847247fc79fe1b5b0a0d58d7275324c" +dependencies = [ + "minicbor", + "serde", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "munge" version = "0.4.7" @@ -1886,6 +2043,17 @@ dependencies = [ "autocfg", ] +[[package]] +name = "object" +version = "0.39.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e5a6c098c7a3b6547378093f5cc30bc54fd361ce711e05293a5cc589562739b" +dependencies = [ + "flate2", + "memchr", + "ruzstd", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -2030,6 +2198,19 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "postcard" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "heapless", + "serde", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -2383,6 +2564,15 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e75f6a532d0fd9f7f13144f392b6ad56a32696bfcd9c78f797f16bbb6f072d6" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "1.1.3" @@ -2414,6 +2604,15 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "ruzstd" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7c1c839d570d835527c9a5e4db7cb2198683a988cb9d7293fc8674e6bd58fc8" +dependencies = [ + "twox-hash", +] + [[package]] name = "ryu" version = "1.0.21" @@ -2462,6 +2661,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "sec1" version = "0.7.3" @@ -2496,6 +2701,12 @@ dependencies = [ "cc", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" @@ -2517,16 +2728,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "serde_cbor" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" -dependencies = [ - "half 1.8.3", - "serde", -] - [[package]] name = "serde_core" version = "1.0.228" @@ -2637,12 +2838,33 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "simdutf8" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +[[package]] +name = "smallvec" +version = 
"1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + [[package]] name = "spki" version = "0.7.3" @@ -2653,6 +2875,12 @@ dependencies = [ "der", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "stark" version = "0.1.0" @@ -2661,22 +2889,23 @@ dependencies = [ "criterion 0.4.0", "crypto", "env_logger", + "hashbrown 0.14.5", "itertools 0.11.0", "libc", + "libm", "log", "math", "math-cuda", "memmap2", + "minicbor-serde", "rand 0.8.5", "rand_chacha 0.3.1", "rayon", "serde", "serde-wasm-bindgen", - "serde_cbor", "sha3", "tempfile", "test-log", - "thiserror 1.0.69", "wasm-bindgen", "web-sys", ] @@ -2791,33 +3020,13 @@ version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl 1.0.69", -] - [[package]] name = "thiserror" version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ - "thiserror-impl 2.0.17", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" 
-dependencies = [ - "proc-macro2", - "quote", - "syn", + "thiserror-impl", ] [[package]] @@ -3027,6 +3236,18 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "typenum" version = "1.19.0" diff --git a/bench_vs/build_recursion_elfs.sh b/bench_vs/build_recursion_elfs.sh new file mode 100755 index 000000000..a529b4bbe --- /dev/null +++ b/bench_vs/build_recursion_elfs.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Build the fibonacci-bench and recursion-bench ELFs for the recursion smoke test. +# +# Uses the same toolchain + flags as bench_vs/run.sh, plus pins serde to the last +# pre-`serde_core`-split version (1.0.219) inside each guest's own workspace lock +# so build-std works on the riscv64im-lambda-vm-elf target. +set -euo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +ROOT_DIR="$(cd -- "$SCRIPT_DIR/.." &>/dev/null && pwd)" +TARGET_SPEC="$ROOT_DIR/executor/programs/riscv64im-lambda-vm-elf.json" + +TOOLCHAIN="nightly-2026-02-01" + +build_one() { + local name="$1" + local dir="$ROOT_DIR/bench_vs/lambda/$name" + echo "[recursion-elfs] building $name ..." + ( + cd "$dir" + # Recursion/deserialize-only guests pull in lambda-vm-prover and its + # serde stack; pin serde to 1.0.219 (pre-`serde_core` split) so + # `-Z build-std=core,alloc` works. 
+ if [ "$name" = "recursion" ] || [ "$name" = "deserialize-only" ]; then + cargo "+$TOOLCHAIN" update -p serde --precise 1.0.219 2>/dev/null || true + fi + cargo "+$TOOLCHAIN" build --release \ + --target "$TARGET_SPEC" \ + -Z build-std=core,alloc \ + -Z build-std-features=compiler-builtins-mem \ + -Z json-target-spec + ) +} + +build_one empty +build_one fibonacci +build_one recursion +build_one deserialize-only + +echo "[recursion-elfs] done" diff --git a/bench_vs/lambda/deserialize-only/.cargo/config.toml b/bench_vs/lambda/deserialize-only/.cargo/config.toml new file mode 100644 index 000000000..be730c3ec --- /dev/null +++ b/bench_vs/lambda/deserialize-only/.cargo/config.toml @@ -0,0 +1,6 @@ +[target.riscv64im-lambda-vm-elf] +rustflags = [ + "-C", "link-arg=-e", + "-C", "link-arg=main", + "-C", "passes=lower-atomic" +] diff --git a/bench_vs/lambda/deserialize-only/Cargo.toml b/bench_vs/lambda/deserialize-only/Cargo.toml new file mode 100644 index 000000000..b4a4616f4 --- /dev/null +++ b/bench_vs/lambda/deserialize-only/Cargo.toml @@ -0,0 +1,13 @@ +[workspace] + +[package] +name = "deserialize-only-bench" +version = "0.1.0" +edition = "2024" + +[dependencies] +lambda-vm-prover = { path = "../../../prover", default-features = false } +embedded-alloc = "0.6" +riscv = { version = "0.15", features = ["critical-section-single-hart"] } +serde = { version = "=1.0.219", default-features = false, features = ["derive", "alloc"] } +postcard = { version = "1.0", default-features = false, features = ["alloc"] } diff --git a/bench_vs/lambda/deserialize-only/src/main.rs b/bench_vs/lambda/deserialize-only/src/main.rs new file mode 100644 index 000000000..e2cecc938 --- /dev/null +++ b/bench_vs/lambda/deserialize-only/src/main.rs @@ -0,0 +1,94 @@ +//! Deserialize-only counterpart to the recursion guest. +//! +//! Reads the same private-input blob as `recursion-bench`, postcard-decodes +//! `(VmProof, Vec, ProofOptions, VmVerifyingKey)`, then commits success +//! 
and halts — without ever calling `verify_with_options`. The cycle delta +//! between this guest and `recursion-bench` is the actual cost of the STARK +//! verifier inside the VM (everything else being equal). + +#![no_std] +#![no_main] + +extern crate alloc; + +use alloc::vec::Vec; +use core::arch::asm; +use core::panic::PanicInfo; + +use embedded_alloc::TlsfHeap as Heap; +use lambda_vm_prover::{ProofOptions, VmProof, VmVerifyingKey}; +// Required to pull in the riscv crate's critical-section implementation. +use riscv as _; + +const PRIVATE_INPUT_START: usize = 0xFF000000; +const SYSCALL_COMMIT: u64 = 64; +const SYSCALL_HALT: u64 = 93; +const MAX_MEMORY_SIZE: usize = 0xC000_0000; + +#[global_allocator] +static HEAP: Heap = Heap::empty(); + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! { + loop {} +} + +fn init_allocator() { + unsafe extern "C" { + static _end: u8; + } + let heap_pos = (&raw const _end) as usize; + unsafe { HEAP.init(heap_pos, MAX_MEMORY_SIZE - heap_pos) } +} + +fn read_private_input() -> &'static [u8] { + let len = unsafe { core::ptr::read_volatile(PRIVATE_INPUT_START as *const u32) } as usize; + let data = (PRIVATE_INPUT_START + 4) as *const u8; + unsafe { core::slice::from_raw_parts(data, len) } +} + +fn commit(bytes: &[u8]) { + unsafe { + asm!( + "ecall", + in("a0") 1u64, + in("a1") bytes.as_ptr(), + in("a2") bytes.len(), + in("a7") SYSCALL_COMMIT, + ); + } +} + +fn halt() -> ! { + unsafe { + asm!( + "ecall", + in("a0") 0u64, + in("a7") SYSCALL_HALT, + options(noreturn), + ); + } +} + +#[unsafe(no_mangle)] +pub fn main() -> ! { + init_allocator(); + + let blob = read_private_input(); + let decoded: (VmProof, Vec, ProofOptions, VmVerifyingKey) = + postcard::from_bytes(blob).expect("failed to deserialize"); + + // Force the commit byte to depend on the actually-decoded value. 
Without + // this, LLVM at -O3 was eliding the postcard decode entirely — the only + // sinks for `decoded` were `black_box(&decoded)` (which only forces the + // *reference* to materialize, not the pointee) and `Drop`, neither of + // which require the decoded bytes to be real. With the commit byte tied + // to a deep field of the decoded value, the decode has to run. + let proof_options_byte = decoded.2.blowup_factor; + let inner_elf_byte = *decoded.1.first().unwrap_or(&0); + let vkey_byte = decoded.3.bitwise[0]; + let marker = proof_options_byte ^ inner_elf_byte ^ vkey_byte; + + commit(&[marker]); + halt() +} diff --git a/bench_vs/lambda/empty/.cargo/config.toml b/bench_vs/lambda/empty/.cargo/config.toml new file mode 100644 index 000000000..be730c3ec --- /dev/null +++ b/bench_vs/lambda/empty/.cargo/config.toml @@ -0,0 +1,6 @@ +[target.riscv64im-lambda-vm-elf] +rustflags = [ + "-C", "link-arg=-e", + "-C", "link-arg=main", + "-C", "passes=lower-atomic" +] diff --git a/bench_vs/lambda/empty/Cargo.lock b/bench_vs/lambda/empty/Cargo.lock new file mode 100644 index 000000000..11dcd8cb1 --- /dev/null +++ b/bench_vs/lambda/empty/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "empty-bench" +version = "0.1.0" diff --git a/bench_vs/lambda/empty/Cargo.toml b/bench_vs/lambda/empty/Cargo.toml new file mode 100644 index 000000000..a6e4a0530 --- /dev/null +++ b/bench_vs/lambda/empty/Cargo.toml @@ -0,0 +1,8 @@ +[workspace] + +[package] +name = "empty-bench" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/bench_vs/lambda/empty/src/main.rs b/bench_vs/lambda/empty/src/main.rs new file mode 100644 index 000000000..555cae897 --- /dev/null +++ b/bench_vs/lambda/empty/src/main.rs @@ -0,0 +1,28 @@ +#![no_std] +#![no_main] + +use core::arch::asm; +use core::panic::PanicInfo; + +const SYSCALL_HALT: u64 = 93; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! 
{ + loop {} +} + +fn halt() -> ! { + unsafe { + asm!( + "ecall", + in("a0") 0u64, + in("a7") SYSCALL_HALT, + options(noreturn), + ); + } +} + +#[unsafe(no_mangle)] +pub fn main() -> ! { + halt() +} diff --git a/bench_vs/lambda/recursion/.cargo/config.toml b/bench_vs/lambda/recursion/.cargo/config.toml new file mode 100644 index 000000000..be730c3ec --- /dev/null +++ b/bench_vs/lambda/recursion/.cargo/config.toml @@ -0,0 +1,6 @@ +[target.riscv64im-lambda-vm-elf] +rustflags = [ + "-C", "link-arg=-e", + "-C", "link-arg=main", + "-C", "passes=lower-atomic" +] diff --git a/bench_vs/lambda/recursion/Cargo.lock b/bench_vs/lambda/recursion/Cargo.lock new file mode 100644 index 000000000..c19590031 --- /dev/null +++ b/bench_vs/lambda/recursion/Cargo.lock @@ -0,0 +1,796 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base16ct" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" + +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + 
+[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror", +] + +[[package]] +name = "const-default" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa" + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + +[[package]] +name = "crypto" +version = "0.1.0" +dependencies = [ + "digest", + "math", + "rand", + "rand_chacha", + "serde", + "sha3", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "generic-array", + "rand_core", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "zeroize", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "ecsm" +version = "0.1.0" +dependencies = [ + "k256", + "num-bigint", + "num-traits", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "elliptic-curve" +version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +dependencies = [ + "base16ct", + "crypto-bigint", + "ff", + "generic-array", + "group", + "rand_core", + "sec1", + "subtle", + "zeroize", +] + +[[package]] +name = "embedded-alloc" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd" +dependencies = [ + "const-default", + "critical-section", + "linked_list_allocator", + "rlsf", +] + +[[package]] +name = "embedded-hal" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89" + +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + +[[package]] +name = "executor" +version = "0.1.0" +dependencies = [ + "ecsm", + "hashbrown", + "thiserror", +] + +[[package]] +name = "ff" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" +dependencies = [ + "rand_core", + "subtle", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", + "zeroize", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "group" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" +dependencies = [ + "ff", + "rand_core", + "subtle", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "js-sys" +version = "0.3.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "k256" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b" +dependencies = [ + "cfg-if", + "elliptic-curve", +] + +[[package]] +name = "keccak" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb26cec98cce3a3d96cbb7bced3c4b16e3d13f27ec56dbd62cbc8f39cfb9d653" +dependencies = [ + "cpufeatures", +] + +[[package]] +name = "lambda-vm-prover" +version = "0.1.0" +dependencies = [ + "crypto", + "ecsm", + "executor", + "hashbrown", + "log", + "math", + "postcard", + "serde", + "sha3", + "stark", +] + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "linked_list_allocator" 
+version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b23ac50abb8261cb38c6e2a7192d3302e0836dac1628f6a93b82b4fad185897" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "math" +version = "0.1.0" +dependencies = [ + "getrandom", + "num-bigint", + "num-traits", + "rand", + "serde", +] + +[[package]] +name = "minicbor" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b7a5041e12946f8b7d3f5a9d96383a19d694b9335457c522be7815b9abafb02" + +[[package]] +name = "minicbor-serde" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "293c7245401f035e2dcc4b12ebdb5c9d8847247fc79fe1b5b0a0d58d7275324c" +dependencies = [ + "minicbor", + "serde", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "postcard" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "serde", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "recursion-bench" 
+version = "0.1.0" +dependencies = [ + "embedded-alloc", + "lambda-vm-prover", + "postcard", + "riscv", + "serde", +] + +[[package]] +name = "riscv" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05cfa3f7b30c84536a9025150d44d26b8e1cc20ddf436448d74cd9591eefb25" +dependencies = [ + "critical-section", + "embedded-hal", + "paste", + "riscv-macros", + "riscv-pac", +] + +[[package]] +name = "riscv-macros" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d323d13972c1b104aa036bc692cd08b822c8bbf23d79a27c526095856499799" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "riscv-pac" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436" + +[[package]] +name = "rlsf" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1646a59a9734b8b7a0ac51689388a60fe1625d4b956348e9de07591a1478457a" +dependencies = [ + "cfg-if", + "const-default", + "libc", + "rustversion", + "svgbobdoc", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "sec1" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +dependencies = [ + "base16ct", + "der", + "generic-array", + "subtle", + "zeroize", +] + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "sha3" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77fd7028345d415a4034cf8777cd4f8ab1851274233b45f84e3d955502d93874" +dependencies = [ + "digest", + "keccak", +] + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "stark" +version = "0.1.0" +dependencies = [ + "crypto", + "hashbrown", + "itertools", + "libm", + "log", + "math", + "minicbor-serde", + "serde", + "sha3", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "svgbobdoc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50" +dependencies = [ + "base64", + "proc-macro2", + "quote", + "syn 1.0.109", + "unicode-width", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + 
+[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zeroize" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13c156562582aa81c60cb29407084cdb54c4164760106ab78e6c5b0858cf64e" diff --git a/bench_vs/lambda/recursion/Cargo.toml b/bench_vs/lambda/recursion/Cargo.toml new file mode 100644 index 000000000..dab83abf3 --- /dev/null +++ b/bench_vs/lambda/recursion/Cargo.toml @@ -0,0 +1,13 @@ +[workspace] + +[package] +name = "recursion-bench" +version = "0.1.0" +edition = "2024" + +[dependencies] +lambda-vm-prover = { path = "../../../prover", default-features = false } +embedded-alloc = "0.6" +riscv = { version = "0.15", features = ["critical-section-single-hart"] } +serde = { version = "=1.0.219", default-features = false, features = ["derive", "alloc"] } +postcard = { version = "1.0", default-features = false, features = ["alloc"] } diff --git a/bench_vs/lambda/recursion/src/main.rs 
b/bench_vs/lambda/recursion/src/main.rs new file mode 100644 index 000000000..a226ea225 --- /dev/null +++ b/bench_vs/lambda/recursion/src/main.rs @@ -0,0 +1,98 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use alloc::vec::Vec; +use core::arch::asm; +use core::panic::PanicInfo; + +use embedded_alloc::TlsfHeap as Heap; +use lambda_vm_prover::{ProofOptions, VmProof, VmVerifyingKey}; +// Required to pull in the riscv crate's critical-section implementation. +use riscv as _; + +const PRIVATE_INPUT_START: usize = 0xFF000000; +const SYSCALL_COMMIT: u64 = 64; +const SYSCALL_HALT: u64 = 93; +const MAX_MEMORY_SIZE: usize = 0xC000_0000; + +#[global_allocator] +static HEAP: Heap = Heap::empty(); + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! { + loop {} +} + +fn init_allocator() { + unsafe extern "C" { + static _end: u8; + } + let heap_pos = (&raw const _end) as usize; + unsafe { HEAP.init(heap_pos, MAX_MEMORY_SIZE - heap_pos) } +} + +/// Read the entire private-input region as a byte slice. +/// +/// Layout (per `syscalls::get_private_input`): 4-byte LE length prefix at +/// `PRIVATE_INPUT_START`, payload at +4. +fn read_private_input() -> &'static [u8] { + let len = unsafe { core::ptr::read_volatile(PRIVATE_INPUT_START as *const u32) } as usize; + let data = (PRIVATE_INPUT_START + 4) as *const u8; + unsafe { core::slice::from_raw_parts(data, len) } +} + +fn commit(bytes: &[u8]) { + unsafe { + asm!( + "ecall", + in("a0") 1u64, + in("a1") bytes.as_ptr(), + in("a2") bytes.len(), + in("a7") SYSCALL_COMMIT, + ); + } +} + +fn halt() -> ! 
{ + unsafe { + asm!( + "ecall", + in("a0") 0u64, + in("a7") SYSCALL_HALT, + options(noreturn), + ); + } +} + +/// Private input layout (postcard-encoded): +/// (VmProof, Vec<u8>, ProofOptions, VmVerifyingKey) +/// where the `Vec<u8>` holds the inner program's ELF bytes, the +/// `ProofOptions` specifies the parameters the inner prover used, and the +/// `VmVerifyingKey` carries the host-derived bitwise preprocessed commitment +/// so the guest can skip the ~87% of verifier cycles that would otherwise be +/// spent recomputing it from scratch. +#[unsafe(no_mangle)] +pub fn main() -> ! { + init_allocator(); + + let blob = read_private_input(); + let (vm_proof, inner_elf, options, vkey): (VmProof, Vec<u8>, ProofOptions, VmVerifyingKey) = + postcard::from_bytes(blob).expect("failed to deserialize recursion input"); + + let ok = + lambda_vm_prover::verify_with_options_with_vkey( + &vm_proof, + &inner_elf, + &options, + None, + None, + Some(&vkey), + ) + .expect("verify errored"); + assert!(ok, "inner proof failed verification"); + + commit(&[1u8]); + halt() +} diff --git a/bench_vs/sp1/verifier/Cargo.toml b/bench_vs/sp1/verifier/Cargo.toml new file mode 100644 index 000000000..fc24039c2 --- /dev/null +++ b/bench_vs/sp1/verifier/Cargo.toml @@ -0,0 +1,3 @@ +[workspace] +members = ["program", "script"] +resolver = "2" diff --git a/bench_vs/sp1/verifier/program/Cargo.toml b/bench_vs/sp1/verifier/program/Cargo.toml new file mode 100644 index 000000000..7fbc9c5ce --- /dev/null +++ b/bench_vs/sp1/verifier/program/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "verifier-program" +version = "0.1.0" +edition = "2024" + +[dependencies] +sp1-zkvm = "6.0.1" +lambda-vm-prover = { path = "../../../../prover", default-features = false } +serde = { version = "=1.0.219", default-features = false, features = ["derive", "alloc"] } +postcard = { version = "1.0", default-features = false, features = ["alloc"] } diff --git a/bench_vs/sp1/verifier/program/src/main.rs 
b/bench_vs/sp1/verifier/program/src/main.rs new file mode 100644 index 000000000..c63bb67ca --- /dev/null +++ b/bench_vs/sp1/verifier/program/src/main.rs @@ -0,0 +1,34 @@ +//! SP1 guest that runs lambda-vm's `verify_with_options` on a single proof. +//! +//! Input layout (postcard-encoded `Vec<u8>` written via `SP1Stdin::write_vec`): +//! `(VmProof, Vec<u8>, ProofOptions)` +//! where the inner `Vec<u8>` is the inner program's ELF bytes. +//! +//! Output: commits `[1u8]` on successful verify; the guest panics otherwise. +//! +//! Caveats: +//! - The verifier hashes through the `keccak` crate. SP1 has a Keccak +//! precompile but it patches `tiny-keccak`, not `keccak`. We don't patch +//! here, so Keccak runs as software inside the guest. Cycle counts will be +//! inflated by that overhead. Worth keeping in mind when interpreting the +//! number relative to lambda-vm's in-VM count. + +#![no_main] + +extern crate alloc; + +use alloc::vec::Vec; + +use lambda_vm_prover::{ProofOptions, VmProof}; + +sp1_zkvm::entrypoint!(main); + +pub fn main() { + let blob = sp1_zkvm::io::read_vec(); + let (vm_proof, inner_elf, options): (VmProof, Vec<u8>, ProofOptions) = + postcard::from_bytes(&blob).expect("failed to deserialize input"); + let ok = lambda_vm_prover::verify_with_options(&vm_proof, &inner_elf, &options, None, None) + .expect("verify errored"); + assert!(ok, "inner proof failed verification"); + sp1_zkvm::io::commit_slice(&[1u8]); +} diff --git a/bench_vs/sp1/verifier/script/Cargo.toml b/bench_vs/sp1/verifier/script/Cargo.toml new file mode 100644 index 000000000..3198059bd --- /dev/null +++ b/bench_vs/sp1/verifier/script/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "verifier-script" +version = "0.1.0" +edition = "2024" + +[dependencies] +sp1-sdk = { version = "6.0.1", features = ["blocking", "profiling"] } +lambda-vm-prover = { path = "../../../../prover" } +stark = { path = "../../../../crypto/stark" } +postcard = { version = "1.0", features = ["alloc"] } + +[build-dependencies] 
+sp1-build = "6.0.1" diff --git a/bench_vs/sp1/verifier/script/build.rs b/bench_vs/sp1/verifier/script/build.rs new file mode 100644 index 000000000..d6cf925d6 --- /dev/null +++ b/bench_vs/sp1/verifier/script/build.rs @@ -0,0 +1,5 @@ +use sp1_build::build_program_with_args; + +fn main() { + build_program_with_args("../program", Default::default()); +} diff --git a/bench_vs/sp1/verifier/script/src/main.rs b/bench_vs/sp1/verifier/script/src/main.rs new file mode 100644 index 000000000..86e46a710 --- /dev/null +++ b/bench_vs/sp1/verifier/script/src/main.rs @@ -0,0 +1,83 @@ +//! Host driver: prove an inner empty program on lambda-vm, then execute the +//! lambda-vm verifier inside SP1's executor, printing the cycle count. +//! +//! Set `TRACE_FILE=profiles/verifier.json` to capture a DWARF-attributed +//! profile (1 sample = 1 cycle). The output can be opened with +//! `samply load profiles/verifier.json`. + +use std::path::PathBuf; + +use sp1_sdk::blocking::{Prover, ProverClient}; +use sp1_sdk::{SP1Stdin, include_elf}; + +const VERIFIER_ELF: sp1_sdk::Elf = include_elf!("verifier-program"); + +fn workspace_root() -> PathBuf { + // CARGO_MANIFEST_DIR for this crate is `<workspace root>/bench_vs/sp1/verifier/script`. 
+ PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .ancestors() + .nth(4) + .expect("workspace root") + .to_path_buf() +} + +fn main() { + sp1_sdk::utils::setup_logger(); + + let root = workspace_root(); + let empty_elf_path = root + .join("bench_vs/lambda/empty/target/riscv64im-lambda-vm-elf/release/empty-bench"); + assert!( + empty_elf_path.exists(), + "empty-bench ELF not found at {} — run `bash bench_vs/build_recursion_elfs.sh` first", + empty_elf_path.display(), + ); + let inner_elf = std::fs::read(&empty_elf_path).expect("read empty-bench"); + + let options = stark::proof::options::ProofOptions { + blowup_factor: 2, + fri_number_of_queries: 1, + coset_offset: 3, + grinding_factor: 1, + }; + + println!("[sp1-verifier] proving inner (empty, blowup=2, 1 query) ..."); + let inner_proof = lambda_vm_prover::prove_with_options_and_inputs( + &inner_elf, + &[], + &options, + &lambda_vm_prover::MaxRowsConfig::default(), + ) + .expect("inner prove should succeed"); + + let blob = postcard::to_allocvec(&(&inner_proof, &inner_elf, &options)) + .expect("postcard encode failed"); + println!("[sp1-verifier] postcard blob: {} bytes", blob.len()); + + let client = ProverClient::from_env(); + let mut stdin = SP1Stdin::new(); + stdin.write_vec(blob); + + println!("[sp1-verifier] executing verifier in SP1 ..."); + let (_, report) = client + .execute(VERIFIER_ELF.clone(), stdin) + .run() + .expect("execute failed"); + + let cycles = report.total_instruction_count(); + println!(); + println!("============================================================"); + println!(" SP1 EXECUTION SUMMARY — lambda-vm verifier inside SP1"); + println!("============================================================"); + println!(" Total cycles : {cycles}"); + println!(); + println!(" Compare against lambda-vm in-VM count (~40.5B for the same"); + println!(" proof). 
Both VMs target riscv64im, so word width is symmetric."); + println!(" Main remaining asymmetry: lambda-vm's KeccakPermute precompile"); + println!(" is patched on its guests but SP1 does not patch `keccak` (only"); + println!(" `tiny-keccak`), so Keccak rounds run as software in SP1 here."); + println!(); + println!(" If TRACE_FILE was set, the profile was written there."); + println!(" Render with: samply load "); + println!("============================================================"); +} diff --git a/crypto/crypto/Cargo.toml b/crypto/crypto/Cargo.toml index 6e3731beb..6dc2ab50a 100644 --- a/crypto/crypto/Cargo.toml +++ b/crypto/crypto/Cargo.toml @@ -8,7 +8,7 @@ license.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -math = { path = "../math", features = ["alloc"] } +math = { path = "../math", default-features = false, features = ["alloc"] } digest = "0.10.7" sha3 = { version = "0.10.8", default-features = false } # Optional diff --git a/crypto/ecsm/Cargo.toml b/crypto/ecsm/Cargo.toml index 4d2800b2c..57f368f67 100644 --- a/crypto/ecsm/Cargo.toml +++ b/crypto/ecsm/Cargo.toml @@ -5,9 +5,16 @@ version = "0.1.0" edition = "2024" license.workspace = true +[features] +default = ["std"] +# `std` re-enables the std builds of the numeric deps and the +# `std::error::Error` impl. Guests (executor/prover with no default features) +# leave it off so the crate builds `no_std` + `alloc` for riscv64. +std = ["num-bigint/std", "num-traits/std"] + [dependencies] -num-bigint = "0.4.6" -num-traits = "0.2.19" +num-bigint = { version = "0.4.6", default-features = false } +num-traits = { version = "0.2.19", default-features = false } # Audited secp256k1 arithmetic (host-side witness generation only; never in the # constraint system). Used for executor scalar multiplication and for the projective # double-and-add replay + batch inversion that builds ECDAS step witnesses efficiently. 
diff --git a/crypto/ecsm/src/curve.rs b/crypto/ecsm/src/curve.rs index 2f2acb0e1..0a1e74f32 100644 --- a/crypto/ecsm/src/curve.rs +++ b/crypto/ecsm/src/curve.rs @@ -6,6 +6,9 @@ //! `k in [1, N)` (see `ecsm.typ` "Point at infinity" / ECDAS soundness argument), so the //! affine formulas below are always well defined. +use alloc::vec; +use alloc::vec::Vec; + use num_bigint::BigUint; /// An affine curve point. Never the point at infinity. diff --git a/crypto/ecsm/src/lib.rs b/crypto/ecsm/src/lib.rs index 3a0a44dff..5feed9883 100644 --- a/crypto/ecsm/src/lib.rs +++ b/crypto/ecsm/src/lib.rs @@ -15,6 +15,10 @@ //! //! Curve: secp256k1, `y^2 = x^3 + 7 mod p`, `p = 2^256 - 2^32 - 977`, order `N`. +#![cfg_attr(not(feature = "std"), no_std)] + +extern crate alloc; + pub mod curve; pub mod witness; @@ -84,7 +88,7 @@ impl core::fmt::Display for EcsmError { } } -impl std::error::Error for EcsmError {} +impl core::error::Error for EcsmError {} /// Converts a `BigUint` to 32 little-endian bytes (zero-padded / truncated to 32). pub fn to_le_32(v: &BigUint) -> [u8; 32] { diff --git a/crypto/ecsm/src/witness.rs b/crypto/ecsm/src/witness.rs index 9322cba7e..2e1e99f7d 100644 --- a/crypto/ecsm/src/witness.rs +++ b/crypto/ecsm/src/witness.rs @@ -16,6 +16,8 @@ //! negative; the chip range-checks `c_i + offset` as a halfword. We reproduce the exact //! integer recurrence here; the prover converts the resulting integers to field elements. +use alloc::vec::Vec; + use num_bigint::{BigInt, BigUint}; use num_traits::{Signed, Zero}; diff --git a/crypto/math/src/fft/bowers_fft.rs b/crypto/math/src/fft/bowers_fft.rs index 60a15410e..6ed9ec46d 100644 --- a/crypto/math/src/fft/bowers_fft.rs +++ b/crypto/math/src/fft/bowers_fft.rs @@ -296,6 +296,7 @@ fn process_fused_block( /// 2-layer fusion: 8 reads + 8 writes instead of 8+8+8+8 for separate layers. 
#[cfg(feature = "alloc")] #[inline] +#[allow(dead_code)] fn process_triple_fused_block( block: &mut [FieldElement], twiddles_l0: &[FieldElement], @@ -604,6 +605,7 @@ fn process_ifft_fused_block( /// Process a single block with 3-layer IFFT fusion (DIT radix-8 butterfly). #[cfg(feature = "alloc")] #[inline] +#[allow(dead_code)] fn process_ifft_triple_fused_block( block: &mut [FieldElement], twiddles_hi: &[FieldElement], // innermost layer (highest index) diff --git a/crypto/stark/Cargo.toml b/crypto/stark/Cargo.toml index d0f6a51ef..92015f45f 100644 --- a/crypto/stark/Cargo.toml +++ b/crypto/stark/Cargo.toml @@ -9,16 +9,18 @@ crate-type = ["cdylib", "rlib"] [dependencies] -math = { path = "../math", features = [ - "std", +math = { path = "../math", default-features = false, features = [ + "alloc", "lambdaworks-serde-binary", ] } -crypto = { path = "../crypto", features = ["std", "serde"] } -thiserror = "1.0.38" -log = "0.4.17" -sha3 = "0.10.8" -serde = { version = "1.0", features = ["derive"] } -itertools = "0.11.0" +crypto = { path = "../crypto", default-features = false, features = ["serde"] } +log = { version = "0.4.17", default-features = false } +sha3 = { version = "0.10.8", default-features = false } +serde = { version = "1.0", default-features = false, features = ["derive", "alloc"] } +minicbor-serde = { version = "0.7.0", default-features = false, features = ["alloc"] } +itertools = { version = "0.11.0", default-features = false, features = ["use_alloc"] } +hashbrown = { version = "0.14", default-features = false, features = ["inline-more", "ahash"] } +libm = "0.2" # Parallelization crates rayon = { version = "1.8.0", optional = true } @@ -34,7 +36,6 @@ math-cuda = { path = "../math-cuda", optional = true } wasm-bindgen = { version = "0.2", optional = true } serde-wasm-bindgen = { version = "0.5", optional = true } web-sys = { version = "0.3.64", features = ['console'], optional = true } -serde_cbor = { version = "0.11.1" } [dev-dependencies] criterion = { 
version = "0.4", default-features = false } @@ -45,14 +46,23 @@ rand = { version = "0.8.5", features = ["std"] } rand_chacha = "0.3.1" [features] -test-utils = [] +default = ["std", "parallel"] +std = [ + "math/std", + "crypto/std", + "log/std", + "sha3/std", + "serde/std", + "itertools/use_std", +] +test-utils = ["std"] test_fiat_shamir = [] -instruments = [] # This enables timing prints in prover and verifier -debug-checks = [] # Enables validate_trace + bus balance report in prover -parallel = ["dep:rayon", "crypto/parallel"] +instruments = ["std"] # This enables timing prints in prover and verifier +debug-checks = ["std"] # Enables validate_trace + bus balance report in prover +parallel = ["dep:rayon", "crypto/parallel", "math/parallel", "std"] cuda = ["dep:math-cuda"] test-cuda-faults = ["cuda", "math-cuda/test-faults"] -wasm = ["dep:wasm-bindgen", "dep:serde-wasm-bindgen", "dep:web-sys"] +wasm = ["dep:wasm-bindgen", "dep:serde-wasm-bindgen", "dep:web-sys", "std"] disk-spill = ["dep:memmap2", "dep:tempfile", "dep:libc", "crypto/disk-spill"] diff --git a/crypto/stark/src/constraints/boundary.rs b/crypto/stark/src/constraints/boundary.rs index b34b6afec..15c546784 100644 --- a/crypto/stark/src/constraints/boundary.rs +++ b/crypto/stark/src/constraints/boundary.rs @@ -1,3 +1,5 @@ +use alloc::vec::Vec; + use math::field::{element::FieldElement, traits::IsField}; /// Represents a boundary constraint that must hold in an execution trace: diff --git a/crypto/stark/src/constraints/evaluator.rs b/crypto/stark/src/constraints/evaluator.rs index 6e94473b7..e3e608108 100644 --- a/crypto/stark/src/constraints/evaluator.rs +++ b/crypto/stark/src/constraints/evaluator.rs @@ -4,6 +4,8 @@ use crate::lookup::{BusPublicInputs, LOGUP_CHALLENGE_ALPHA, PackingShifts, compu use crate::trace::LDETraceTable; use crate::traits::{AIR, TransitionEvaluationContext, ZerofierEvaluations}; use crate::{frame::Frame, prover::evaluate_polynomial_on_lde_domain}; +use alloc::vec; +use 
alloc::vec::Vec; use math::field::traits::{IsFFTField, IsField, IsSubFieldOf}; use math::{fft::errors::FFTError, field::element::FieldElement}; #[cfg(feature = "parallel")] @@ -12,7 +14,7 @@ use rayon::{ prelude::{IntoParallelIterator, ParallelIterator}, }; -use std::marker::PhantomData; +use core::marker::PhantomData; pub struct ConstraintEvaluator< Field: IsSubFieldOf + IsFFTField + Send + Sync, diff --git a/crypto/stark/src/constraints/transition.rs b/crypto/stark/src/constraints/transition.rs index 1fe249c4c..6486c4652 100644 --- a/crypto/stark/src/constraints/transition.rs +++ b/crypto/stark/src/constraints/transition.rs @@ -1,3 +1,5 @@ +use alloc::boxed::Box; +use alloc::vec::Vec; use core::ops::Div; use crate::domain::Domain; diff --git a/crypto/stark/src/context.rs b/crypto/stark/src/context.rs index b83b1427b..10d94f30a 100644 --- a/crypto/stark/src/context.rs +++ b/crypto/stark/src/context.rs @@ -1,4 +1,5 @@ use super::proof::options::ProofOptions; +use alloc::vec::Vec; #[derive(Clone, Debug)] pub struct AirContext { diff --git a/crypto/stark/src/debug.rs b/crypto/stark/src/debug.rs index bf1a454a7..7c68fdf63 100644 --- a/crypto/stark/src/debug.rs +++ b/crypto/stark/src/debug.rs @@ -4,6 +4,7 @@ use super::trace::TraceTable; use super::traits::{AIR, TransitionEvaluationContext}; use crate::lookup::{LOGUP_CHALLENGE_ALPHA, PackingShifts, compute_alpha_powers}; use crate::{frame::Frame, trace::LDETraceTable}; +use alloc::vec::Vec; use log::{error, info}; use math::field::traits::IsSubFieldOf; use math::{ @@ -91,7 +92,7 @@ pub fn validate_trace< // --------- VALIDATE TRANSITION CONSTRAINTS ----------- let n_transition_constraints = air.context().num_transition_constraints; let exemption_steps: Vec = - std::iter::repeat_n(lde_trace.num_steps(), n_transition_constraints) + core::iter::repeat_n(lde_trace.num_steps(), n_transition_constraints) .zip(air.transition_constraints()) .map(|(trace_steps, constraint)| trace_steps - constraint.end_exemptions()) .collect(); 
diff --git a/crypto/stark/src/domain.rs b/crypto/stark/src/domain.rs index e858c502c..66d562080 100644 --- a/crypto/stark/src/domain.rs +++ b/crypto/stark/src/domain.rs @@ -1,3 +1,4 @@ +use alloc::vec::Vec; use math::{ fft::roots_of_unity::get_powers_of_primitive_root_coset, field::{ diff --git a/crypto/stark/src/examples/dummy_air.rs b/crypto/stark/src/examples/dummy_air.rs index 1409f96ba..f5ff09c90 100644 --- a/crypto/stark/src/examples/dummy_air.rs +++ b/crypto/stark/src/examples/dummy_air.rs @@ -1,4 +1,4 @@ -use std::marker::PhantomData; +use core::marker::PhantomData; use crate::{ constraints::{ diff --git a/crypto/stark/src/examples/fibonacci_2_cols_shifted.rs b/crypto/stark/src/examples/fibonacci_2_cols_shifted.rs index 76c8ea11f..afd437e32 100644 --- a/crypto/stark/src/examples/fibonacci_2_cols_shifted.rs +++ b/crypto/stark/src/examples/fibonacci_2_cols_shifted.rs @@ -8,11 +8,11 @@ use crate::{ trace::TraceTable, traits::{AIR, TransitionEvaluationContext}, }; +use core::marker::PhantomData; use math::{ field::{element::FieldElement, traits::IsFFTField}, traits::AsBytes, }; -use std::marker::PhantomData; #[derive(Clone)] struct ShiftedFibTransition1 { diff --git a/crypto/stark/src/examples/fibonacci_2_columns.rs b/crypto/stark/src/examples/fibonacci_2_columns.rs index 7662c8f98..725ed541c 100644 --- a/crypto/stark/src/examples/fibonacci_2_columns.rs +++ b/crypto/stark/src/examples/fibonacci_2_columns.rs @@ -1,4 +1,4 @@ -use std::marker::PhantomData; +use core::marker::PhantomData; use super::simple_fibonacci::FibonacciPublicInputs; use crate::{ diff --git a/crypto/stark/src/examples/fibonacci_multi_column.rs b/crypto/stark/src/examples/fibonacci_multi_column.rs index ac6069ece..9e8e8917f 100644 --- a/crypto/stark/src/examples/fibonacci_multi_column.rs +++ b/crypto/stark/src/examples/fibonacci_multi_column.rs @@ -1,4 +1,4 @@ -use std::marker::PhantomData; +use core::marker::PhantomData; use crate::{ constraints::{ diff --git 
a/crypto/stark/src/examples/fibonacci_rap.rs b/crypto/stark/src/examples/fibonacci_rap.rs index 10f1827d2..f6c6b4ce3 100644 --- a/crypto/stark/src/examples/fibonacci_rap.rs +++ b/crypto/stark/src/examples/fibonacci_rap.rs @@ -1,4 +1,4 @@ -use std::{marker::PhantomData, ops::Div}; +use core::{marker::PhantomData, ops::Div}; use crate::{ constraints::{ diff --git a/crypto/stark/src/examples/quadratic_air.rs b/crypto/stark/src/examples/quadratic_air.rs index d49b0050d..59bcb753c 100644 --- a/crypto/stark/src/examples/quadratic_air.rs +++ b/crypto/stark/src/examples/quadratic_air.rs @@ -1,4 +1,4 @@ -use std::marker::PhantomData; +use core::marker::PhantomData; use crate::{ constraints::{ diff --git a/crypto/stark/src/examples/read_only_memory.rs b/crypto/stark/src/examples/read_only_memory.rs index 8c3e9efac..bffa1702f 100644 --- a/crypto/stark/src/examples/read_only_memory.rs +++ b/crypto/stark/src/examples/read_only_memory.rs @@ -1,4 +1,4 @@ -use std::marker::PhantomData; +use core::marker::PhantomData; use crate::{ constraints::{ diff --git a/crypto/stark/src/examples/read_only_memory_logup.rs b/crypto/stark/src/examples/read_only_memory_logup.rs index e4f25c16c..b32a29708 100644 --- a/crypto/stark/src/examples/read_only_memory_logup.rs +++ b/crypto/stark/src/examples/read_only_memory_logup.rs @@ -2,7 +2,7 @@ //! See our blog post for detailed explanation. //! -use std::marker::PhantomData; +use core::marker::PhantomData; use crate::{ constraints::{ diff --git a/crypto/stark/src/examples/simple_addition.rs b/crypto/stark/src/examples/simple_addition.rs index 78f938838..9a48741cd 100644 --- a/crypto/stark/src/examples/simple_addition.rs +++ b/crypto/stark/src/examples/simple_addition.rs @@ -1,7 +1,7 @@ //! A minimal AIR with a simple addition constraint: col0 + col1 = col2 //! This is used to test STARK proving/verification with small traces (1-2 rows). 
-use std::marker::PhantomData; +use core::marker::PhantomData; use crate::{ constraints::{ diff --git a/crypto/stark/src/examples/simple_fibonacci.rs b/crypto/stark/src/examples/simple_fibonacci.rs index a39064258..51c537c8e 100644 --- a/crypto/stark/src/examples/simple_fibonacci.rs +++ b/crypto/stark/src/examples/simple_fibonacci.rs @@ -8,8 +8,8 @@ use crate::{ trace::TraceTable, traits::{AIR, TransitionEvaluationContext}, }; +use core::marker::PhantomData; use math::field::{element::FieldElement, traits::IsFFTField}; -use std::marker::PhantomData; #[derive(Clone)] struct FibConstraint { diff --git a/crypto/stark/src/examples/simple_periodic_cols.rs b/crypto/stark/src/examples/simple_periodic_cols.rs index 70f5da3b4..02660157e 100644 --- a/crypto/stark/src/examples/simple_periodic_cols.rs +++ b/crypto/stark/src/examples/simple_periodic_cols.rs @@ -1,4 +1,4 @@ -use std::marker::PhantomData; +use core::marker::PhantomData; use crate::{ constraints::{ diff --git a/crypto/stark/src/frame.rs b/crypto/stark/src/frame.rs index 952a3a110..91f2d94cb 100644 --- a/crypto/stark/src/frame.rs +++ b/crypto/stark/src/frame.rs @@ -1,4 +1,6 @@ use crate::{table::TableView, trace::LDETraceTable}; +use alloc::vec; +use alloc::vec::Vec; use itertools::Itertools; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; diff --git a/crypto/stark/src/fri/fri_commitment.rs b/crypto/stark/src/fri/fri_commitment.rs index 831471761..4fafede22 100644 --- a/crypto/stark/src/fri/fri_commitment.rs +++ b/crypto/stark/src/fri/fri_commitment.rs @@ -1,3 +1,4 @@ +use alloc::vec::Vec; use crypto::merkle_tree::{merkle::MerkleTree, traits::IsMerkleTreeBackend}; use math::{ field::{element::FieldElement, traits::IsField}, diff --git a/crypto/stark/src/fri/fri_decommit.rs b/crypto/stark/src/fri/fri_decommit.rs index f398096d5..4a1fb272c 100644 --- a/crypto/stark/src/fri/fri_decommit.rs +++ b/crypto/stark/src/fri/fri_decommit.rs @@ -1,3 +1,4 @@ +use alloc::vec::Vec; use 
crypto::merkle_tree::proof::Proof; use math::field::element::FieldElement; use math::field::traits::IsField; diff --git a/crypto/stark/src/fri/fri_functions.rs b/crypto/stark/src/fri/fri_functions.rs index 6037da4ec..bd8f79d77 100644 --- a/crypto/stark/src/fri/fri_functions.rs +++ b/crypto/stark/src/fri/fri_functions.rs @@ -1,3 +1,4 @@ +use alloc::vec::Vec; use math::fft::{ bit_reversing::in_place_bit_reverse_permute, roots_of_unity::get_powers_of_primitive_root_coset, }; diff --git a/crypto/stark/src/fri/mod.rs b/crypto/stark/src/fri/mod.rs index 60ad2a398..cc72c4a68 100644 --- a/crypto/stark/src/fri/mod.rs +++ b/crypto/stark/src/fri/mod.rs @@ -1,3 +1,5 @@ +use alloc::vec; +use alloc::vec::Vec; pub mod fri_commitment; pub mod fri_decommit; pub(crate) mod fri_functions; diff --git a/crypto/stark/src/lib.rs b/crypto/stark/src/lib.rs index e9f6a1cda..e5a756972 100644 --- a/crypto/stark/src/lib.rs +++ b/crypto/stark/src/lib.rs @@ -1,3 +1,7 @@ +#![cfg_attr(not(feature = "std"), no_std)] + +extern crate alloc; + // `StorageMode::Disk` uses `memmap2`, which does not build on wasm32. // Fail at the crate root rather than as a transitive memmap2 error. #[cfg(all(target_arch = "wasm32", feature = "disk-spill"))] diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index 745736d4d..4de42d044 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -1,6 +1,10 @@ +use alloc::boxed::Box; +use alloc::string::{String, ToString}; +use alloc::vec; +use alloc::vec::Vec; +use core::marker::PhantomData; #[cfg(feature = "debug-checks")] -use std::collections::HashMap; -use std::marker::PhantomData; +use hashbrown::HashMap; use crate::{ constraints::{ diff --git a/crypto/stark/src/par.rs b/crypto/stark/src/par.rs index a20a452b6..b65e29720 100644 --- a/crypto/stark/src/par.rs +++ b/crypto/stark/src/par.rs @@ -1,6 +1,8 @@ //! Helpers that abstract over `cfg(feature = "parallel")` for patterns //! that recur across the prover. 
+use alloc::vec::Vec; + /// Run `f(i)` for `i in 0..n` and return the unzipped pair of result vecs. /// Parallel when `feature = "parallel"`, sequential otherwise. pub(crate) fn map_unzip(n: usize, f: F) -> (Vec, Vec) diff --git a/crypto/stark/src/proof/options.rs b/crypto/stark/src/proof/options.rs index 70976b993..8fe3f1e6d 100644 --- a/crypto/stark/src/proof/options.rs +++ b/crypto/stark/src/proof/options.rs @@ -101,11 +101,24 @@ impl GoldilocksCubicProofOptions { }); } + #[cfg(feature = "std")] + let (sqrt, log2, ceil) = ( + f64::sqrt as fn(f64) -> f64, + f64::log2 as fn(f64) -> f64, + f64::ceil as fn(f64) -> f64, + ); + #[cfg(not(feature = "std"))] + let (sqrt, log2, ceil) = ( + libm::sqrt as fn(f64) -> f64, + libm::log2 as fn(f64) -> f64, + libm::ceil as fn(f64) -> f64, + ); + let rate = 1.0 / blowup_factor as f64; - let proximity = 1.0 - rate.sqrt() - 1.0 / 300.0; - let bits_per_query = -(1.0 - proximity).log2(); + let proximity = 1.0 - sqrt(rate) - 1.0 / 300.0; + let bits_per_query = -log2(1.0 - proximity); let fri_number_of_queries = - ((security_bits as f64 - grinding_factor as f64) / bits_per_query).ceil() as usize; + ceil((security_bits as f64 - grinding_factor as f64) / bits_per_query) as usize; Ok(ProofOptions { blowup_factor, diff --git a/crypto/stark/src/proof/stark.rs b/crypto/stark/src/proof/stark.rs index 1751d60fe..302649b29 100644 --- a/crypto/stark/src/proof/stark.rs +++ b/crypto/stark/src/proof/stark.rs @@ -1,3 +1,4 @@ +use alloc::vec::Vec; use crypto::merkle_tree::proof::Proof; use math::field::{ element::FieldElement, diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 4da57559c..390ed09da 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1,5 +1,8 @@ -use std::marker::PhantomData; -use std::sync::Arc; +use alloc::string::String; +use alloc::sync::Arc; +use alloc::vec; +use alloc::vec::Vec; +use core::marker::PhantomData; #[cfg(feature = "instruments")] use std::time::{Duration, Instant}; @@ 
-1687,8 +1690,8 @@ pub trait IsStarkProver< // Many tables share the same domain size (e.g., 7+ tables at 2^20). // Without dedup, each creates its own Domain (~24 MB) and LdeTwiddles (~32 MB). type DomainEntry = (Arc>, Arc>); - let mut domain_cache: std::collections::HashMap<(usize, usize, u64), DomainEntry> = - std::collections::HashMap::new(); + let mut domain_cache: hashbrown::HashMap<(usize, usize, u64), DomainEntry> = + hashbrown::HashMap::new(); let mut domains = Vec::with_capacity(num_airs); let mut twiddle_caches: Vec>> = Vec::with_capacity(num_airs); diff --git a/crypto/stark/src/r4_denoms.rs b/crypto/stark/src/r4_denoms.rs index 77076ecfe..a79912b74 100644 --- a/crypto/stark/src/r4_denoms.rs +++ b/crypto/stark/src/r4_denoms.rs @@ -12,6 +12,8 @@ //! - `z_scalars = [z_power, z_shifted[0..]]`, length `1 + z_shifted.len()` //! - `denoms[k * lde_size + i] = x_i - z_scalars[k]` (then inverted) +use alloc::vec::Vec; + use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 58938d5e4..24189cc10 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -1,3 +1,5 @@ +use alloc::vec::Vec; + use crate::frame::Frame; #[cfg(feature = "disk-spill")] use crypto::mmap_util::spill_slice_to_mmap; diff --git a/crypto/stark/src/tests/bus_tests/completeness_tests.rs b/crypto/stark/src/tests/bus_tests/completeness_tests.rs index 83f8ac391..d51f8977e 100644 --- a/crypto/stark/src/tests/bus_tests/completeness_tests.rs +++ b/crypto/stark/src/tests/bus_tests/completeness_tests.rs @@ -7,6 +7,7 @@ use math::field::element::FieldElement; use math::field::{ extensions_goldilocks::Degree3GoldilocksExtensionField, goldilocks::GoldilocksField, }; +use minicbor_serde; use crate::examples::multi_table_lookup::{ new_add_air_with_lookup, new_cpu_air_with_lookup, new_mul_air_with_lookup, @@ -377,9 +378,9 @@ fn test_serialization_roundtrip() { 
multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Serialize and deserialize - let serialized = serde_cbor::to_vec(&multi_proof).expect("serialization failed"); + let serialized = minicbor_serde::to_vec(&multi_proof).expect("serialization failed"); let deserialized: crate::proof::stark::MultiProof = - serde_cbor::from_slice(&serialized).expect("deserialization failed"); + minicbor_serde::from_slice(&serialized).expect("deserialization failed"); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; diff --git a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs index 4059ed481..717ff0be6 100644 --- a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs +++ b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs @@ -8,6 +8,7 @@ use math::field::element::FieldElement; use math::field::{ extensions_goldilocks::Degree3GoldilocksExtensionField, goldilocks::GoldilocksField, }; +use minicbor_serde; use crate::constraints::transition::TransitionConstraintEvaluator; use crate::lookup::{ @@ -142,13 +143,13 @@ fn test_verify_serialized_multi_table_proofs() { // NETWORK TRANSMISSION - Serialize and deserialize (using CBOR binary format) // ========================================================================= - let serialized = serde_cbor::to_vec(&proofs).expect("Failed to serialize proofs"); + let serialized = minicbor_serde::to_vec(&proofs).expect("Failed to serialize proofs"); // At this point, the prover's data is dropped (out of scope above) // The verifier only has the serialized data let received_proofs: MultiProof = - serde_cbor::from_slice(&serialized).expect("Failed to deserialize proofs"); + minicbor_serde::from_slice(&serialized).expect("Failed to deserialize proofs"); // ========================================================================= // VERIFIER SIDE - Reconstruct AIRs and verify diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 
405ce89f8..d6fdfe116 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -1,5 +1,7 @@ use crate::domain::{Domain, DomainConstants}; use crate::table::Table; +use alloc::vec; +use alloc::vec::Vec; use math::field::traits::{IsField, IsSubFieldOf}; use math::field::{element::FieldElement, traits::IsFFTField}; use math::polynomial::barycentric_inv_denoms; diff --git a/crypto/stark/src/traits.rs b/crypto/stark/src/traits.rs index 06465b659..862dad155 100644 --- a/crypto/stark/src/traits.rs +++ b/crypto/stark/src/traits.rs @@ -1,4 +1,7 @@ -use std::collections::HashMap; +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; +use hashbrown::HashMap; use crypto::fiat_shamir::is_transcript::IsStarkTranscript; use math::{ diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs index 68819c76b..85e3209c1 100644 --- a/crypto/stark/src/verifier.rs +++ b/crypto/stark/src/verifier.rs @@ -12,6 +12,9 @@ use crate::{ lookup::{LOGUP_CHALLENGE_ALPHA, LOGUP_NUM_CHALLENGES, PackingShifts, compute_alpha_powers}, proof::stark::{DeepPolynomialOpening, MultiProof, PolynomialOpenings}, }; +use alloc::vec; +use alloc::vec::Vec; +use core::marker::PhantomData; use crypto::{fiat_shamir::is_transcript::IsStarkTranscript, merkle_tree::proof::Proof}; #[cfg(not(feature = "test_fiat_shamir"))] use log::error; @@ -25,8 +28,7 @@ use math::{ }, traits::AsBytes, }; -use std::collections::HashMap; -use std::marker::PhantomData; +use hashbrown::HashMap; #[cfg(feature = "instruments")] use std::time::Instant; @@ -314,7 +316,7 @@ pub trait IsStarkVerifier< E: IsField, Field: IsSubFieldOf, { - proof.verify::>(root, index, &value.to_owned()) + proof.verify::>(root, index, &value.to_vec()) } /// Verify both (proof, evaluations) and (proof_sym, evaluations_sym) openings diff --git a/executor/Cargo.toml b/executor/Cargo.toml index 5d1e4ae49..343735693 100644 --- a/executor/Cargo.toml +++ b/executor/Cargo.toml @@ -4,10 +4,19 @@ version = "0.1.0" edition = "2024" 
license.workspace = true +[features] +default = ["std"] +std = ["thiserror/std", "dep:rustc-demangle", "ecsm/std"] + +[[bin]] +name = "executor" +required-features = ["std"] + [dependencies] -thiserror = "1.0.68" -rustc-demangle = "0.1" -ecsm = { path = "../crypto/ecsm" } +thiserror = { version = "2.0", default-features = false } +rustc-demangle = { version = "0.1", optional = true } +hashbrown = { version = "0.14", default-features = false, features = ["inline-more", "ahash"] } +ecsm = { path = "../crypto/ecsm", default-features = false } [dev-dependencies] serde = { version = "1.0", features = ["derive"] } diff --git a/executor/src/constants.rs b/executor/src/constants.rs new file mode 100644 index 000000000..f84e05a2b --- /dev/null +++ b/executor/src/constants.rs @@ -0,0 +1,58 @@ +//! VM memory layout constants shared between prover and verifier code paths. +//! +//! These live outside `vm/` because the verifier needs them even when the full +//! VM executor is not compiled in (e.g. inside a RISC-V guest verifying a proof). + +/// Initial value of the stack pointer register (SP, x2). +/// 64-bit max, aligned to 16 bytes per RV64 ABI. +pub const STACK_TOP: u64 = 0xFFFFFFFFFFFFFFF0; + +/// Maximum byte length of the private-input region. +/// +/// Bumped from 6.7 MB to 64 MiB to accommodate serialized STARK proofs as +/// private input for the naive recursion experiment. +pub const MAX_PRIVATE_INPUT_SIZE: u64 = 64 * 1024 * 1024; + +/// Memory address where the private-input region starts. +/// Layout: 4-byte LE length prefix at this address, then payload at +4. +pub const PRIVATE_INPUT_START_INDEX: u64 = 0xFF000000; + +/// Syscall number for the Keccak-f[1600] precompile. +pub const KECCAK_SYSCALL_NUMBER: u64 = u64::MAX - 1; + +/// Round constants for Keccak-f[1600] (24 rounds). 
+pub const KECCAK_RC: [u64; 24] = [ + 0x0000000000000001, + 0x0000000000008082, + 0x800000000000808A, + 0x8000000080008000, + 0x000000000000808B, + 0x0000000080000001, + 0x8000000080008081, + 0x8000000000008009, + 0x000000000000008A, + 0x0000000000000088, + 0x0000000080008009, + 0x000000008000000A, + 0x000000008000808B, + 0x800000000000008B, + 0x8000000000008089, + 0x8000000000008003, + 0x8000000000008002, + 0x8000000000000080, + 0x000000000000800A, + 0x800000008000000A, + 0x8000000080008081, + 0x8000000000008080, + 0x0000000080000001, + 0x8000000080008008, +]; + +/// Rotation offsets R[x][y] for the rho step of Keccak-f[1600]. +pub const KECCAK_RHO: [[u32; 5]; 5] = [ + [0, 36, 3, 41, 18], + [1, 44, 10, 45, 2], + [62, 6, 43, 15, 61], + [28, 55, 25, 21, 56], + [27, 20, 39, 8, 14], +]; diff --git a/executor/src/elf.rs b/executor/src/elf.rs index ed79fb983..120436efd 100644 --- a/executor/src/elf.rs +++ b/executor/src/elf.rs @@ -1,3 +1,5 @@ +use alloc::string::{String, ToString}; +use alloc::vec::Vec; const EI_NIDENT: usize = 16; // Section header types const SHT_SYMTAB: u32 = 2; @@ -557,4 +559,9 @@ impl SymbolTable { pub fn len(&self) -> usize { self.functions.len() } + + /// Borrow the full function list (sorted by address). 
+ pub fn functions(&self) -> &[FunctionSymbol] { + &self.functions + } } diff --git a/executor/src/lib.rs b/executor/src/lib.rs index d626ca1f4..cb6c99eed 100644 --- a/executor/src/lib.rs +++ b/executor/src/lib.rs @@ -1,4 +1,10 @@ +#![cfg_attr(not(feature = "std"), no_std)] + +extern crate alloc; + +pub mod constants; pub mod elf; +#[cfg(feature = "std")] pub mod flamegraph; #[cfg(test)] pub mod tests; diff --git a/executor/src/vm/execution.rs b/executor/src/vm/execution.rs index 614aad649..81762cf19 100644 --- a/executor/src/vm/execution.rs +++ b/executor/src/vm/execution.rs @@ -1,4 +1,6 @@ -use std::{cmp::Ordering, fmt::Debug}; +use alloc::vec; +use alloc::vec::Vec; +use core::{cmp::Ordering, fmt::Debug}; use crate::{ elf::Elf, @@ -103,6 +105,13 @@ impl Executor { self.get_return_values() } + /// Read-only access to the executor's memory. Exposed for diagnostic + /// tooling that needs to inspect the final memory state (e.g. counting + /// distinct 4 KB pages touched) after a streaming `resume()` loop. + pub fn memory(&self) -> &Memory { + &self.memory + } + /// Run to completion and return all logs (consumes executor) pub fn run(mut self) -> Result { let mut logs = Vec::with_capacity(CHUNK_SIZE); diff --git a/executor/src/vm/instruction/execution.rs b/executor/src/vm/instruction/execution.rs index 148d7f86c..4ad257d70 100644 --- a/executor/src/vm/instruction/execution.rs +++ b/executor/src/vm/instruction/execution.rs @@ -1,3 +1,6 @@ +use alloc::borrow::ToOwned; +use alloc::string::String; + use crate::vm::{ instruction::decoding::{ArithOp, Comparison, Instruction, LoadStoreWidth}, logs::Log, @@ -346,7 +349,11 @@ impl Instruction { let bytes = memory.load_bytes(pointer, len)?; let value = str::from_utf8(&bytes).map_err(|_| ExecutionError::IncorrectMessage)?; - println!("PRINT VM: {}", value); + // No stdout when the executor itself runs inside a guest VM. 
+ #[cfg(feature = "std")] + std::println!("PRINT VM: {}", value); + #[cfg(not(feature = "std"))] + let _ = value; } SyscallNumbers::Panic => { // panic @@ -617,42 +624,7 @@ pub enum ExecutionError { // Keccak-f[1600] permutation // ============================================================================= -/// Round constants for Keccak-f[1600] (24 rounds). -pub const KECCAK_RC: [u64; 24] = [ - 0x0000000000000001, - 0x0000000000008082, - 0x800000000000808A, - 0x8000000080008000, - 0x000000000000808B, - 0x0000000080000001, - 0x8000000080008081, - 0x8000000000008009, - 0x000000000000008A, - 0x0000000000000088, - 0x0000000080008009, - 0x000000008000000A, - 0x000000008000808B, - 0x800000000000008B, - 0x8000000000008089, - 0x8000000000008003, - 0x8000000000008002, - 0x8000000000000080, - 0x000000000000800A, - 0x800000008000000A, - 0x8000000080008081, - 0x8000000000008080, - 0x0000000080000001, - 0x8000000080008008, -]; - -/// Rotation offsets R[x][y] for the rho step of Keccak-f[1600]. -pub const KECCAK_RHO: [[u32; 5]; 5] = [ - [0, 36, 3, 41, 18], - [1, 44, 10, 45, 2], - [62, 6, 43, 15, 61], - [28, 55, 25, 21, 56], - [27, 20, 39, 8, 14], -]; +pub use crate::constants::{KECCAK_RC, KECCAK_RHO}; /// Apply the Keccak-f[1600] permutation (24 rounds) to a 25-word state. /// diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs index ea84e2620..28d156ae2 100644 --- a/executor/src/vm/memory.rs +++ b/executor/src/vm/memory.rs @@ -1,5 +1,6 @@ -use std::collections::HashMap; -use std::hash::{BuildHasher, Hasher}; +use alloc::vec::Vec; +use core::hash::{BuildHasher, Hasher}; +use hashbrown::HashMap; /// Fast hasher for u64 keys - uses the key directly as the hash value. /// This avoids the overhead of SipHash for integer keys. @@ -42,13 +43,12 @@ pub type U64HashMap = HashMap; /// The COMMIT AIR concatenates calls via the running `x254` index, so this /// is enforced as a running-total budget rather than a per-call limit. 
pub const MAX_PUBLIC_OUTPUT_TOTAL_SIZE: u64 = 1024 * 1024; -/// Maximum size of the private input memory region (in bytes). -pub const MAX_PRIVATE_INPUT_SIZE: u64 = 6700000; -/// Fixed high address where private input is mapped. Guest programs can read -/// directly from this address (ZisK-style memory-mapped input). -/// Layout: 4-byte LE length prefix at `PRIVATE_INPUT_START_INDEX`, then data at +4. -/// Must match `PRIVATE_INPUT_START` in `syscalls/src/syscalls.rs`. -pub const PRIVATE_INPUT_START_INDEX: u64 = 0xFF000000; +/// Private-input region size cap and mapped base address. Re-exported from +/// `constants` (the canonical definitions) rather than redeclared here — the +/// old local `MAX_PRIVATE_INPUT_SIZE = 6.7 MB` shadowed the 64 MiB constant +/// and rejected larger recursion blobs (e.g. multi-query / high-blowup inner +/// proofs) with `PrivateInputSizeExceeded`. +pub use crate::constants::{MAX_PRIVATE_INPUT_SIZE, PRIVATE_INPUT_START_INDEX}; #[derive(Default, Debug)] pub struct Memory { @@ -204,6 +204,13 @@ impl Memory { Ok(self.public_output.clone()) } + /// Read-only access to the underlying 4-byte cell map. Exposed for + /// diagnostic tooling (e.g. counting the distinct 4 KB memory pages a + /// program touches) — not part of the normal execution interface. + pub fn cells(&self) -> &U64HashMap<[u8; 4]> { + &self.cells + } + /// Pre-loads private input bytes at `PRIVATE_INPUT_START_INDEX` as a /// 4-byte LE length prefix followed by the raw data. The guest reads these /// bytes directly via normal RISC-V loads (ZisK-style memory-mapped input). 
@@ -232,7 +239,7 @@ impl Memory { let aligned = addr - (addr % 4); let bytes = self.cells.get(&aligned).cloned().unwrap_or_default(); let offset = (addr % 4) as usize; - let take = std::cmp::min(4 - offset, (end - addr) as usize); + let take = core::cmp::min(4 - offset, (end - addr) as usize); result.extend_from_slice(&bytes[offset..offset + take]); addr += take as u64; } diff --git a/executor/src/vm/registers.rs b/executor/src/vm/registers.rs index 61945b732..743b90542 100644 --- a/executor/src/vm/registers.rs +++ b/executor/src/vm/registers.rs @@ -1,6 +1,7 @@ -use std::fmt::Display; +use alloc::vec::Vec; +use core::fmt::Display; -pub const STACK_TOP: u64 = 0xFFFFFFFFFFFFFFF0; // 64-bit max (Multiple of 16 for RV64 ABI) +pub use crate::constants::STACK_TOP; #[derive(Debug)] /// Holds the current value of all 32 registers @@ -48,13 +49,13 @@ impl Registers { } impl Display for Registers { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { const REGISTER_NAMES: [&str; 32] = [ "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6", ]; - let values = std::iter::once(0u64).chain(self.0.iter().copied()); + let values = core::iter::once(0u64).chain(self.0.iter().copied()); for (i, chunk) in REGISTER_NAMES .iter() diff --git a/prover/Cargo.toml b/prover/Cargo.toml index da9ceb9af..103f70f43 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -5,33 +5,44 @@ edition = "2024" license.workspace = true [features] -default = ["parallel"] -parallel = ["stark/parallel", "math/parallel", "crypto/parallel", "dep:rayon"] +default = ["std", "prove", "parallel"] +std = ["stark/std", "math/std", "crypto/std", "executor/std", "ecsm/std"] +prove = [] +parallel = ["stark/parallel", "math/parallel", "crypto/parallel", "dep:rayon", "std"] cuda = 
["stark/cuda"] test-cuda-faults = ["cuda", "stark/test-cuda-faults"] -debug-checks = ["stark/debug-checks"] -instruments = ["stark/instruments"] -disk-spill = ["stark/disk-spill"] +debug-checks = ["stark/debug-checks", "std"] +instruments = ["stark/instruments", "std"] +disk-spill = ["stark/disk-spill", "dep:sysinfo"] [dependencies] -stark = { path = "../crypto/stark" } -crypto = { path = "../crypto/crypto" } -math = { path = "../crypto/math" } -executor = { path = "../executor" } -ecsm = { path = "../crypto/ecsm" } -serde = { version = "1.0", features = ["derive"] } +stark = { path = "../crypto/stark", default-features = false } +crypto = { path = "../crypto/crypto", default-features = false, features = ["serde"] } +math = { path = "../crypto/math", default-features = false, features = ["alloc", "lambdaworks-serde-binary"] } +executor = { path = "../executor", default-features = false } +ecsm = { path = "../crypto/ecsm", default-features = false } +serde = { version = "1.0", default-features = false, features = ["derive", "alloc"] } +hashbrown = { version = "0.14", default-features = false, features = ["inline-more", "ahash"] } rayon = { version = "1.8.0", optional = true } -sysinfo = { version = "0.31", default-features = false, features = ["system"] } +# Only the `disk-spill` auto-storage path queries OS memory. `sysinfo` is a +# host crate (pulls `std` via `memchr`), so keep it optional and out of the +# bare-metal guest builds that depend on the prover with no default features. 
+sysinfo = { version = "0.31", default-features = false, features = ["system"], optional = true } log = "0.4" sha3 = { version = "0.10.8", default-features = false } +postcard = { version = "1.0", default-features = false, features = ["alloc"] } [dev-dependencies] env_logger = "*" criterion = { version = "0.5", default-features = false } bincode = "1" +postcard = { version = "1.0", features = ["alloc"] } tikv-jemallocator = "0.6" tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } tiny-keccak = { version = "2.0", features = ["keccak"] } +# Resolve guest PCs to source functions inside the histogram diagnostics +# (replaces piping the printed addresses through the addr2line binary). +addr2line = "0.27.0" # Enable stark's test-utils so cross-crate tests can reach # `compute_precomputed_commitment_for_testing`. Only active under cargo test/bench. stark = { path = "../crypto/stark", features = ["test-utils"] } diff --git a/prover/src/constraints/cpu.rs b/prover/src/constraints/cpu.rs index facc9e16d..4e3794a96 100644 --- a/prover/src/constraints/cpu.rs +++ b/prover/src/constraints/cpu.rs @@ -15,6 +15,9 @@ //! `JALR` is the `mem_flags` byte read directly: under `BRANCH` only the JALR bit //! of `mem_flags` can be set, so `mem_flags ∈ {0,1} = JALR` there. +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; diff --git a/prover/src/constraints/templates.rs b/prover/src/constraints/templates.rs index ef5b6c036..ec7177039 100644 --- a/prover/src/constraints/templates.rs +++ b/prover/src/constraints/templates.rs @@ -11,6 +11,8 @@ //! - lhs, rhs, sum: DWordWL (2 × 32-bit words) //! 
- Embeds carry constraints inline +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::{constraints::transition::TransitionConstraint, table::TableView}; diff --git a/prover/src/instruments.rs b/prover/src/instruments.rs index f15223e18..ef82f5ad2 100644 --- a/prover/src/instruments.rs +++ b/prover/src/instruments.rs @@ -1,3 +1,8 @@ +use alloc::format; +use alloc::string::{String, ToString}; +use alloc::vec; +use alloc::vec::Vec; +#[cfg(feature = "prove")] use std::collections::BTreeMap; use std::time::Duration; diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 81233d39f..23d95ae23 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -10,6 +10,15 @@ //! assert!(lambda_vm_prover::verify(&vm_proof, &elf_bytes).unwrap()); //! ``` +#![cfg_attr(not(feature = "std"), no_std)] +// In guest builds (`prove` feature off) the prove-side helpers — trace generators, +// executor-typed imports, internal Operation structs, etc. — are unreferenced. +// They're real code, used by the host build, and there's nothing to fix there. +// Silence the resulting dead_code / unused_imports noise in the guest build only. 
+#![cfg_attr(not(feature = "prove"), allow(dead_code, unused_imports))] + +extern crate alloc; + #[cfg(feature = "disk-spill")] pub mod auto_storage; pub mod constraints; @@ -22,15 +31,24 @@ pub mod tables; pub mod test_utils; #[cfg(test)] pub mod tests; +pub mod vkey; -use std::fmt; +pub use vkey::VmVerifyingKey; + +use alloc::format; +use alloc::string::String; +use alloc::vec; +use alloc::vec::Vec; +use core::fmt; use crypto::fiat_shamir::default_transcript::DefaultTranscript; use crypto::fiat_shamir::is_transcript::IsTranscript; use executor::elf::Elf; +#[cfg(feature = "prove")] use executor::vm::execution::Executor; use math::field::element::FieldElement; use stark::config::Commitment; +#[cfg(feature = "prove")] use stark::prover::{IsStarkProver, Prover}; #[cfg(feature = "disk-spill")] use stark::storage_mode::StorageMode; @@ -56,7 +74,7 @@ use crate::test_utils::{ create_register_air, create_shift_air, create_store_air, }; -use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions}; +pub use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions}; use stark::proof::stark::MultiProof; /// A run-length encoded range of contiguous zero-initialized 4KB pages. @@ -201,7 +219,7 @@ impl fmt::Display for Error { } } -impl std::error::Error for Error {} +impl core::error::Error for Error {} /// Type alias for AIR-trace-public-inputs triples used in multi-table proving. type AirTracePair<'a> = ( @@ -243,6 +261,7 @@ pub(crate) struct VmAirs { impl VmAirs { /// Build `(air, trace, public_inputs)` triples for [`Prover::multi_prove`]. 
+ #[cfg(feature = "prove")] pub fn air_trace_pairs<'a>(&'a self, traces: &'a mut Traces) -> Vec> { let mut pairs: Vec> = vec![ (&self.bitwise, &mut traces.bitwise, &()), @@ -418,6 +437,32 @@ impl VmAirs { table_counts: &TableCounts, decode_commitment: Option, page_commitments: Option<&[(u64, Commitment)]>, + ) -> Self { + Self::new_with_vkey( + elf, + proof_options, + minimal_bitwise, + page_configs, + table_counts, + decode_commitment, + page_commitments, + None, + ) + } + + /// Same as [`Self::new`] but accepts a precomputed [`VmVerifyingKey`]. + /// When `vkey` is `Some`, the bitwise preprocessed commitment is taken + /// from it instead of being recomputed from `proof_options` — that + /// recomputation is ~87% of verifier cycles inside the recursion guest. + pub fn new_with_vkey( + elf: &Elf, + proof_options: &ProofOptions, + minimal_bitwise: bool, + page_configs: &[crate::tables::page::PageConfig], + table_counts: &TableCounts, + decode_commitment: Option, + page_commitments: Option<&[(u64, Commitment)]>, + vkey: Option<&VmVerifyingKey>, ) -> Self { let cpus: Vec<_> = (0..table_counts.cpu) .map(|i| create_cpu_air(proof_options).with_name(&format!("CPU[{}]", i))) @@ -425,10 +470,12 @@ impl VmAirs { let bitwise = if minimal_bitwise { create_bitwise_air(proof_options) } else { - create_bitwise_air(proof_options).with_preprocessed( - bitwise::preprocessed_commitment(proof_options), - bitwise::NUM_PRECOMPUTED_COLS, - ) + let commitment = match vkey { + Some(vk) => vk.bitwise, + None => bitwise::preprocessed_commitment(proof_options), + }; + create_bitwise_air(proof_options) + .with_preprocessed(commitment, bitwise::NUM_PRECOMPUTED_COLS) }; let lts: Vec<_> = (0..table_counts.lt) .map(|i| create_lt_air(proof_options).with_name(&format!("LT[{}]", i))) @@ -445,10 +492,12 @@ impl VmAirs { let loads: Vec<_> = (0..table_counts.load) .map(|i| create_load_air(proof_options).with_name(&format!("LOAD[{}]", i))) .collect(); - let decode_root = 
decode_commitment.unwrap_or_else(|| { - decode::commitment_from_elf(elf, proof_options) - .expect("Failed to compute decode commitment") - }); + let decode_root = decode_commitment + .or_else(|| vkey.map(|vk| vk.decode)) + .unwrap_or_else(|| { + decode::commitment_from_elf(elf, proof_options) + .expect("Failed to compute decode commitment") + }); let decode = create_decode_air(proof_options) .with_preprocessed(decode_root, decode::NUM_PRECOMPUTED_COLS); let muls: Vec<_> = (0..table_counts.mul) @@ -464,17 +513,21 @@ impl VmAirs { let commit = create_commit_air(proof_options); let keccak = create_keccak_air(proof_options); let keccak_rnd = create_keccak_rnd_air(proof_options); + let keccak_rc_commitment = vkey + .map(|vk| vk.keccak_rc) + .unwrap_or_else(|| tables::keccak_rc::preprocessed_commitment(proof_options)); let keccak_rc = create_keccak_rc_air(proof_options).with_preprocessed( - tables::keccak_rc::preprocessed_commitment(proof_options), + keccak_rc_commitment, tables::keccak_rc::NUM_PRECOMPUTED_COLS, ); let ecsm = create_ecsm_air(proof_options); let ec_scalar = create_ec_scalar_air(proof_options); let ecdas = create_ecdas_air(proof_options); - let register = create_register_air(proof_options).with_preprocessed( - register::preprocessed_commitment(proof_options, elf.entry_point), - register::NUM_PREPROCESSED_COLS, - ); + let register_commitment = vkey + .map(|vk| vk.register) + .unwrap_or_else(|| register::preprocessed_commitment(proof_options, elf.entry_point)); + let register = create_register_air(proof_options) + .with_preprocessed(register_commitment, register::NUM_PREPROCESSED_COLS); // Every zero-init page shares one preprocessed commitment: OFFSET is // page-relative and INIT is all-zero, so it depends only on // (blowup, coset) — all fixed here. 
Compute it once (static const @@ -485,7 +538,8 @@ impl VmAirs { let pages: Vec<_> = page_configs .iter() - .map(|config| { + .enumerate() + .map(|(index, config)| { let air = create_page_air(proof_options, config.page_base); if config.is_private_input { // Private-input pages: all columns are main trace (not preprocessed). @@ -494,16 +548,21 @@ impl VmAirs { air } else if config.init_values.is_none() { // Zero-init pages: the shared commitment computed once above. + // `vkey.pages` caches the same static value for these slots, + // so the local lookup is equivalent and equally cheap. air.with_preprocessed(zero_init_commitment, page::NUM_PREPROCESSED_COLS) } else { // ELF data pages: INIT is program-specific, so the commitment is // per-page. Prefer a caller-supplied `(page_base, commitment)` - // (recursion guest); otherwise recompute from the ELF. + // (recursion guest), then the vkey's cached per-page root + // (indexed parallel to `page_configs`); otherwise recompute + // from the ELF. let commitment = page_commitments .unwrap_or(&[]) .iter() .find(|(pb, _)| *pb == config.page_base) .map(|(_, c)| *c) + .or_else(|| vkey.map(|vk| vk.pages[index])) .unwrap_or_else(|| { page::compute_precomputed_commitment(config, proof_options) }); @@ -650,11 +709,13 @@ pub(crate) fn compute_expected_commit_bus_balance( // ============================================================================= /// Prove an ELF binary execution. Returns a serializable proof bundle. +#[cfg(feature = "prove")] pub fn prove(elf_bytes: &[u8]) -> Result { prove_with_inputs(elf_bytes, &[]) } /// Prove an ELF binary execution with private inputs. Returns a serializable proof bundle. 
+#[cfg(feature = "prove")] pub fn prove_with_inputs(elf_bytes: &[u8], private_inputs: &[u8]) -> Result { prove_with_options_and_inputs( elf_bytes, @@ -672,6 +733,7 @@ pub fn prove_with_inputs(elf_bytes: &[u8], private_inputs: &[u8]) -> Result Result<(u64, u64), Error> { let program = Elf::load(elf_bytes).map_err(|e| Error::ElfLoad(format!("{e}")))?; let executor = Executor::new(&program, private_inputs.to_vec()) @@ -694,6 +756,7 @@ pub fn count_elements(elf_bytes: &[u8], private_inputs: &[u8]) -> Result<(u64, u } /// Prove an ELF binary execution with custom proof options and max rows config. +#[cfg(feature = "prove")] pub fn prove_with_options( elf_bytes: &[u8], proof_options: &ProofOptions, @@ -704,6 +767,7 @@ pub fn prove_with_options( /// Prove an ELF binary execution with custom proof options, max rows config, /// and explicit private inputs. +#[cfg(feature = "prove")] pub fn prove_with_options_and_inputs( elf_bytes: &[u8], private_inputs: &[u8], @@ -883,6 +947,30 @@ pub fn verify_with_options( proof_options: &ProofOptions, decode_commitment: Option, page_commitments: Option<&[(u64, Commitment)]>, +) -> Result { + verify_with_options_with_vkey( + vm_proof, + elf_bytes, + proof_options, + decode_commitment, + page_commitments, + None, + ) +} + +/// Same as [`verify_with_options`] but accepts a precomputed +/// [`VmVerifyingKey`]. When `vkey` is `Some`, the bitwise preprocessed +/// commitment is taken from it instead of being recomputed inside +/// `VmAirs::new`. A tampered vkey is caught by Fiat-Shamir: the verifier +/// feeds the supplied commitment into the transcript, derives different +/// challenges from what the prover used, and the openings stop matching. +pub fn verify_with_options_with_vkey( + vm_proof: &VmProof, + elf_bytes: &[u8], + proof_options: &ProofOptions, + decode_commitment: Option, + page_commitments: Option<&[(u64, Commitment)]>, + vkey: Option<&VmVerifyingKey>, ) -> Result { // Validate table_counts before constructing AIRs. 
// A malicious prover could set counts to 0, removing entire constraint sets. @@ -892,7 +980,7 @@ pub fn verify_with_options( // MAX_PRIVATE_INPUT_SIZE fits in ~26 pages of DEFAULT_PAGE_SIZE. { use crate::tables::page::DEFAULT_PAGE_SIZE; - use executor::vm::memory::MAX_PRIVATE_INPUT_SIZE; + use executor::constants::MAX_PRIVATE_INPUT_SIZE; let max_pages = (MAX_PRIVATE_INPUT_SIZE as usize + 4).div_ceil(DEFAULT_PAGE_SIZE) + 1; if vm_proof.num_private_input_pages > max_pages { return Err(Error::InvalidTableCounts(format!( @@ -923,7 +1011,7 @@ pub fn verify_with_options( ))); } - let airs = VmAirs::new( + let airs = VmAirs::new_with_vkey( &program, proof_options, false, @@ -931,6 +1019,7 @@ pub fn verify_with_options( &vm_proof.table_counts, decode_commitment, page_commitments, + vkey, ); // Recompute the COMMIT output bus offset from VmProof.public_output. @@ -974,6 +1063,7 @@ pub fn verify_with_options( } /// Prove and verify in one call (convenience). +#[cfg(feature = "prove")] pub fn prove_and_verify(elf_bytes: &[u8]) -> Result { let vm_proof = prove(elf_bytes)?; verify(&vm_proof, elf_bytes) diff --git a/prover/src/tables/bitwise.rs b/prover/src/tables/bitwise.rs index 468e2a5b2..1ac4eddd0 100644 --- a/prover/src/tables/bitwise.rs +++ b/prover/src/tables/bitwise.rs @@ -25,6 +25,9 @@ //! All lookups are provided as receivers with negative multiplicity, //! meaning other tables send to this table. +use alloc::vec; +use alloc::vec::Vec; + use math::fft::bit_reversing::in_place_bit_reverse_permute; use math::polynomial::Polynomial; use stark::config::{BatchedMerkleTree, Commitment}; diff --git a/prover/src/tables/branch.rs b/prover/src/tables/branch.rs index 1680b9edb..4c0b86f62 100644 --- a/prover/src/tables/branch.rs +++ b/prover/src/tables/branch.rs @@ -26,6 +26,8 @@ //! - Sender: IS_HALFWORD (×3 for next_pc_high[0..3]) //! 
- Receiver: BRANCH (provides branch targets to CPU) +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::TransitionConstraint; @@ -155,9 +157,11 @@ impl BranchOperation { /// /// Duplicate operations (same pc, offset, register, jalr) are merged into a single row /// with their multiplicities summed. The table is then padded to the next power of 2. +#[cfg(feature = "prove")] pub fn generate_branch_trace( operations: &[BranchOperation], ) -> TraceTable { + #[cfg(feature = "prove")] use std::collections::HashMap; // Deduplicate operations: (pc, offset, register, jalr) -> multiplicity diff --git a/prover/src/tables/bytewise.rs b/prover/src/tables/bytewise.rs index 82d7c8772..0721f3183 100644 --- a/prover/src/tables/bytewise.rs +++ b/prover/src/tables/bytewise.rs @@ -16,6 +16,8 @@ //! - `res`: DWordBL (8 bytes) — output //! - `μ`: multiplicity +use alloc::vec; +use alloc::vec::Vec; use stark::lookup::{BusInteraction, BusValue, Multiplicity, Packing}; use stark::trace::TraceTable; @@ -97,7 +99,7 @@ impl BytewiseOperation { pub fn generate_bytewise_trace( operations: &[BytewiseOperation], ) -> TraceTable { - use std::collections::HashMap; + use hashbrown::HashMap; let mut op_map: HashMap = HashMap::new(); for op in operations { diff --git a/prover/src/tables/commit.rs b/prover/src/tables/commit.rs index c1663711e..88b0cdb97 100644 --- a/prover/src/tables/commit.rs +++ b/prover/src/tables/commit.rs @@ -43,6 +43,9 @@ //! - `count_decr_carry_0`: SUB template carry_0 for count_decr + 1 = count (degree 2) //! - `count_decr_carry_1`: SUB template carry_1 for count_decr + 1 = count (degree 2) //! 
+use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index 1752022b9..6a469ce73 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -26,6 +26,9 @@ use super::types::{BusId, DecodeEntry, FE, GoldilocksExtension, GoldilocksField, VmTable, alu_op}; use crate::Error; +use alloc::vec; +use alloc::vec::Vec; +#[cfg(feature = "prove")] use executor::vm::{ instruction::{decoding::Instruction, execution::SyscallNumbers}, logs::Log, @@ -216,6 +219,7 @@ impl CpuOperation { } /// Creates a CpuOperation from an executor Log and a DecodeEntry. + #[cfg(feature = "prove")] pub fn from_log(log: &Log, timestamp: u64, decode: DecodeEntry) -> Self { let f = decode.fields; // Real byte length: the column stores half. @@ -228,8 +232,7 @@ impl CpuOperation { } else { (0, 0) }; - let ecall_keccak = - f.ecall && log.src1_val == executor::vm::instruction::execution::KECCAK_SYSCALL_NUMBER; + let ecall_keccak = f.ecall && log.src1_val == executor::constants::KECCAK_SYSCALL_NUMBER; let keccak_state_addr = if ecall_keccak { log.src2_val } else { 0 }; // The ECSM operand addresses (x10/x11/x12) are recovered from the register state // in the trace builder. @@ -377,6 +380,7 @@ impl CpuOperation { } /// Creates a CpuOperation from Log and Instruction (convenience). + #[cfg(feature = "prove")] pub fn from_log_and_instruction(log: &Log, timestamp: u64, instruction: Instruction) -> Self { let decode = DecodeEntry::from_instruction(log.current_pc, instruction, 4); Self::from_log(log, timestamp, decode) @@ -555,6 +559,7 @@ pub fn generate_cpu_trace( } /// Generates the CPU trace table directly from executor logs. 
+#[cfg(feature = "prove")] pub fn generate_cpu_trace_from_logs( logs: &[Log], instructions: &U64HashMap, @@ -582,6 +587,7 @@ pub fn collect_bitwise_ops(operations: &[CpuOperation]) -> Vec, diff --git a/prover/src/tables/cpu32.rs b/prover/src/tables/cpu32.rs index d7dbd5d6f..bd4a4aeea 100644 --- a/prover/src/tables/cpu32.rs +++ b/prover/src/tables/cpu32.rs @@ -17,6 +17,9 @@ //! //! Register reads use the cast-to-`DWordWL` encoding. +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; @@ -280,7 +283,7 @@ fn register_dword(lo0: usize, lo1: usize, hi: usize) -> Vec { packing: Packing::Direct, }, ]; - v.extend(std::iter::repeat_n(BusValue::constant(0), 6)); + v.extend(core::iter::repeat_n(BusValue::constant(0), 6)); v } @@ -349,7 +352,7 @@ fn reg_write( packing: Packing::Direct, }, ]; - values.extend(std::iter::repeat_n(BusValue::constant(0), 6)); // value[2..8] + values.extend(core::iter::repeat_n(BusValue::constant(0), 6)); // value[2..8] values.extend(timestamp_plus(ts_offset)); values.push(BusValue::constant(1)); // write2 = 1 values.push(BusValue::constant(0)); // write4 diff --git a/prover/src/tables/decode.rs b/prover/src/tables/decode.rs index 6cef6a482..6d8d448d0 100644 --- a/prover/src/tables/decode.rs +++ b/prover/src/tables/decode.rs @@ -31,6 +31,8 @@ //! //! - **Receiver**: DECODE bus - receives lookups from CPU table +use alloc::vec; +use alloc::vec::Vec; use executor::elf::Elf; use executor::vm::instruction::decoding::{Instruction, InstructionError}; use executor::vm::memory::U64HashMap; @@ -85,7 +87,7 @@ pub const NUM_PRECOMPUTED_COLS: usize = 5; // Trace generation // ========================================================================= -use std::collections::HashMap; +use hashbrown::HashMap; /// Map from PC to row index in the DECODE trace table. 
pub type PcToRow = HashMap; @@ -176,6 +178,7 @@ pub fn generate_decode_trace( /// Updates multiplicities in the DECODE trace table. /// /// For each PC in `lookups`, increments the MU column in the corresponding row. +#[cfg(feature = "prove")] pub fn update_multiplicities( trace: &mut TraceTable, pc_to_row: &PcToRow, @@ -349,6 +352,7 @@ pub fn commitment_from_elf( // ========================================================================= /// Result of ELF processing for DECODE table. +#[cfg(feature = "prove")] pub struct ElfTables { /// DECODE trace table pub decode: TraceTable, @@ -364,6 +368,7 @@ pub struct ElfTables { /// - `pc_to_row`: Map from PC to row index for DECODE multiplicity updates /// /// Table has multiplicities initialized to 0. +#[cfg(feature = "prove")] pub fn tables_from_elf(elf: &Elf) -> Result { let mut decode_entries = Vec::new(); let mut pc_to_row = HashMap::with_capacity(elf.data.iter().map(|s| s.values.len()).sum()); @@ -387,6 +392,7 @@ pub fn tables_from_elf(elf: &Elf) -> Result { } /// Build DECODE trace table from entries. +#[cfg(feature = "prove")] fn build_decode_table( entries: Vec, pc_to_row: &mut PcToRow, diff --git a/prover/src/tables/dvrm.rs b/prover/src/tables/dvrm.rs index d3adbdc53..6d70b4e26 100644 --- a/prover/src/tables/dvrm.rs +++ b/prover/src/tables/dvrm.rs @@ -29,6 +29,9 @@ //! - Sender: ZERO (×5 for div_by_zero, overflow, NEG template) //! 
- Receiver: DVRM (×2 for quotient and remainder results) +use alloc::vec; +use alloc::vec::Vec; +#[cfg(feature = "prove")] use std::collections::HashMap; use math::field::element::FieldElement; @@ -284,6 +287,7 @@ impl DvrmOperation { /// /// # Arguments /// * `operations` - List of (DvrmOperation, wants_remainder) pairs +#[cfg(feature = "prove")] pub fn generate_dvrm_trace( operations: &[(DvrmOperation, bool)], ) -> TraceTable { diff --git a/prover/src/tables/ec_scalar.rs b/prover/src/tables/ec_scalar.rs index dd8d483a2..7779008c5 100644 --- a/prover/src/tables/ec_scalar.rs +++ b/prover/src/tables/ec_scalar.rs @@ -16,6 +16,9 @@ //! //! `limb = Σ 2^i · limb_bits[i]` is virtual (a linear combination, never stored). +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; diff --git a/prover/src/tables/ecdas.rs b/prover/src/tables/ecdas.rs index 6d508d363..a6118026b 100644 --- a/prover/src/tables/ecdas.rs +++ b/prover/src/tables/ecdas.rs @@ -10,6 +10,9 @@ //! See `spec/src/ecdas.toml`. Constraints are **unconditional**; padding rows set the quotients //! to `r` and `op = 0`, which makes every relation hold with zero carries. +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; diff --git a/prover/src/tables/ecsm.rs b/prover/src/tables/ecsm.rs index f8ec0859d..bed4418c6 100644 --- a/prover/src/tables/ecsm.rs +++ b/prover/src/tables/ecsm.rs @@ -17,6 +17,9 @@ //! drops when `µ = 0`). Only that single `µ·b` term is µ-gated. The range checks / //! virtual-carry checks remain µ-gated as before. 
+use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use executor::vm::instruction::execution::ECSM_SYSCALL_NUMBER; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; @@ -261,12 +264,12 @@ fn memw_write( /// The eight bytes of a 256-bit value at `col + 8*chunk` as MEMW value elements. fn dword_bytes(col: usize, chunk: usize) -> [BusValue; 8] { - std::array::from_fn(|b| packed(col + 8 * chunk + b)) + core::array::from_fn(|b| packed(col + 8 * chunk + b)) } /// A register value `[lo, hi, 0, 0, 0, 0, 0, 0]` as MEMW value elements. fn register_value(lo_col: usize, hi_col: usize) -> [BusValue; 8] { - let mut v: [BusValue; 8] = std::array::from_fn(|_| BusValue::constant(0)); + let mut v: [BusValue; 8] = core::array::from_fn(|_| BusValue::constant(0)); v[0] = packed(lo_col); v[1] = packed(hi_col); v @@ -760,7 +763,7 @@ where let inv = FieldElement::::from(INV_SHIFT_32); let hl = kind.addend_hl_base(); let bl = kind.sum_bl_base(); - let mut c: [FieldElement; 8] = std::array::from_fn(|_| FieldElement::zero()); + let mut c: [FieldElement; 8] = core::array::from_fn(|_| FieldElement::zero()); let mut prev = FieldElement::::zero(); for (i, slot) in c.iter_mut().enumerate() { // addend1 word i (from halfwords): hl[2i] + 2^16·hl[2i+1] diff --git a/prover/src/tables/eq.rs b/prover/src/tables/eq.rs index 453caa928..459beb6da 100644 --- a/prover/src/tables/eq.rs +++ b/prover/src/tables/eq.rs @@ -21,6 +21,9 @@ //! four range-checked halves is `0` iff `diff == 0` iff `a == b`), and //! `res = eq XOR invert`. 
+use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; @@ -120,7 +123,7 @@ impl EqOperation { pub fn generate_eq_trace( operations: &[EqOperation], ) -> TraceTable { - use std::collections::HashMap; + use hashbrown::HashMap; let mut op_map: HashMap = HashMap::new(); for op in operations { diff --git a/prover/src/tables/halt.rs b/prover/src/tables/halt.rs index 44bbf26cb..319653473 100644 --- a/prover/src/tables/halt.rs +++ b/prover/src/tables/halt.rs @@ -27,6 +27,8 @@ //! ## Padding //! Single-row table (2^0 = 1), no padding needed. +use alloc::vec; +use alloc::vec::Vec; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; use stark::trace::TraceTable; diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index 0f305255b..3be69d15d 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -15,7 +15,10 @@ //! | state_ptr | 100 | Per-lane DWordHL addresses [25][4] | //! | mu | 1 | Multiplicity flag | -use executor::vm::instruction::execution::KECCAK_SYSCALL_NUMBER; +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; +use executor::constants::KECCAK_SYSCALL_NUMBER; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; diff --git a/prover/src/tables/keccak_rc.rs b/prover/src/tables/keccak_rc.rs index 3575c8ba1..8fafcf45e 100644 --- a/prover/src/tables/keccak_rc.rs +++ b/prover/src/tables/keccak_rc.rs @@ -8,6 +8,9 @@ //! committed via a static lookup table (with recompute as fallback for //! `ProofOptions` not covered by the static table). 
+use alloc::vec; +use alloc::vec::Vec; + use math::fft::bit_reversing::in_place_bit_reverse_permute; use math::polynomial::Polynomial; use stark::config::{BatchedMerkleTree, Commitment}; diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs index 279b5c152..fe231f531 100644 --- a/prover/src/tables/keccak_rnd.rs +++ b/prover/src/tables/keccak_rnd.rs @@ -28,7 +28,10 @@ //! `Cxz_right` is typed `[Bit, 4]` per spec d75944ee — HWSL with shift=1 //! produces a single-bit carry, range-checked via IS_BIT polynomial constraints. -use executor::vm::instruction::execution::{KECCAK_RC, KECCAK_RHO}; +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; +use executor::constants::{KECCAK_RC, KECCAK_RHO}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; use stark::trace::TraceTable; @@ -40,6 +43,7 @@ use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField, VmTable, alu // ========================================================================= pub mod cols { + use executor::constants::KECCAK_RHO; pub const TIMESTAMP_0: usize = 0; pub const TIMESTAMP_1: usize = 1; pub const ROUND: usize = 2; @@ -159,7 +163,6 @@ pub mod cols { /// pair whose sum equals pi[x][y][z]. rbc is compile-time constant. #[inline] pub fn pi_src_cols(x: usize, y: usize, z: usize) -> (usize, usize) { - use executor::vm::instruction::execution::KECCAK_RHO; let sx = (x + 3 * y) % 5; let sy = x; let rho_offset = KECCAK_RHO[sx][sy] as usize; @@ -239,6 +242,7 @@ fn hwsl(halfword: u16, shift: u8) -> (u16, u16) { /// /// Each `KeccakRoundOperation` produces 24 rows (one per round). The trace /// computes all intermediate values (θ, ρ, π, χ, ι) at byte granularity. 
+#[cfg(feature = "prove")] pub fn generate_keccak_rnd_trace( ops: &[KeccakRoundOperation], ) -> TraceTable { diff --git a/prover/src/tables/load.rs b/prover/src/tables/load.rs index 250d565b2..1c56b41df 100644 --- a/prover/src/tables/load.rs +++ b/prover/src/tables/load.rs @@ -23,6 +23,9 @@ //! - Sender: MEMW (to read from memory) //! - Sender: MSB8 (for sign bit extraction) +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; diff --git a/prover/src/tables/lt.rs b/prover/src/tables/lt.rs index 0b1a57616..5b6359d4d 100644 --- a/prover/src/tables/lt.rs +++ b/prover/src/tables/lt.rs @@ -26,6 +26,8 @@ //! - Receiver: ALU (all less-than lookups — CPU SLT/BLT/BGE dispatch and the //! internal `memw`/`memw_aligned`/`dvrm` timestamp / |r|<|d| checks) +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::TransitionConstraint; @@ -158,9 +160,11 @@ impl LtOperation { /// /// Duplicate operations (same lhs, rhs, signed) are merged into a single row /// with their multiplicities summed. The table is then padded to the next power of 2. +#[cfg(feature = "prove")] pub fn generate_lt_trace( operations: &[LtOperation], ) -> TraceTable { + #[cfg(feature = "prove")] use std::collections::HashMap; // Deduplicate operations: (lhs, rhs, signed) -> multiplicity diff --git a/prover/src/tables/memw.rs b/prover/src/tables/memw.rs index 2b240747c..36b9cbdb1 100644 --- a/prover/src/tables/memw.rs +++ b/prover/src/tables/memw.rs @@ -29,6 +29,9 @@ //! //! 
## Constraints (11 total: 2 custom + 2 IS_BIT for multiplicities + 7 IS_BIT for carry) +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; diff --git a/prover/src/tables/memw_aligned.rs b/prover/src/tables/memw_aligned.rs index 8042d9052..75f17662d 100644 --- a/prover/src/tables/memw_aligned.rs +++ b/prover/src/tables/memw_aligned.rs @@ -34,6 +34,9 @@ //! - IS_HALF[base_address[i]] for i ∈ [0, 1] //! - IS_WORD[base_address[2]] +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; diff --git a/prover/src/tables/memw_register.rs b/prover/src/tables/memw_register.rs index 14a696cb9..2d5740c1d 100644 --- a/prover/src/tables/memw_register.rs +++ b/prover/src/tables/memw_register.rs @@ -38,6 +38,9 @@ //! - 4 Memory bus tokens (read-old + write-new, per word) //! - 2 MEMW output interactions (read + write, from CPU) +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; diff --git a/prover/src/tables/mul.rs b/prover/src/tables/mul.rs index ba414dc63..3406c242a 100644 --- a/prover/src/tables/mul.rs +++ b/prover/src/tables/mul.rs @@ -30,6 +30,9 @@ //! - Receiver: ALU (×2 for lo and hi results — every MUL lookup, CPU //! 
MUL/MULH dispatch and dvrm's internal `d*q` consistency) +use alloc::vec; +use alloc::vec::Vec; +#[cfg(feature = "prove")] use std::collections::HashMap; use math::field::element::FieldElement; @@ -292,6 +295,7 @@ impl MulOperation { /// /// # Arguments /// * `operations` - List of (MulOperation, wants_hi) pairs +#[cfg(feature = "prove")] pub fn generate_mul_trace( operations: &[(MulOperation, bool)], ) -> TraceTable { @@ -798,8 +802,8 @@ impl MulConstraint { // Build sign-extended values let sign_fill = FieldElement::::from(SIGN_FILL); - let mut lhs_ext: [FieldElement; 8] = std::array::from_fn(|_| FieldElement::zero()); - let mut rhs_ext: [FieldElement; 8] = std::array::from_fn(|_| FieldElement::zero()); + let mut lhs_ext: [FieldElement; 8] = core::array::from_fn(|_| FieldElement::zero()); + let mut rhs_ext: [FieldElement; 8] = core::array::from_fn(|_| FieldElement::zero()); lhs_ext[..4].clone_from_slice(&lhs); rhs_ext[..4].clone_from_slice(&rhs); diff --git a/prover/src/tables/page.rs b/prover/src/tables/page.rs index 174225ffa..edb9c8f36 100644 --- a/prover/src/tables/page.rs +++ b/prover/src/tables/page.rs @@ -30,7 +30,10 @@ //! | PAGE-C3 | Memory | `[0, address, 0, init]` | -1 (receiver) | //! | PAGE-C4 | Memory | `[0, address, timestamp, fini]` | 1 (sender) | -use std::collections::HashMap; +use alloc::vec; +use alloc::vec::Vec; +#[cfg(feature = "prove")] +use hashbrown::HashMap; use math::fft::bit_reversing::in_place_bit_reverse_permute; use math::polynomial::Polynomial; @@ -50,7 +53,7 @@ use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField, VmTable}; pub const DEFAULT_PAGE_SIZE: usize = 1 << 18; /// Stack top address (where SP starts). Re-exported from executor. -pub use executor::vm::registers::STACK_TOP; +pub use executor::constants::STACK_TOP; // ========================================================================= // Column indices for PAGE table @@ -98,6 +101,7 @@ pub struct FinalByteState { } /// Map from byte address to final state. 
+#[cfg(feature = "prove")] pub type FinalStateMap = HashMap; /// Configuration for a single PAGE table instance. @@ -163,6 +167,7 @@ impl PageConfig { /// ## Returns /// /// The trace table for this page. +#[cfg(feature = "prove")] pub fn generate_page_trace( config: &PageConfig, final_state: &FinalStateMap, @@ -333,6 +338,26 @@ pub fn compute_precomputed_commitment(config: &PageConfig, options: &ProofOption tree.root } +/// Returns a page's preprocessed commitment, preferring the cheap path. +/// +/// Zero-init pages (INIT is all-zero) share a single commitment that depends +/// only on `(blowup, coset)`, so they resolve to the static lookup in +/// [`zero_init_preprocessed_commitment`] instead of rebuilding the FFT + +/// Merkle tree. ELF data pages have program-specific INIT and fall through +/// to [`compute_precomputed_commitment`]. This mirrors the per-page choice +/// made in `VmAirs::new_with_vkey`, so a vkey built from this function caches +/// exactly the commitments the verifier expects. +/// +/// Private-input pages have no preprocessed commitment; callers must skip +/// them before calling this. +pub fn precomputed_commitment_cached(config: &PageConfig, options: &ProofOptions) -> Commitment { + if config.init_values.is_none() { + zero_init_preprocessed_commitment(options) + } else { + compute_precomputed_commitment(config, options) + } +} + /// Returns the zero-init PAGE preprocessed commitment. /// /// Looks up `blowup_factor` in [`static_zero_page_commitment`] when diff --git a/prover/src/tables/register.rs b/prover/src/tables/register.rs index 5a09fb2fa..26431fc16 100644 --- a/prover/src/tables/register.rs +++ b/prover/src/tables/register.rs @@ -18,6 +18,9 @@ //! | fini | Word | Final value after execution | //! 
| timestamp | DWordWL | Final timestamp (1 if never accessed) | +use alloc::vec; +use alloc::vec::Vec; +#[cfg(feature = "prove")] use std::collections::HashMap; use math::fft::bit_reversing::in_place_bit_reverse_permute; @@ -91,6 +94,7 @@ pub struct FinalRegisterWordState { } /// Map from register Word address to final state. +#[cfg(feature = "prove")] pub type FinalRegisterStateMap = HashMap; // ========================================================================= @@ -144,6 +148,7 @@ fn init_value_for_address(word_addr: u64, entry_point: u64) -> u32 { /// ## Returns /// /// The trace table for registers. +#[cfg(feature = "prove")] pub fn generate_register_trace( final_state: &FinalRegisterStateMap, entry_point: u64, diff --git a/prover/src/tables/shift.rs b/prover/src/tables/shift.rs index 3115784f6..453c8736f 100644 --- a/prover/src/tables/shift.rs +++ b/prover/src/tables/shift.rs @@ -17,6 +17,8 @@ //! - Senders: MSB16, BYTE_ALU[AND] (×3), ZERO, HWSL (×5), IS_HALFWORD (×4) //! - Receiver: SHIFT (from CPU) +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::TransitionConstraint; diff --git a/prover/src/tables/store.rs b/prover/src/tables/store.rs index 1cdf0334e..6960d4ed2 100644 --- a/prover/src/tables/store.rs +++ b/prover/src/tables/store.rs @@ -19,6 +19,9 @@ //! - `value`: DWordBL (8 bytes) — value to store //! - `μ`: multiplicity +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 02371c1a0..42103f266 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -25,13 +25,20 @@ //! 
// Use traces.cpus, traces.bitwise, traces.lts, traces.memws, traces.loads //! ``` +use alloc::vec::Vec; +use alloc::format; +use alloc::vec; +#[cfg(feature = "prove")] use std::collections::HashMap; #[cfg(feature = "disk-spill")] use std::collections::HashSet; use executor::elf::Elf; +#[cfg(feature = "prove")] use executor::vm::instruction::decoding::Instruction; +#[cfg(feature = "prove")] use executor::vm::logs::Log; +#[cfg(feature = "prove")] use executor::vm::memory::U64HashMap; #[cfg(feature = "disk-spill")] use stark::storage_mode::StorageMode; @@ -59,12 +66,18 @@ use super::memw::{self, MemwOperation}; use super::memw_aligned; use super::memw_register; use super::mul::{self, MulOperation}; -use super::page::{self, FinalByteState, FinalStateMap, PageConfig}; -use super::register::{self, FinalRegisterStateMap, FinalRegisterWordState}; +use super::page::{self, PageConfig}; +#[cfg(feature = "prove")] +use super::page::{FinalByteState, FinalStateMap}; +#[cfg(feature = "prove")] +use super::register::FinalRegisterStateMap; +use super::register::{self, FinalRegisterWordState}; use super::shift::{self, ShiftOperation}; use super::store; use super::types::{GoldilocksExtension, GoldilocksField}; use crate::Error; +#[cfg(feature = "prove")] +use crate::tables::decode::PcToRow; // ============================================================================= // Memory and Register State Tracking @@ -77,11 +90,13 @@ type MemoryCell = (u8, u64); type RegisterCell = (u64, u64); /// Memory state tracker for generating MEMW/LOAD traces. 
+#[cfg(feature = "prove")] struct MemoryState { /// Map from byte address to (value, timestamp) cells: HashMap, } +#[cfg(feature = "prove")] impl MemoryState { fn new() -> Self { Self { @@ -128,7 +143,8 @@ impl MemoryState { if private_input.is_empty() { return; } - use executor::vm::memory::PRIVATE_INPUT_START_INDEX; + #[cfg(feature = "prove")] + use executor::constants::PRIVATE_INPUT_START_INDEX; let start = PRIVATE_INPUT_START_INDEX; for (i, &b) in private_input_bytes(private_input).iter().enumerate() { self.cells.insert(start + i as u64, (b, 0)); @@ -167,6 +183,7 @@ impl MemoryState { } /// Register state tracker for generating MEMW register traces. +#[cfg(feature = "prove")] struct RegisterState { /// Register file: (value, last_write_timestamp) regs: [RegisterCell; 32], @@ -176,6 +193,7 @@ struct RegisterState { pc_register: RegisterCell, } +#[cfg(feature = "prove")] impl RegisterState { fn new(entry_point: u64) -> Self { // Per spec/memory.typ: "register initialization happens at timestamp 1" @@ -296,6 +314,7 @@ impl RegisterState { // ============================================================================= /// Get byte count and signed flag from CpuOperation memory flags. +#[cfg(feature = "prove")] fn cpu_op_to_bytes_and_signed(op: &CpuOperation) -> (usize, bool) { let f = &op.decode.fields; (f.mem_bytes(), f.mem_signed()) @@ -304,6 +323,7 @@ fn cpu_op_to_bytes_and_signed(op: &CpuOperation) -> (usize, bool) { /// Pack a 64-bit register value into the MEMW value format. /// /// For register operations, values are packed as [lo32, hi32, 0, 0, 0, 0, 0, 0]. +#[cfg(feature = "prove")] fn pack_register_value(value: u64) -> [u64; 8] { [value & 0xFFFF_FFFF, value >> 32, 0, 0, 0, 0, 0, 0] } @@ -315,6 +335,7 @@ fn pack_register_value(value: u64) -> [u64; 8] { /// Collects CPU operations from execution logs. /// /// Returns a vector of CpuOperation, one per log entry. 
+#[cfg(feature = "prove")] fn collect_cpu_ops( logs: &[Log], instructions: &U64HashMap, @@ -356,6 +377,7 @@ fn collect_cpu_ops( /// Returns: (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops, keccak_ops, /// cpu32_ops, ecsm_ops, ec_scalar_ops, ecdas_ops) #[allow(clippy::type_complexity)] +#[cfg(feature = "prove")] fn collect_ops_from_cpu( cpu_ops: &[CpuOperation], memory_state: &mut MemoryState, @@ -534,6 +556,7 @@ fn collect_ops_from_cpu( /// Collects a LOAD operation and corresponding MEMW read from CpuOperation. /// /// Returns: (memw_op, load_op, bitwise_ops) +#[cfg(feature = "prove")] fn collect_load_op_from_cpu( op: &CpuOperation, memory_state: &mut MemoryState, @@ -596,6 +619,7 @@ fn collect_load_op_from_cpu( /// Collects a STORE operation as a MEMW write from CpuOperation. /// /// Returns: memw_op +#[cfg(feature = "prove")] fn collect_store_op_from_cpu(op: &CpuOperation, memory_state: &mut MemoryState) -> MemwOperation { // res contains the effective address (base + offset) let base_address = op.res; @@ -643,6 +667,7 @@ fn collect_store_op_from_cpu(op: &CpuOperation, memory_state: &mut MemoryState) /// `memory_state` / `register_state` (the offline read-old + write-new model), so later /// accesses always observe a strictly smaller old timestamp. #[allow(clippy::needless_range_loop)] +#[cfg(feature = "prove")] fn collect_ecsm_ops( op: &CpuOperation, memory_state: &mut MemoryState, @@ -760,6 +785,7 @@ fn collect_ecsm_ops( /// Collects register read/write operations (M1, M3, M5) from CpuOperation. /// /// Returns: Vec of MEMW operations for register accesses +#[cfg(feature = "prove")] fn collect_register_ops_from_cpu( op: &CpuOperation, register_state: &mut RegisterState, @@ -996,6 +1022,7 @@ fn cpu32_chip_op( /// Note: x17 (syscall number) is read by CPU's M1 interaction (read_register1=true, rs1=17). 
/// /// Returns: Vec of MEMW operations +#[cfg(feature = "prove")] fn collect_commit_memw_ops( op: &CpuOperation, register_state: &mut RegisterState, @@ -1092,6 +1119,7 @@ fn collect_commit_memw_ops( /// REGISTER final token is set separately by the caller, at the last padding /// timestamp). Also updates `register_state` so `to_final_state_map()` reflects /// the finalized GP register values. +#[cfg(feature = "prove")] fn collect_halt_ops(register_state: &mut RegisterState) -> Vec { let mut ops = Vec::with_capacity(32); let ts = u64::MAX; @@ -1147,6 +1175,7 @@ fn collect_halt_ops(register_state: &mut RegisterState) -> Vec { /// /// Generates 25 read operations (input lanes at timestamp) and 25 write /// operations (output lanes at timestamp+1). Each operation is 8 bytes wide. +#[cfg(feature = "prove")] fn collect_keccak_memw_ops( op: &CpuOperation, input: &[u64; 25], @@ -1215,6 +1244,7 @@ fn collect_keccak_memw_ops( /// - MEMW-C4 through MEMW-C7: old_timestamp[i] < timestamp (based on width) /// /// Returns: Vec of LT operations +#[cfg(feature = "prove")] fn collect_lt_from_memw(memw_ops: &[MemwOperation]) -> Vec { let mut lt_ops = Vec::with_capacity(memw_ops.len() * 8); @@ -1267,6 +1297,7 @@ fn collect_lt_from_memw(memw_ops: &[MemwOperation]) -> Vec { /// Collects LT operations from MEMW_A for timestamp ordering. /// /// Each aligned operation has a single old_timestamp < timestamp check. +#[cfg(feature = "prove")] fn collect_lt_from_memw_aligned(memw_aligned_ops: &[MemwOperation]) -> Vec { // Address overflow LT checks (R1-R3 in MEMW) are intentionally absent. // Alignment guarantees addr + (width-1) never wraps: the largest width-N @@ -1282,6 +1313,7 @@ fn collect_lt_from_memw_aligned(memw_aligned_ops: &[MemwOperation]) -> Vec 1: base_address is aligned to width (low bits are zero) /// 2. 
All accessed bytes share the same old_timestamp +#[cfg(feature = "prove")] fn is_aligned_op(op: &MemwOperation) -> bool { let low = (op.base_address & 0xFFFF_FFFF) as u32; let width = op.width as u32; @@ -1308,6 +1340,7 @@ fn is_aligned_op(op: &MemwOperation) -> bool { /// /// IS_HALF[base_address[i]] for i ∈ [0, 1] and IS_WORD[base_address[2]] are /// assumptions — the caller's (CPU's) responsibility. +#[cfg(feature = "prove")] fn collect_bitwise_from_memw_aligned(ops: &[MemwOperation]) -> Vec { let mut bitwise_ops = Vec::with_capacity(ops.len()); @@ -1351,6 +1384,7 @@ fn collect_bitwise_from_memw_aligned(ops: &[MemwOperation]) -> Vec bool { if !op.is_register || op.width != 2 { return false; @@ -1372,6 +1406,7 @@ pub(crate) fn is_register_op(op: &MemwOperation) -> bool { /// /// For each register op: checks that `timestamp[0] - old_timestamp_lo - 1` fits /// in a halfword (proving the timestamp delta is in range [1, 2^16]). +#[cfg(feature = "prove")] fn collect_bitwise_from_memw_register(ops: &[MemwOperation]) -> Vec { ops.iter() .map(|op| { @@ -1398,6 +1433,7 @@ fn collect_bitwise_from_memw_register(ops: &[MemwOperation]) -> Vec Vec { let mut bitwise_ops = Vec::with_capacity(lt_ops.len() * 8); @@ -1457,6 +1493,7 @@ fn collect_bitwise_from_lt(lt_ops: &[LtOperation]) -> Vec { /// op that spans two instances is sent twice and must be tallied twice. /// /// Returns: Vec of bitwise lookups +#[cfg(feature = "prove")] pub(crate) fn collect_bitwise_from_mul( mul_ops: &[(MulOperation, bool)], max_rows_mul: usize, @@ -1514,7 +1551,7 @@ pub(crate) fn collect_bitwise_from_mul( // MSB16: dedup per chunk — the MUL AIR sends Msb16 once per unique signed row // per instance, so the collector must mirror the same chunk boundary. 
for chunk in mul_ops.chunks(max_rows_mul) { - let mut msb16_seen = std::collections::HashSet::new(); + let mut msb16_seen = hashbrown::HashSet::new(); for (op, _wants_hi) in chunk { if msb16_seen.insert((op.lhs, op.lhs_signed, op.rhs, op.rhs_signed)) { if op.lhs_signed { @@ -1554,6 +1591,7 @@ pub(crate) fn collect_bitwise_from_mul( /// chunk, mirroring `chunk_and_generate`. /// /// Returns: Vec of bitwise lookups +#[cfg(feature = "prove")] pub(crate) fn collect_bitwise_from_dvrm( dvrm_ops: &[(DvrmOperation, bool)], max_rows_dvrm: usize, @@ -1640,7 +1678,7 @@ pub(crate) fn collect_bitwise_from_dvrm( // MSB16: same per-chunk dedup as MUL (Column(SIGNED) is a bit, not a count). for chunk in dvrm_ops.chunks(max_rows_dvrm) { - let mut msb16_seen = std::collections::HashSet::new(); + let mut msb16_seen = hashbrown::HashSet::new(); for (op, _wants_remainder) in chunk { if op.signed && msb16_seen.insert(op.clone()) { let r = op.compute_remainder(); @@ -1674,7 +1712,7 @@ pub(crate) fn collect_bitwise_from_dvrm( // ZERO (NEG template): same — SIGN_R/SIGN_D are bits, dedup per chunk. 
for chunk in dvrm_ops.chunks(max_rows_dvrm) { - let mut zero_seen = std::collections::HashSet::new(); + let mut zero_seen = hashbrown::HashSet::new(); for (op, _wants_remainder) in chunk { if zero_seen.insert(op.clone()) { // C3: NEG for r (when sign_r = 1) @@ -1724,6 +1762,7 @@ pub(crate) fn collect_bitwise_from_dvrm( /// - IS_HALFWORD[next_pc_high[0..3]] - range checks for bits 16-63 /// /// Returns: Vec of bitwise lookups +#[cfg(feature = "prove")] fn collect_bitwise_from_branch(branch_ops: &[BranchOperation]) -> Vec { let mut bitwise_ops = Vec::with_capacity(branch_ops.len() * 5); @@ -1786,6 +1825,7 @@ fn collect_bitwise_from_branch(branch_ops: &[BranchOperation]) -> Vec Vec { if num_padding_rows == 0 { return Vec::new(); @@ -1832,10 +1872,10 @@ fn private_input_bytes(private_input: &[u8]) -> Vec { .collect() } -fn build_init_page_data(elf: &Elf, private_input: &[u8]) -> HashMap> { - use executor::vm::memory::PRIVATE_INPUT_START_INDEX; +fn build_init_page_data(elf: &Elf, private_input: &[u8]) -> hashbrown::HashMap> { + use executor::constants::PRIVATE_INPUT_START_INDEX; let page_size = page::DEFAULT_PAGE_SIZE; - let mut init_page_data: HashMap> = HashMap::new(); + let mut init_page_data: hashbrown::HashMap> = hashbrown::HashMap::new(); for segment in &elf.data { for (i, &word) in segment.values.iter().enumerate() { let word_addr = segment.base_addr + (i as u64 * 4); @@ -1865,11 +1905,13 @@ fn build_init_page_data(elf: &Elf, private_input: &[u8]) -> HashMap init_page_data } +#[cfg(feature = "prove")] fn collect_bitwise_from_page( elf: &Elf, memory_state: &MemoryState, private_input: &[u8], ) -> Vec { + #[cfg(feature = "prove")] use std::collections::BTreeSet; let page_size = page::DEFAULT_PAGE_SIZE; @@ -1922,6 +1964,7 @@ fn collect_bitwise_from_page( /// Expand one Commit ECALL into its per-byte COMMIT rows using the memory state /// at the moment the ECALL executes. 
+#[cfg(feature = "prove")] fn expand_commit_operations_for_ecall( ecall: &CpuOperation, memory_state: &MemoryState, @@ -1964,6 +2007,7 @@ fn expand_commit_operations_for_ecall( /// - Zero for end detection (1 per real row, mult = mu) /// /// Note: AreBytes for value is intentionally omitted per spec. +#[cfg(feature = "prove")] fn collect_bitwise_from_commit(commit_ops: &[CommitOperation]) -> Vec { let mut lookups = Vec::new(); @@ -2097,6 +2141,7 @@ pub(crate) fn collect_bitwise_from_ecdas(ops: &[ecdas::EcdasOperation]) -> Vec Vec { use executor::vm::instruction::execution::{KECCAK_RC, KECCAK_RHO}; @@ -2347,6 +2392,7 @@ pub(crate) fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec /// every address accessed during execution (ELF init + runtime stores/loads). /// ELF pages get their init data from the binary; all others are zero-init. +#[cfg(feature = "prove")] fn generate_page_tables( elf: &Elf, memory_state: &MemoryState, @@ -2355,6 +2401,7 @@ fn generate_page_tables( Vec>, Vec, ) { + #[cfg(feature = "prove")] use std::collections::BTreeSet; // Collect init data from ELF segments + private input region @@ -2378,14 +2425,14 @@ fn generate_page_tables( let mut page_configs = Vec::new(); // Determine which page bases hold private input data. 
- let private_input_page_bases: std::collections::BTreeSet = if !private_input.is_empty() { - use executor::vm::memory::PRIVATE_INPUT_START_INDEX; + let private_input_page_bases: alloc::collections::BTreeSet = if !private_input.is_empty() { + use executor::constants::PRIVATE_INPUT_START_INDEX; let total_bytes = 4 + private_input.len(); // length prefix + data (0..total_bytes) .map(|i| page::page_base_for_address(PRIVATE_INPUT_START_INDEX + i as u64)) .collect() } else { - std::collections::BTreeSet::new() + alloc::collections::BTreeSet::new() }; for &page_base in &page_bases { @@ -2551,6 +2598,7 @@ fn chunk_and_generate( /// Takes the raw output of `collect_ops_from_cpu` plus `register_state` /// (for HALT finalization), and returns fully-routed ops ready for Phase 3+. #[allow(clippy::too_many_arguments)] +#[cfg(feature = "prove")] fn collect_all_ops( cpu_ops: Vec, mut memw_ops: Vec, @@ -2706,13 +2754,14 @@ fn collect_all_ops( /// `elf` controls PAGE table generation: `Some(elf)` generates real PAGE tables /// and PAGE bitwise lookups; `None` produces empty page tables. 
#[allow(clippy::too_many_arguments)] +#[cfg(feature = "prove")] fn build_traces( ops: CollectedOps, elf: Option<&Elf>, memory_state: &MemoryState, entry_point: u64, decode_trace: TraceTable, - decode_pc_to_row: HashMap, + decode_pc_to_row: PcToRow, mut register_state: RegisterState, max_rows: &super::MaxRowsConfig, #[cfg(feature = "disk-spill")] storage_mode: StorageMode, @@ -2927,7 +2976,7 @@ fn build_traces( // When CPU is split, each chunk pads independently let mut decode = decode_trace; let mut decode_lookups: Vec = cpu_ops.iter().map(|op| op.decode.pc).collect(); - decode_lookups.extend(std::iter::repeat_n(cpu::CPU_PADDING_PC, num_padding_rows)); + decode_lookups.extend(core::iter::repeat_n(cpu::CPU_PADDING_PC, num_padding_rows)); decode::update_multiplicities(&mut decode, &decode_pc_to_row, &decode_lookups); // Prepare register final state before scope (needs register_state ownership) @@ -3574,7 +3623,7 @@ impl Traces { /// init data populated. Used by the verifier to reconstruct the ELF /// portion of the PAGE table layout. pub fn page_configs_from_elf(elf: &Elf) -> Vec { - use std::collections::BTreeSet; + use alloc::collections::BTreeSet; let init_page_data = build_init_page_data(elf, &[]); @@ -3617,7 +3666,7 @@ impl Traces { // Add private-input pages (non-preprocessed, verifier doesn't know init values) if num_private_input_pages > 0 { - use executor::vm::memory::PRIVATE_INPUT_START_INDEX; + use executor::constants::PRIVATE_INPUT_START_INDEX; let first_page_base = page::page_base_for_address(PRIVATE_INPUT_START_INDEX); for i in 0..num_private_input_pages { configs.push(PageConfig { @@ -3682,6 +3731,7 @@ impl Traces { /// 3. MEMW → LT operations (timestamp ordering) /// 4. LT, MEMW, Branch → Bitwise lookups /// 5. Generate all traces including PAGE tables + #[cfg(feature = "prove")] pub fn from_elf_and_logs( elf: &Elf, logs: &[Log], @@ -3755,6 +3805,7 @@ impl Traces { /// as it generates PAGE tables from ELF data. 
/// /// Note: This creates empty PAGE tables since no ELF is provided. + #[cfg(feature = "prove")] pub fn from_logs( logs: &[Log], instructions: U64HashMap, diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index fd9d9d40c..252920e25 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -10,13 +10,24 @@ //! - Minimal trace generation for testing //! - AIR creation helpers +use alloc::boxed::Box; +use alloc::format; +use alloc::vec; +use alloc::vec::Vec; + +#[cfg(feature = "prove")] use std::path::PathBuf; use crypto::fiat_shamir::is_transcript::IsStarkTranscript; +#[cfg(feature = "prove")] use executor::elf::Elf; +#[cfg(feature = "prove")] use executor::vm::execution::Executor; +#[cfg(feature = "prove")] use executor::vm::instruction::decoding::Instruction; +#[cfg(feature = "prove")] use executor::vm::logs::Log; +#[cfg(feature = "prove")] use executor::vm::memory::U64HashMap; use math::field::element::FieldElement; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; @@ -209,6 +220,7 @@ pub fn is_halfword_sender_columns(interactions: &[BusInteraction]) -> Vec // ============================================================================= /// Returns the raw ELF bytes for an assembly test program. +#[cfg(feature = "prove")] pub fn asm_elf_bytes(name: &str) -> Vec { let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let workspace_root = manifest_dir @@ -227,6 +239,7 @@ pub fn asm_elf_bytes(name: &str) -> Vec { /// Helper to run an ELF from the program_artifacts directory. /// /// Returns the ELF, execution logs, and instruction map. 
+#[cfg(feature = "prove")] pub fn run_asm_elf(name: &str) -> (Elf, Vec, U64HashMap) { let elf_data = asm_elf_bytes(name); let elf = Elf::load(&elf_data).expect("Failed to load ELF"); @@ -240,6 +253,7 @@ pub fn run_asm_elf(name: &str) -> (Elf, Vec, U64HashMap) { // ============================================================================= /// Collect bitwise lookups from executor logs for minimal table generation. +#[cfg(feature = "prove")] pub fn collect_bitwise_ops_from_logs( logs: &[Log], instructions: &U64HashMap, @@ -258,10 +272,12 @@ pub fn collect_bitwise_ops_from_logs( /// /// For each instruction that triggers an SLT or BLT operation, creates an LtOperation /// with the arg1, arg2, and signed values. +#[cfg(feature = "prove")] pub fn collect_lt_lookups_from_logs( logs: &[Log], instructions: &U64HashMap, ) -> Vec { + #[cfg(feature = "prove")] use executor::vm::instruction::decoding::{ArithOp, Comparison}; let mut lookups = Vec::new(); @@ -357,10 +373,12 @@ pub fn collect_lt_lookups_from_logs( /// Collect LOAD operations from executor logs. /// /// Creates LoadOperation objects for each Load instruction in the logs. 
+#[cfg(feature = "prove")] pub fn collect_load_ops_from_logs( logs: &[Log], instructions: &U64HashMap, ) -> Vec { + #[cfg(feature = "prove")] use executor::vm::instruction::decoding::LoadStoreWidth; let mut load_ops = Vec::new(); @@ -423,6 +441,7 @@ pub fn collect_load_ops_from_logs( /// The LT table sends: /// - MSB16 lookups (×2 per row: for lhs_msb and rhs_msb) /// - IS_HALFWORD lookups (×6 per row: ×4 for lhs_sub_rhs, ×1 for lhs[1], ×1 for rhs[1]) +#[cfg(feature = "prove")] pub fn collect_bitwise_ops_from_lt(lt_ops: &[LtOperation]) -> Vec { let mut lookups = Vec::new(); @@ -481,6 +500,7 @@ pub fn collect_bitwise_ops_from_lt(lt_ops: &[LtOperation]) -> Vec sign_bit /// - read4: MSB8[res[3]] -> sign_bit /// - read8: no MSB8 lookup (all 8 bytes are used) +#[cfg(feature = "prove")] pub fn collect_bitwise_ops_from_load( load_ops: &[crate::tables::load::LoadOperation], ) -> Vec { @@ -500,7 +520,9 @@ pub fn collect_bitwise_ops_from_load( /// /// **WARNING: FOR TESTING/BENCHMARKING ONLY - NOT PRODUCTION SAFE!** /// The verifier expects the full deterministic 2^20 row public table. +#[cfg(feature = "prove")] pub fn generate_minimal_bitwise_trace(ops: &[BitwiseOperation]) -> TraceTable { + #[cfg(feature = "prove")] use std::collections::HashMap; // Collect unique (lo_byte, hi_byte, shift) tuples and count multiplicities per lookup type diff --git a/prover/src/tests/bitwise_bus_tests.rs b/prover/src/tests/bitwise_bus_tests.rs index fd3b55cba..1a6a356a1 100644 --- a/prover/src/tests/bitwise_bus_tests.rs +++ b/prover/src/tests/bitwise_bus_tests.rs @@ -4,6 +4,7 @@ //! - Completeness: Valid lookups to BITWISE are accepted //! 
- Soundness: Invalid lookups to BITWISE are rejected +#[cfg(feature = "prove")] use std::collections::HashMap; use crypto::fiat_shamir::default_transcript::DefaultTranscript; diff --git a/prover/src/tests/branch_bus_tests.rs b/prover/src/tests/branch_bus_tests.rs index 636f6dd34..52e71c693 100644 --- a/prover/src/tests/branch_bus_tests.rs +++ b/prover/src/tests/branch_bus_tests.rs @@ -6,6 +6,7 @@ //! - Padding: Auto-padding to power of 2 works correctly //! - Border cases: Edge values (0, MAX, signed boundaries) work +#[cfg(feature = "prove")] use std::collections::HashMap; use crypto::fiat_shamir::default_transcript::DefaultTranscript; diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs index 43e6991cf..229ff58b9 100644 --- a/prover/src/tests/decode_tests.rs +++ b/prover/src/tests/decode_tests.rs @@ -11,8 +11,11 @@ use crate::tables::types::DecodeEntry; use crate::test_utils::asm_elf_bytes; use crate::{prove, verify_with_options}; +#[cfg(feature = "prove")] use executor::elf::Elf; +#[cfg(feature = "prove")] use executor::vm::instruction::decoding::{ArithOp, Comparison, Instruction, LoadStoreWidth}; +#[cfg(feature = "prove")] use executor::vm::memory::U64HashMap; use stark::proof::options::GoldilocksCubicProofOptions; diff --git a/prover/src/tests/lt_bus_tests.rs b/prover/src/tests/lt_bus_tests.rs index b6148cfdc..b41b9aab3 100644 --- a/prover/src/tests/lt_bus_tests.rs +++ b/prover/src/tests/lt_bus_tests.rs @@ -6,6 +6,7 @@ //! - Padding: Auto-padding to power of 2 works correctly //! 
- Border cases: Edge values (0, MAX, signed boundaries) work +#[cfg(feature = "prove")] use std::collections::HashMap; use crypto::fiat_shamir::default_transcript::DefaultTranscript; diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs index 4d0ac4477..b253dd543 100644 --- a/prover/src/tests/mod.rs +++ b/prover/src/tests/mod.rs @@ -59,6 +59,8 @@ pub mod page_tests; #[cfg(test)] pub mod prove_elfs_tests; #[cfg(test)] +pub mod recursion_smoke_test; +#[cfg(test)] pub mod register_tests; #[cfg(test)] pub mod shift_tests; @@ -74,3 +76,5 @@ pub mod templates_tests; pub mod trace_builder_tests; #[cfg(test)] pub mod trace_test_helpers; +#[cfg(test)] +pub mod vkey_tests; diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index a52383341..e0751d3e4 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -26,6 +26,7 @@ use crate::tables::MaxRowsConfig; use crate::tables::trace_builder::Traces; use crate::tables::types::{GoldilocksExtension, GoldilocksField}; +#[cfg(feature = "prove")] use executor::elf::Elf; use executor::vm::execution::Executor; @@ -1440,6 +1441,7 @@ fn test_prove_elfs_all_instructions_64_full() { fn test_debug_memory_bus_tokens() { use crate::tables::memw::cols as memw_cols; use crate::tables::register::cols as reg_cols; + #[cfg(feature = "prove")] use std::collections::HashMap; let (_elf, logs, instructions) = run_asm_elf("sub_neg_result"); @@ -1705,6 +1707,7 @@ fn test_debug_memory_tokens_sb_sh() { use crate::tables::memw::cols as memw_cols; use crate::tables::page::cols as page_cols; use crate::tables::register::cols as reg_cols; + #[cfg(feature = "prove")] use std::collections::HashMap; let (elf, logs, _instructions) = run_asm_elf("test_sb_sh_8"); diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs new file mode 100644 index 000000000..06f66d3f8 --- /dev/null +++ b/prover/src/tests/recursion_smoke_test.rs @@ -0,0 +1,1342 
@@
+//! End-to-end naive recursion pipeline smoke tests.
+//!
+//! Each test:
+//! 1. Proves an inner program on the host.
+//! 2. Serializes `(VmProof, inner_elf, ProofOptions, VmVerifyingKey)` with
+//!    postcard.
+//! 3. Hands that as private input to the recursion guest.
+//! 4. Proves the recursion guest's execution.
+//! 5. Verifies the outer proof.
+//!
+//! The ELFs are built on demand by `bench_vs/build_recursion_elfs.sh`.
+//!
+//! Tests are `#[ignore]`d because the outer proof runs the full STARK verifier
+//! inside the VM (minutes per run, large memory footprint).
+
+use std::path::PathBuf;
+use std::process::Command;
+
+/// Workspace root: the parent directory of this crate's `CARGO_MANIFEST_DIR`.
+///
+/// Panics if the manifest directory has no parent.
+fn workspace_root() -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .expect("workspace root")
+        .to_path_buf()
+}
+
+/// Run `bench_vs/build_recursion_elfs.sh` (resolved against `root`) with `bash`.
+///
+/// Panics if the helper cannot be spawned or exits with a non-success status.
+fn build_elfs(root: &std::path::Path) {
+    let status = Command::new("bash")
+        .arg(root.join("bench_vs/build_recursion_elfs.sh"))
+        .status()
+        .expect("failed to spawn build helper");
+    assert!(status.success(), "ELF build script failed");
+}
+
+/// Path to a guest ELF artifact from a `bench_vs/lambda/<name>/` build.
+///
+/// `name` is the guest's directory under `bench_vs/lambda/`; `bin_name` is the
+/// binary inside that build's `riscv64im-lambda-vm-elf/release/` directory.
+fn guest_elf_path(root: &std::path::Path, name: &str, bin_name: &str) -> PathBuf {
+    root.join(format!(
+        "bench_vs/lambda/{name}/target/riscv64im-lambda-vm-elf/release/{bin_name}"
+    ))
+}
+
+/// Read a guest ELF artifact from a `bench_vs/lambda/<name>/` build.
+///
+/// Panics with the offending path in the message if the file cannot be read.
+fn read_guest_elf(root: &std::path::Path, name: &str, bin_name: &str) -> Vec<u8> {
+    let path = guest_elf_path(root, name, bin_name);
+    std::fs::read(&path).unwrap_or_else(|e| panic!("failed to read {}: {e}", path.display()))
+}
+
+/// Resolve a guest PC to `function (file:line)` using the ELF's DWARF info,
+/// the same mapping `addr2line -e <elf> -f -i -C <pc>` produces. Returns the
+/// innermost (most-inlined) frame, falling back to the symbol table and then
+/// to a placeholder string when no DWARF frame covers the PC (e.g. PLT stubs
+/// or a release build that dropped line info).
+fn resolve_pc(loader: &addr2line::Loader, pc: u64) -> String { + let mut frames = match loader.find_frames(pc) { + Ok(frames) => frames, + Err(_) => return "".to_string(), + }; + match frames.next() { + Ok(Some(frame)) => { + let func = frame + .function + .as_ref() + .and_then(|f| f.demangle().ok().map(|n| n.into_owned())) + .unwrap_or_else(|| "".to_string()); + let loc = frame + .location + .as_ref() + .and_then(|l| l.file.map(|file| (file, l.line))) + .map(|(file, line)| match line { + Some(line) => format!(" ({file}:{line})"), + None => format!(" ({file})"), + }) + .unwrap_or_default(); + format!("{func}{loc}") + } + // No DWARF frame; fall back to the symbol table. + _ => loader.find_symbol(pc).map_or_else( + || "".to_string(), + |s| addr2line::demangle_auto(s.into(), None).into_owned(), + ), + } +} + +/// Print a PC histogram as two tables: a per-function summary (the cycles each +/// resolved function accounts for, folded over all its PCs) followed by the +/// top-100 per-address detail. `pc_hist` maps program counter → cycle count. +/// +/// The per-function view is the one that matters: an inlined kernel is spread +/// across dozens of PCs, so the raw per-address table scatters its true cost. +fn print_pc_histogram( + title: &str, + loader: &addr2line::Loader, + pc_hist: std::collections::HashMap, + total_cycles: u64, + exec_time: std::time::Duration, +) { + let mut entries: Vec<(u64, u64)> = pc_hist.into_iter().collect(); + entries.sort_unstable_by_key(|(_pc, count)| std::cmp::Reverse(*count)); + + // Aggregate the full histogram by resolved function, resolving each PC once. 
+ let mut by_function: std::collections::HashMap = + std::collections::HashMap::new(); + for (pc, count) in &entries { + let entry = by_function.entry(resolve_pc(loader, *pc)).or_insert((0, 0)); + entry.0 += *count; // cycles + entry.1 += 1; // distinct PCs folded into this function + } + let mut fn_entries: Vec<(String, (u64, u64))> = by_function.into_iter().collect(); + fn_entries.sort_unstable_by_key(|(_name, (cycles, _pcs))| std::cmp::Reverse(*cycles)); + + let pct = |n: u64| 100.0 * (n as f64) / (total_cycles as f64); + + eprintln!(); + eprintln!("============================================================"); + eprintln!(" {title}"); + eprintln!("============================================================"); + eprintln!(" Total cycles : {total_cycles}"); + eprintln!(" Unique PCs : {}", entries.len()); + eprintln!(" Exec time : {exec_time:?}"); + eprintln!(); + eprintln!(" Top 25 functions by cycle count (aggregated over their PCs):"); + eprintln!( + " {:>4} {:>14} {:>7} {:>7} {:>5} {}", + "rank", "cycles", "%", "cum %", "PCs", "function (file:line)" + ); + let mut fn_cumulative: u64 = 0; + for (rank, (name, (cycles, pcs))) in fn_entries.iter().take(25).enumerate() { + fn_cumulative += cycles; + eprintln!( + " {:>4} {:>14} {:>6.2}% {:>6.2}% {:>5} {}", + rank + 1, + cycles, + pct(*cycles), + pct(fn_cumulative), + pcs, + name, + ); + } + eprintln!(); + eprintln!(" Top 100 PCs by cycle count (per-address detail):"); + eprintln!( + " {:>4} {:>18} {:>14} {:>7} {:>7} {}", + "rank", "pc", "cycles", "%", "cum %", "function (file:line)" + ); + let mut cumulative: u64 = 0; + for (rank, (pc, count)) in entries.iter().take(100).enumerate() { + cumulative += count; + eprintln!( + " {:>4} {:#018x} {:>14} {:>6.2}% {:>6.2}% {}", + rank + 1, + pc, + count, + pct(*count), + pct(cumulative), + resolve_pc(loader, *pc), + ); + } + eprintln!("============================================================"); +} + +/// Core pipeline: prove an inner program with the given options, hand 
the +/// proof+ELF+options to the recursion guest, then prove and verify the outer +/// proof. +fn run_recursion_pipeline_with_options( + label: &str, + inner_elf_bytes: &[u8], + inner_private_input: &[u8], + inner_proof_options: stark::proof::options::ProofOptions, +) { + let root = workspace_root(); + build_elfs(&root); + let recursion_elf_bytes = read_guest_elf(&root, "recursion", "recursion-bench"); + + eprintln!( + "[{label}] proving inner (blowup={}, fri_queries={}) ...", + inner_proof_options.blowup_factor, inner_proof_options.fri_number_of_queries + ); + let inner_proof = crate::prove_with_options_and_inputs( + inner_elf_bytes, + inner_private_input, + &inner_proof_options, + &crate::MaxRowsConfig::default(), + ) + .expect("inner prove should succeed"); + eprintln!("[{label}] inner proof generated"); + + assert!( + crate::verify_with_options(&inner_proof, inner_elf_bytes, &inner_proof_options, None, None) + .expect("inner verify errored"), + "inner proof must verify on host" + ); + + let elf_for_vkey = executor::elf::Elf::load(inner_elf_bytes).expect("ELF load failed"); + let page_configs = crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime( + &elf_for_vkey, + &inner_proof.runtime_page_ranges, + inner_proof.num_private_input_pages, + ); + let vkey = crate::VmVerifyingKey::from_elf_and_options( + &elf_for_vkey, + &inner_proof_options, + &page_configs, + ); + let blob = + postcard::to_allocvec(&(&inner_proof, &inner_elf_bytes, &inner_proof_options, &vkey)) + .expect("postcard encode failed"); + eprintln!( + "[{label}] postcard blob: {} bytes (limit: MAX_PRIVATE_INPUT_SIZE)", + blob.len() + ); + assert!( + blob.len() < executor::constants::MAX_PRIVATE_INPUT_SIZE as usize, + "recursion input exceeds MAX_PRIVATE_INPUT_SIZE" + ); + + eprintln!("[{label}] proving outer (recursion guest) ..."); + let outer_proof = + crate::prove_with_inputs(&recursion_elf_bytes, &blob).expect("outer prove should succeed"); + eprintln!("[{label}] outer proof 
generated"); + + assert!( + crate::verify(&outer_proof, &recursion_elf_bytes).expect("outer verify errored"), + "outer proof must verify on host" + ); + + assert_eq!( + outer_proof.public_output, + vec![1u8], + "guest should commit success marker" + ); +} + +/// Convenience wrapper using `blowup=8` for the inner proof — the default for +/// the existing smoke tests, chosen to keep outer-prove memory tractable. +fn run_recursion_pipeline(label: &str, inner_elf_bytes: &[u8], inner_private_input: &[u8]) { + let inner_proof_options = stark::proof::options::GoldilocksCubicProofOptions::with_blowup(8) + .expect("blowup=8 is always valid"); + run_recursion_pipeline_with_options( + label, + inner_elf_bytes, + inner_private_input, + inner_proof_options, + ); +} + +/// Inner program: empty (halt immediately). Useful for measuring the +/// lambda-vm verifier's intrinsic recursion overhead — i.e. what it costs +/// to verify the smallest possible lambda-vm proof, with no inner workload. +#[test] +#[ignore = "slow: runs the full STARK verifier inside the VM"] +fn test_recursion_smoke_empty() { + let root = workspace_root(); + build_elfs(&root); + let empty_elf_bytes = read_guest_elf(&root, "empty", "empty-bench"); + run_recursion_pipeline("recursion-empty", &empty_elf_bytes, &[]); +} + +/// Inner program: empty, but with the absolute-minimum FRI parameters +/// (blowup=2, **fri_number_of_queries=1**). This is a "can the pipeline even +/// run end-to-end on a 125 GB box" experiment — security is intentionally +/// terrible. Use only for capacity probing. +#[test] +#[ignore = "slow: runs the full STARK verifier inside the VM"] +fn test_recursion_smoke_1query() { + let root = workspace_root(); + build_elfs(&root); + let empty_elf_bytes = read_guest_elf(&root, "empty", "empty-bench"); + + // Construct ProofOptions directly so we can pin fri_number_of_queries = 1. 
+ // (GoldilocksCubicProofOptions::with_blowup derives queries from a 128-bit + // security target — way more than we want here.) + let inner_proof_options = stark::proof::options::ProofOptions { + blowup_factor: 2, + fri_number_of_queries: 1, + coset_offset: 3, + grinding_factor: 1, + }; + + run_recursion_pipeline_with_options( + "recursion-1query", + &empty_elf_bytes, + &[], + inner_proof_options, + ); +} + +/// Diagnostic: build the inner proof and dump the recursion guest's private-input +/// blob to `/tmp/recursion_input.bin` so the CLI's `execute --flamegraph` can +/// consume it. +/// +/// Usage after running this test: +/// ``` +/// cargo run -p cli --release -- execute \ +/// bench_vs/lambda/recursion/target/riscv64im-lambda-vm-elf/release/recursion-bench \ +/// --private-input /tmp/recursion_input.bin \ +/// --flamegraph /tmp/recursion_folded.txt +/// cat /tmp/recursion_folded.txt | inferno-flamegraph > /tmp/recursion_flamegraph.svg +/// ``` +#[test] +#[ignore = "diagnostic: writes recursion private input to /tmp/recursion_input.bin"] +fn test_dump_recursion_input() { + let root = workspace_root(); + build_elfs(&root); + let empty_elf_bytes = read_guest_elf(&root, "empty", "empty-bench"); + + let inner_proof_options = stark::proof::options::ProofOptions { + blowup_factor: 2, + fri_number_of_queries: 1, + coset_offset: 3, + grinding_factor: 1, + }; + + eprintln!("[dump-input] proving inner ..."); + let inner_proof = crate::prove_with_options_and_inputs( + &empty_elf_bytes, + &[], + &inner_proof_options, + &crate::MaxRowsConfig::default(), + ) + .expect("inner prove should succeed"); + + let elf_for_vkey = executor::elf::Elf::load(&empty_elf_bytes).expect("ELF load failed"); + let page_configs = crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime( + &elf_for_vkey, + &inner_proof.runtime_page_ranges, + inner_proof.num_private_input_pages, + ); + let vkey = crate::VmVerifyingKey::from_elf_and_options( + &elf_for_vkey, + 
&inner_proof_options, + &page_configs, + ); + let blob = + postcard::to_allocvec(&(&inner_proof, &empty_elf_bytes, &inner_proof_options, &vkey)) + .expect("postcard encode failed"); + + let path = "/tmp/recursion_input.bin"; + std::fs::write(path, &blob).expect("write blob"); + eprintln!("[dump-input] wrote {} bytes to {path}", blob.len()); +} + +/// Diagnostic: build the inner proof + recursion guest input, then **execute +/// only** the recursion guest (no STARK proving) and report cycle counts + +/// trace size estimates. +/// +/// This is the cheap way to find out how many RISC-V instructions the +/// verifier actually executes inside the guest — a much faster signal than +/// running the full outer prove (which can OOM on a 125 GB machine). +#[test] +#[ignore = "diagnostic: runs the executor only, prints cycle counts"] +fn test_recursion_cycle_count() { + use executor::elf::Elf; + use executor::vm::execution::Executor; + + let root = workspace_root(); + build_elfs(&root); + let empty_elf_bytes = read_guest_elf(&root, "empty", "empty-bench"); + let recursion_elf_bytes = read_guest_elf(&root, "recursion", "recursion-bench"); + + // Build the inner proof exactly as the smoke test does, with the + // absolute-minimum FRI params so the inner is as small as possible. 
+ let inner_proof_options = stark::proof::options::ProofOptions { + blowup_factor: 2, + fri_number_of_queries: 1, + coset_offset: 3, + grinding_factor: 1, + }; + + eprintln!("[cycle-count] proving inner (empty, blowup=2, fri_queries=1) ..."); + let inner_proof = crate::prove_with_options_and_inputs( + &empty_elf_bytes, + &[], + &inner_proof_options, + &crate::MaxRowsConfig::default(), + ) + .expect("inner prove should succeed"); + + let elf_for_vkey = executor::elf::Elf::load(&empty_elf_bytes).expect("ELF load failed"); + let page_configs = crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime( + &elf_for_vkey, + &inner_proof.runtime_page_ranges, + inner_proof.num_private_input_pages, + ); + let vkey = crate::VmVerifyingKey::from_elf_and_options( + &elf_for_vkey, + &inner_proof_options, + &page_configs, + ); + let blob = + postcard::to_allocvec(&(&inner_proof, &empty_elf_bytes, &inner_proof_options, &vkey)) + .expect("postcard encode failed"); + eprintln!("[cycle-count] postcard blob: {} bytes", blob.len()); + + // Execute (NOT prove) the recursion guest. Use `resume()` in a loop and + // only count chunk sizes — never accumulate logs in memory. This avoids + // the Vec blow-up that OOMs even a 125 GB server (one Log is 40 B; + // a few billion of them is hundreds of GB). + eprintln!("[cycle-count] executing recursion guest (streaming counter only) ..."); + let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed"); + let mut executor = Executor::new(&program, blob).expect("Executor::new failed"); + let start = std::time::Instant::now(); + let mut cycle_count: usize = 0; + let mut chunks: usize = 0; + while let Some(logs) = executor.resume().expect("executor resume failed") { + cycle_count += logs.len(); + chunks += 1; + if chunks.is_multiple_of(50) { + eprintln!( + "[cycle-count] ... 
{chunks} chunks, {cycle_count} cycles, {:?} elapsed", + start.elapsed() + ); + } + } + let exec_time = start.elapsed(); + + eprintln!(); + eprintln!("============================================================"); + eprintln!(" RECURSION GUEST EXECUTION SUMMARY"); + eprintln!("============================================================"); + eprintln!(" Cycle count : {cycle_count}"); + eprintln!(" Executor wall time : {exec_time:?}"); + eprintln!(); + eprintln!(" Rough memory estimate for outer prove:"); + let bytes_per_field = 8usize; + let approx_columns = 250usize; // CPU + MEMW + DECODE + bus columns combined + let main_trace_bytes = cycle_count * approx_columns * bytes_per_field; + let blowup = 2usize; + let lde_main_bytes = main_trace_bytes * blowup; + eprintln!( + " main trace : ~{:.2} GB ({} cycles × ~{} cols × 8 B)", + main_trace_bytes as f64 / 1e9, + cycle_count, + approx_columns + ); + eprintln!( + " main LDE (blowup={}) : ~{:.2} GB", + blowup, + lde_main_bytes as f64 / 1e9 + ); + eprintln!(" (aux trace adds roughly 50% more, so peak peak ≈ 2-3× LDE)"); + eprintln!("============================================================"); +} + +/// Diagnostic: count the distinct 4 KB memory pages the recursion guest +/// touches when verifying a small inner proof. +/// +/// We suspect the outer prover's 125 GB OOM wall is dominated by per-page +/// PAGE-table overhead. The number of PAGE tables the prover would build +/// equals the number of distinct 4 KB pages the executor touches — code, +/// heap, private input, and stack. This test surfaces that count without +/// running the prover. +/// +/// Layout (per `executor::constants` + `bench_vs/lambda/recursion/src/main.rs`): +/// - Code/static: whatever PT_LOAD segments the recursion ELF carries. +/// - Heap: `_end .. 0xC000_0000` (`MAX_MEMORY_SIZE`); `TlsfHeap` scatters +/// allocations across this region. +/// - Private input: starts at `PRIVATE_INPUT_START_INDEX = 0xFF000000`. 
+/// - Stack: top of address space (down from `STACK_TOP = 0xFFFFFFFFFFFFFFF0`). +/// +/// Interpretation (rough): +/// - <1,000 pages: PAGE-table overhead is not the bottleneck. +/// - 10k-100k pages: TLSF heap fragmentation; design a tighter bump allocator +/// and re-measure. +/// - >100k pages: postcard decode dominates; consider streaming decode. +#[test] +#[ignore = "diagnostic: counts distinct 4 KB memory pages touched by the recursion guest"] +fn test_recursion_page_count() { + use executor::constants::PRIVATE_INPUT_START_INDEX; + use executor::elf::Elf; + use executor::vm::execution::Executor; + use std::collections::HashSet; + + let root = workspace_root(); + build_elfs(&root); + let empty_elf_bytes = read_guest_elf(&root, "empty", "empty-bench"); + let recursion_elf_bytes = read_guest_elf(&root, "recursion", "recursion-bench"); + + let inner_proof_options = stark::proof::options::ProofOptions { + blowup_factor: 2, + fri_number_of_queries: 1, + coset_offset: 3, + grinding_factor: 1, + }; + + eprintln!("[page-count] proving inner (empty, blowup=2, fri_queries=1) ..."); + let inner_proof = crate::prove_with_options_and_inputs( + &empty_elf_bytes, + &[], + &inner_proof_options, + &crate::MaxRowsConfig::default(), + ) + .expect("inner prove should succeed"); + + let elf_for_vkey = Elf::load(&empty_elf_bytes).expect("ELF load failed"); + let page_configs = crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime( + &elf_for_vkey, + &inner_proof.runtime_page_ranges, + inner_proof.num_private_input_pages, + ); + let vkey = crate::VmVerifyingKey::from_elf_and_options( + &elf_for_vkey, + &inner_proof_options, + &page_configs, + ); + let blob = + postcard::to_allocvec(&(&inner_proof, &empty_elf_bytes, &inner_proof_options, &vkey)) + .expect("postcard encode failed"); + eprintln!("[page-count] postcard blob: {} bytes", blob.len()); + + // Precompute the recursion ELF's PT_LOAD ranges so we can bucket code/ + // static pages separately from heap. 
`Elf::load` already expands BSS + // (memsz > filesz) into zero-valued words, so these ranges cover + // .text + .rodata + .data + .bss. + let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed"); + let segment_ranges: Vec<(u64, u64)> = program + .data + .iter() + .map(|seg| (seg.base_addr, seg.base_addr + (seg.values.len() as u64 * 4))) + .collect(); + eprintln!( + "[page-count] recursion ELF: {} PT_LOAD segment(s)", + segment_ranges.len(), + ); + for (i, (lo, hi)) in segment_ranges.iter().enumerate() { + eprintln!( + "[page-count] segment[{i}]: 0x{lo:016x} .. 0x{hi:016x} ({} bytes)", + hi - lo, + ); + } + + // Stream through execution — running to completion via `Executor::run` + // would accumulate ~67 M `Log` records (~2.7 GB) we don't need. We only + // care about the *final* memory state. + eprintln!("[page-count] executing recursion guest (streaming) ..."); + let mut executor = Executor::new(&program, blob).expect("Executor::new failed"); + let start = std::time::Instant::now(); + let mut chunks: usize = 0; + let mut total_cycles: u64 = 0; + while let Some(logs) = executor.resume().expect("executor resume failed") { + total_cycles += logs.len() as u64; + chunks += 1; + if chunks.is_multiple_of(50) { + eprintln!( + "[page-count] ... {chunks} chunks, {total_cycles} cycles, {:?} elapsed", + start.elapsed() + ); + } + } + let exec_time = start.elapsed(); + + // Collect the set of distinct 4 KB pages from every cell touched during + // (a) program loading, (b) private-input loading, (c) execution. + const PAGE_MASK: u64 = !0xFFFu64; + let cells = executor.memory().cells(); + let total_cells = cells.len(); + let pages: HashSet = cells.keys().map(|&a| a & PAGE_MASK).collect(); + + // Bucket by region. A "code/static" page is any page that overlaps a + // PT_LOAD segment. Stack lives near the top of the 64-bit address + // space; private input lives in the [0xFF000000, ...) window above the + // 3 GB heap ceiling. 
+ const HEAP_CEILING: u64 = 0xC000_0000; + const STACK_FLOOR: u64 = 0xFFFF_FFFF_0000_0000; + + let mut code_pages = 0usize; + let mut heap_pages = 0usize; + let mut private_input_pages = 0usize; + let mut stack_pages = 0usize; + let mut other_pages = 0usize; + + for &page in &pages { + let page_end = page.saturating_add(0x1000); + let in_code = segment_ranges + .iter() + .any(|&(lo, hi)| page < hi && lo < page_end); + if in_code { + code_pages += 1; + } else if page >= STACK_FLOOR { + stack_pages += 1; + } else if page >= PRIVATE_INPUT_START_INDEX { + private_input_pages += 1; + } else if page < HEAP_CEILING { + heap_pages += 1; + } else { + other_pages += 1; + } + } + + eprintln!(); + eprintln!("============================================================"); + eprintln!(" RECURSION GUEST PAGE-COUNT SUMMARY"); + eprintln!("============================================================"); + eprintln!(" Total cycles : {total_cycles}"); + eprintln!(" Executor wall time : {exec_time:?}"); + eprintln!(" Memory cells touched (4 B ea) : {total_cells}"); + eprintln!(" Distinct 4 KB pages touched : {}", pages.len()); + eprintln!(); + eprintln!(" Pages per region:"); + eprintln!(" code/static (ELF segments) : {code_pages}"); + eprintln!(" heap (0..0xC000_0000) : {heap_pages}"); + eprintln!(" private input (0xFF000000..) 
: {private_input_pages}"); + eprintln!(" stack (>= 0xFFFFFFFF_00000000) : {stack_pages}"); + if other_pages > 0 { + eprintln!(" other (unclassified) : {other_pages}"); + } + eprintln!(); + eprintln!(" Interpretation (PAGE-table overhead):"); + eprintln!(" <1k pages → PAGE overhead is not the bottleneck."); + eprintln!(" 10k-100k → TLSF heap fragmentation; try a bump alloc."); + eprintln!(" >100k → postcard decode dominates; stream-decode?"); + eprintln!("============================================================"); +} + +/// Build a PC histogram of the recursion guest verifying an `empty`-program +/// inner proof produced with `inner_proof_options`, and print it via +/// [`print_pc_histogram`] under `title`. +/// +/// `blowup_factor` and `fri_number_of_queries` are coupled (the query count is +/// derived from blowup for a fixed security target), so each `#[test]` below is +/// just this runner with a different `ProofOptions` — a single query at low +/// blowup, vs. the security-derived multi-query count at a higher blowup. +/// +/// Streams chunks of logs via `Executor::resume()` so memory stays bounded to +/// the histogram itself. Each PC is resolved to its source function via the +/// `addr2line` crate (reading the recursion ELF's DWARF directly — no external +/// tool needed). 
+fn run_recursion_pc_histogram(title: &str, inner_proof_options: stark::proof::options::ProofOptions) { + use executor::elf::Elf; + use executor::vm::execution::Executor; + use std::collections::HashMap; + + let root = workspace_root(); + build_elfs(&root); + let empty_elf_bytes = read_guest_elf(&root, "empty", "empty-bench"); + let recursion_elf_path = guest_elf_path(&root, "recursion", "recursion-bench"); + let recursion_elf_bytes = + std::fs::read(&recursion_elf_path).expect("failed to read recursion ELF"); + + eprintln!( + "[pc-hist] proving inner (empty, blowup={}, fri_queries={}) ...", + inner_proof_options.blowup_factor, inner_proof_options.fri_number_of_queries + ); + let inner_proof = crate::prove_with_options_and_inputs( + &empty_elf_bytes, + &[], + &inner_proof_options, + &crate::MaxRowsConfig::default(), + ) + .expect("inner prove should succeed"); + + let elf_for_vkey = executor::elf::Elf::load(&empty_elf_bytes).expect("ELF load failed"); + let page_configs = crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime( + &elf_for_vkey, + &inner_proof.runtime_page_ranges, + inner_proof.num_private_input_pages, + ); + let vkey = crate::VmVerifyingKey::from_elf_and_options( + &elf_for_vkey, + &inner_proof_options, + &page_configs, + ); + let blob = + postcard::to_allocvec(&(&inner_proof, &empty_elf_bytes, &inner_proof_options, &vkey)) + .expect("postcard encode failed"); + eprintln!("[pc-hist] postcard blob: {} bytes", blob.len()); + + eprintln!("[pc-hist] executing recursion guest (building PC histogram) ..."); + let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed"); + let mut executor = Executor::new(&program, blob).expect("Executor::new failed"); + + let start = std::time::Instant::now(); + let mut pc_hist: HashMap = HashMap::with_capacity(300_000); + let mut total_cycles: u64 = 0; + let mut chunks: usize = 0; + while let Some(logs) = executor.resume().expect("executor resume failed") { + for log in logs { + 
*pc_hist.entry(log.current_pc).or_insert(0) += 1; + } + total_cycles += logs.len() as u64; + chunks += 1; + if chunks.is_multiple_of(500) { + eprintln!( + "[pc-hist] ... {chunks} chunks, {total_cycles} cycles, {} unique PCs, {:?}", + pc_hist.len(), + start.elapsed() + ); + } + } + let exec_time = start.elapsed(); + + // Resolve PCs to source functions directly from the ELF's DWARF, the same + // mapping `addr2line -e -f -i -C 0x` produces. + let loader = addr2line::Loader::new(&recursion_elf_path) + .expect("failed to load recursion ELF for addr2line"); + print_pc_histogram(title, &loader, pc_hist, total_cycles, exec_time); +} + +/// Diagnostic: PC histogram of the recursion guest with a **single** FRI query +/// at blowup=2 — the cheapest verifier run, dominated by fixed setup cost +/// (decode, allocator, postcard) rather than per-query FRI/Merkle work. +#[test] +#[ignore = "diagnostic: ~8 minutes; prints PC histogram of the verifier-in-VM"] +fn test_recursion_pc_histogram() { + let inner_proof_options = stark::proof::options::ProofOptions { + blowup_factor: 2, + fri_number_of_queries: 1, + coset_offset: 3, + grinding_factor: 1, + }; + run_recursion_pc_histogram("RECURSION GUEST PC HISTOGRAM (blowup=2, 1 query)", inner_proof_options); +} + +/// Diagnostic: PC histogram of the recursion guest at **128-bit security** +/// (blowup=8, FRI query count derived by the Johnson Bound Regime — tens of +/// queries). Compared against the single-query runs, weight shifts toward the +/// verifier's per-query FRI-layer / Merkle-opening and field arithmetic. 
+#[test]
+#[ignore = "diagnostic: heavy; PC histogram of the multi-query verifier-in-VM"]
+fn test_recursion_pc_histogram_multiquery() {
+    let inner_proof_options = crate::GoldilocksCubicProofOptions::with_blowup(8)
+        .expect("blowup=8 is always valid");
+    // The title reports the query count actually derived for blowup=8.
+    run_recursion_pc_histogram(
+        &format!(
+            "RECURSION GUEST PC HISTOGRAM (blowup=8, {} queries, 128-bit)",
+            inner_proof_options.fri_number_of_queries
+        ),
+        inner_proof_options,
+    );
+}
+
+/// Diagnostic: build a **sampled** call-stack histogram of the recursion guest.
+///
+/// Like `test_recursion_pc_histogram` but groups by full call stack (not PC).
+/// Only every `SAMPLE_RATE`-th log of each executor chunk is handed to
+/// `FlamegraphGenerator::process_logs`; skipped logs never reach the
+/// generator, so with `SAMPLE_RATE > 1` its call stack can drift out of sync
+/// with execution (see the note on `SAMPLE_RATE` below). The run stops early
+/// once `CYCLE_BUDGET` cycles have been covered (0 means run to completion).
+///
+/// Output is written to `/tmp/recursion_folded_sampled.txt` in
+/// inferno-flamegraph "folded stacks" format, and re-written every 500 chunks
+/// so a killed run still leaves a partial file. Pipe it through:
+///
+///     cat /tmp/recursion_folded_sampled.txt | inferno-flamegraph > svg.svg
+///
+/// Wall time depends on `SAMPLE_RATE` and `CYCLE_BUDGET`; see the estimates
+/// documented on those constants.
+#[test]
+#[ignore = "diagnostic: sampled flamegraph for the verifier-in-VM"]
+fn test_recursion_sampled_flamegraph() {
+    use executor::elf::Elf;
+    use executor::flamegraph::FlamegraphGenerator;
+    use executor::vm::execution::Executor;
+    use std::io::BufWriter;
+
+    /// 1 in N logs is fed to `process_logs`, which both updates the call
+    /// stack and records a sample. At 1, every cycle goes through — the call
+    /// stack stays exactly in sync with execution so frame widths are
+    /// trustworthy, but the per-cycle cost (~57µs) limits how many cycles
+    /// we can cover within a wall-clock budget.
+    ///
+    /// At SAMPLE_RATE > 1, every CALL/RETURN that lands on a skipped cycle
+    /// silently desyncs the stack, producing the "stuck-in-visit_seq" effect
+    /// we saw at 1:1000. Use values > 1 only when stack accuracy is
+    /// expendable.
+    const SAMPLE_RATE: usize = 1;
+
+    /// Stop the executor early once we've covered this many cycles.
+    /// Set to 0 to run to completion (40B+ cycles, hours at SAMPLE_RATE=1).
+    /// At SAMPLE_RATE=1, ~57µs per cycle means 5M cycles ≈ 5 min wall time.
+    const CYCLE_BUDGET: u64 = 5_000_000;
+
+    let root = workspace_root();
+    build_elfs(&root);
+    let empty_elf_bytes = read_guest_elf(&root, "empty", "empty-bench");
+    let recursion_elf_bytes = read_guest_elf(&root, "recursion", "recursion-bench");
+
+    let inner_proof_options = stark::proof::options::ProofOptions {
+        blowup_factor: 2,
+        fri_number_of_queries: 1,
+        coset_offset: 3,
+        grinding_factor: 1,
+    };
+
+    eprintln!("[sampled-fg] proving inner (empty, blowup=2, fri_queries=1) ...");
+    let inner_proof = crate::prove_with_options_and_inputs(
+        &empty_elf_bytes,
+        &[],
+        &inner_proof_options,
+        &crate::MaxRowsConfig::default(),
+    )
+    .expect("inner prove should succeed");
+
+    let elf_for_vkey = executor::elf::Elf::load(&empty_elf_bytes).expect("ELF load failed");
+    let page_configs = crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime(
+        &elf_for_vkey,
+        &inner_proof.runtime_page_ranges,
+        inner_proof.num_private_input_pages,
+    );
+    let vkey = crate::VmVerifyingKey::from_elf_and_options(
+        &elf_for_vkey,
+        &inner_proof_options,
+        &page_configs,
+    );
+    let blob =
+        postcard::to_allocvec(&(&inner_proof, &empty_elf_bytes, &inner_proof_options, &vkey))
+            .expect("postcard encode failed");
+    eprintln!("[sampled-fg] postcard blob: {} bytes", blob.len());
+
+    eprintln!("[sampled-fg] executing recursion guest (sampling 1-in-{SAMPLE_RATE}) ...",);
+    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
+    let symbols = executor::elf::SymbolTable::parse(&recursion_elf_bytes);
+    let entry_point = program.entry_point;
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+
+    let mut generator = FlamegraphGenerator::new(symbols, entry_point);
+
+    // Path is defined here (not after the loop) so the periodic checkpoint
+    // writes below can target it. The final write at the end still happens.
+    let path = "/tmp/recursion_folded_sampled.txt";
+
+    let start = std::time::Instant::now();
+    let mut total_cycles: u64 = 0;
+    let mut chunks: usize = 0;
+    while let Some(logs) = executor.resume().expect("executor resume failed") {
+        // Pull the chunk into an owned Vec so we can use it after dropping the
+        // immutable borrow of `executor`.
+        let (sampled, chunk_len) = {
+            let len = logs.len();
+            // When SAMPLE_RATE == 1, this is the identity filter — `_ % 1 == 0`
+            // is trivially true. clippy::modulo_one is fired so we suppress it
+            // here; the generality of the filter is the point (lets us flip
+            // SAMPLE_RATE without touching the loop body).
+            // Note that `i` is the index within this chunk, not a global
+            // cycle number.
+            #[allow(clippy::modulo_one)]
+            let sampled: Vec<_> = logs
+                .iter()
+                .enumerate()
+                .filter(|(i, _)| i % SAMPLE_RATE == 0)
+                .map(|(_, log)| log.clone())
+                .collect();
+            (sampled, len)
+        };
+
+        // Now we can re-borrow executor.instructions immutably for the
+        // flamegraph generator. We build the sampled subset of logs (every Nth)
+        // and call process_logs on it. THIS LOSES STACK ACCURACY for skipped
+        // logs but is fast — acceptable for diagnostic-quality data at this
+        // sample rate.
+        generator
+            .process_logs(&sampled, &executor.instructions)
+            .expect("flamegraph process_logs");
+
+        // Count every executed cycle of the chunk, sampled or not.
+        total_cycles += chunk_len as u64;
+        chunks += 1;
+        if chunks.is_multiple_of(500) {
+            eprintln!(
+                "[sampled-fg] ... {chunks} chunks, {total_cycles} cycles, {:?} elapsed",
+                start.elapsed()
+            );
+            // Checkpoint: re-write the folded file in place so a killed run
+            // still leaves a usable (if partial) flamegraph on disk.
+            let file = std::fs::File::create(path).expect("create output file");
+            let mut writer = BufWriter::new(file);
+            generator
+                .write_folded(&mut writer)
+                .expect("write folded output");
+        }
+
+        // Early exit once we've covered the cycle budget. The flamegraph will
+        // reflect only the cycles we processed, but the dominant hot kernels
+        // are typically uniformly distributed across the verifier's runtime so
+        // a partial run still surfaces them clearly. Wrapped in #[allow] so
+        // CYCLE_BUDGET can be const-0 (full run) without tripping clippy.
+        #[allow(clippy::absurd_extreme_comparisons)]
+        if CYCLE_BUDGET > 0 && total_cycles >= CYCLE_BUDGET {
+            eprintln!("[sampled-fg] hit cycle budget ({CYCLE_BUDGET} cycles), stopping early");
+            break;
+        }
+    }
+    let exec_time = start.elapsed();
+
+    // Final write: covers everything processed since the last checkpoint.
+    let file = std::fs::File::create(path).expect("create output file");
+    let mut writer = BufWriter::new(file);
+    generator
+        .write_folded(&mut writer)
+        .expect("write folded output");
+
+    eprintln!();
+    eprintln!("============================================================");
+    eprintln!(" SAMPLED FLAMEGRAPH SUMMARY");
+    eprintln!("============================================================");
+    eprintln!(" Total cycles : {total_cycles}");
+    eprintln!(" Sample rate : 1 in {SAMPLE_RATE}");
+    eprintln!(" Exec time : {exec_time:?}");
+    eprintln!(" Output file : {path}");
+    eprintln!("============================================================");
+    eprintln!();
+    eprintln!(" To render SVG (requires inferno):");
+    eprintln!(" cat {path} | inferno-flamegraph > /tmp/recursion_flamegraph_sampled.svg");
+    eprintln!("============================================================");
+}
+
+/// Diagnostic: host-side per-step timings for the verifier.
+///
+/// Runs an inner prove (empty guest, blowup=2, 1 query) and then verifies it
+/// on the host. When built with `--features stark/instruments`, the verifier
+/// prints `Time spent: ...` for each of the four steps (replay challenges,
+/// composition polynomial, FRI, DEEP openings) plus the step-1-replay it
+/// does before step 2. Lets us see the host-side split in seconds, without
+/// running anything inside the VM.
+///
+/// Usage:
+/// ```
+/// cargo test --release -p lambda-vm-prover --features stark/instruments \
+///   --lib test_host_verify_step_timings -- --ignored --nocapture
+/// ```
+#[test]
+#[ignore = "diagnostic: prints host-side verifier step timings"]
+fn test_host_verify_step_timings() {
+    let root = workspace_root();
+    let empty_path =
+        root.join("bench_vs/lambda/empty/target/riscv64im-lambda-vm-elf/release/empty-bench");
+    // Build the guest ELFs only when the empty-bench artifact is not already
+    // on disk.
+    if !empty_path.exists() {
+        build_elfs(&root);
+    }
+    let empty_elf_bytes = std::fs::read(&empty_path).expect("read empty-bench");
+
+    // Inner-proof parameters; the log line below abbreviates them as
+    // "blowup=2, fri_queries=1".
+    let inner_proof_options = stark::proof::options::ProofOptions {
+        blowup_factor: 2,
+        fri_number_of_queries: 1,
+        coset_offset: 3,
+        grinding_factor: 1,
+    };
+
+    eprintln!("[host-verify] proving empty (blowup=2, fri_queries=1) ...");
+    // The empty guest is proven with an empty input slice (`&[]`).
+    let inner_proof = crate::prove_with_options_and_inputs(
+        &empty_elf_bytes,
+        &[],
+        &inner_proof_options,
+        &crate::MaxRowsConfig::default(),
+    )
+    .expect("inner prove should succeed");
+
+    eprintln!("[host-verify] verifying on host (with instruments) ...");
+    // Both failure modes fail the test: an `Err` from the verifier trips the
+    // `expect`, an `Ok(false)` rejection trips the `assert!`.
+    let ok =
+        crate::verify_with_options(&inner_proof, &empty_elf_bytes, &inner_proof_options, None, None)
+            .expect("verify errored");
+    assert!(ok, "proof must verify");
+    eprintln!("[host-verify] verified OK");
+}
+
+/// Diagnostic: cycle count for the **deserialize-only** counterpart of the
+/// recursion guest. Same input layout
+/// (`(VmProof, Vec<u8>, ProofOptions, VmVerifyingKey)`) and same proof, but
+/// the guest just postcard-decodes the blob and halts — it never calls
+/// `verify_with_options`.
+///
+/// The cycle delta between this and `test_recursion_cycle_count` is the
+/// actual cost of the STARK verifier inside the VM. Historically (40.5 B-cycle
+/// recursion guest) postcard decode was ~15.6 M cycles — negligible. Now that
+/// the recursion guest is ~67 M cycles, the same absolute cost would be ~23%
+/// of total; this test re-measures it.
+#[test]
+#[ignore = "diagnostic: runs the deserialize-only guest, prints cycle count"]
+fn test_deserialize_only_cycle_count() {
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+
+    let root = workspace_root();
+    build_elfs(&root);
+    let empty_elf_bytes = read_guest_elf(&root, "empty", "empty-bench");
+    let deser_elf_bytes = read_guest_elf(&root, "deserialize-only", "deserialize-only-bench");
+
+    let inner_proof_options = stark::proof::options::ProofOptions {
+        blowup_factor: 2,
+        fri_number_of_queries: 1,
+        coset_offset: 3,
+        grinding_factor: 1,
+    };
+
+    eprintln!("[deser-only] proving inner (empty, blowup=2, fri_queries=1) ...");
+    let inner_proof = crate::prove_with_options_and_inputs(
+        &empty_elf_bytes,
+        &[],
+        &inner_proof_options,
+        &crate::MaxRowsConfig::default(),
+    )
+    .expect("inner prove should succeed");
+
+    // Derive the verifying key for the inner (empty) program, then pack the
+    // guest input blob: (proof, inner ELF bytes, proof options, vkey).
+    let elf_for_vkey = Elf::load(&empty_elf_bytes).expect("ELF load failed");
+    let page_configs = crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime(
+        &elf_for_vkey,
+        &inner_proof.runtime_page_ranges,
+        inner_proof.num_private_input_pages,
+    );
+    let vkey = crate::VmVerifyingKey::from_elf_and_options(
+        &elf_for_vkey,
+        &inner_proof_options,
+        &page_configs,
+    );
+    let blob =
+        postcard::to_allocvec(&(&inner_proof, &empty_elf_bytes, &inner_proof_options, &vkey))
+            .expect("postcard encode failed");
+    eprintln!("[deser-only] postcard blob: {} bytes", blob.len());
+
+    eprintln!("[deser-only] executing deserialize-only guest (streaming) ...");
+    let program = Elf::load(&deser_elf_bytes).expect("ELF load failed");
+    eprintln!(
+        "[deser-only] ELF: {} bytes, entry_point=0x{:x}",
+        deser_elf_bytes.len(),
+        program.entry_point,
+    );
+    // Fail fast on a bad build artifact instead of executing from address 0.
+    assert_ne!(
+        program.entry_point, 0,
+        "deserialize-only ELF has entry_point=0 — build artifact is malformed"
+    );
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+
+    // Stream the execution chunk by chunk; each log entry counts as one cycle.
+    let start = std::time::Instant::now();
+    let mut cycle_count: usize = 0;
+    let mut chunks: usize = 0;
+    while let Some(logs) = executor.resume().expect("executor resume failed") {
+        cycle_count += logs.len();
+        chunks += 1;
+        if chunks.is_multiple_of(50) {
+            eprintln!(
+                "[deser-only] ... {chunks} chunks, {cycle_count} cycles, {:?} elapsed",
+                start.elapsed()
+            );
+        }
+    }
+    let exec_time = start.elapsed();
+
+    eprintln!();
+    eprintln!("============================================================");
+    eprintln!(" DESERIALIZE-ONLY GUEST EXECUTION SUMMARY");
+    eprintln!("============================================================");
+    eprintln!(" Cycle count : {cycle_count}");
+    eprintln!(" Executor wall time : {exec_time:?}");
+    eprintln!();
+    // No recursion-guest cycle figure is hard-coded here: the number moves as
+    // the verifier is optimized, so a literal would go stale.
+    eprintln!(" Compare against test_recursion_cycle_count (run with the");
+    eprintln!(" same proof). Delta = verifier-in-VM cost.");
+    eprintln!("============================================================");
+}
+
+/// Diagnostic: PC histogram for the **deserialize-only** guest.
+///
+/// Sibling of `test_recursion_pc_histogram`, but targeting the
+/// deserialize-only control guest so we can locate the hot kernel inside the
+/// 15.7 M-cycle postcard decode itself. Every cycle goes through the
+/// histogram (no sampling), so attribution is exact — the previous sampled
+/// flamegraph at 1:1000 had broken stack reconstruction on skipped
+/// CALL/RETURNs, which made it unreliable for a workload this small.
+///
+/// Each top PC is resolved to its source function via the `addr2line` crate,
+/// reading the guest ELF's DWARF directly (no external tool needed).
+#[test]
+#[ignore = "diagnostic: ~1 min; PC histogram for the deserialize-only guest"]
+fn test_deserialize_only_pc_histogram() {
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+    use std::collections::HashMap;
+
+    let root = workspace_root();
+    build_elfs(&root);
+    let empty_elf_bytes = read_guest_elf(&root, "empty", "empty-bench");
+    // The path is kept (not just the bytes) because addr2line loads the ELF
+    // from disk after execution.
+    let deser_elf_path = guest_elf_path(&root, "deserialize-only", "deserialize-only-bench");
+    let deser_elf_bytes =
+        std::fs::read(&deser_elf_path).expect("failed to read deserialize-only ELF");
+
+    let inner_proof_options = stark::proof::options::ProofOptions {
+        blowup_factor: 2,
+        fri_number_of_queries: 1,
+        coset_offset: 3,
+        grinding_factor: 1,
+    };
+
+    eprintln!("[deser-pc-hist] proving inner (empty, blowup=2, fri_queries=1) ...");
+    let inner_proof = crate::prove_with_options_and_inputs(
+        &empty_elf_bytes,
+        &[],
+        &inner_proof_options,
+        &crate::MaxRowsConfig::default(),
+    )
+    .expect("inner prove should succeed");
+
+    let elf_for_vkey = Elf::load(&empty_elf_bytes).expect("ELF load failed");
+    let page_configs = crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime(
+        &elf_for_vkey,
+        &inner_proof.runtime_page_ranges,
+        inner_proof.num_private_input_pages,
+    );
+    let vkey = crate::VmVerifyingKey::from_elf_and_options(
+        &elf_for_vkey,
+        &inner_proof_options,
+        &page_configs,
+    );
+    let blob =
+        postcard::to_allocvec(&(&inner_proof, &empty_elf_bytes, &inner_proof_options, &vkey))
+            .expect("postcard encode failed");
+    eprintln!("[deser-pc-hist] postcard blob: {} bytes", blob.len());
+
+    eprintln!("[deser-pc-hist] executing deserialize-only guest (building PC histogram) ...");
+    let program = Elf::load(&deser_elf_bytes).expect("ELF load failed");
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+
+    let start = std::time::Instant::now();
+    // ~50k unique PCs is plenty: the deserialize-only guest is ~74 KB of ELF
+    // (~18k 4-byte instructions); the hot inner loop is much smaller still.
+    // Key and value types are inferred: the key from `log.current_pc`, the
+    // counter from the `print_pc_histogram` call that consumes the map.
+    let mut pc_hist: HashMap<_, _> = HashMap::with_capacity(50_000);
+    let mut total_cycles: u64 = 0;
+    let mut chunks: usize = 0;
+    while let Some(logs) = executor.resume().expect("executor resume failed") {
+        // One histogram hit per executed instruction (no sampling).
+        for log in logs {
+            *pc_hist.entry(log.current_pc).or_insert(0) += 1;
+        }
+        total_cycles += logs.len() as u64;
+        chunks += 1;
+        if chunks.is_multiple_of(50) {
+            eprintln!(
+                "[deser-pc-hist] ... {chunks} chunks, {total_cycles} cycles, {} unique PCs, {:?}",
+                pc_hist.len(),
+                start.elapsed()
+            );
+        }
+    }
+    let exec_time = start.elapsed();
+
+    // Resolve PCs to source functions directly from the ELF's DWARF, the same
+    // mapping `addr2line -e <elf> -f -i -C 0x<pc>` produces.
+    let loader = addr2line::Loader::new(&deser_elf_path)
+        .expect("failed to load deserialize-only ELF for addr2line");
+    print_pc_histogram(
+        "DESERIALIZE-ONLY GUEST PC HISTOGRAM",
+        &loader,
+        pc_hist,
+        total_cycles,
+        exec_time,
+    );
+}
+
+/// Diagnostic: bucket the recursion guest's cycles by which verifier step
+/// is currently executing.
+///
+/// The verifier's hot path is `verify_rounds_2_to_4`, which calls four
+/// sub-routines in a fixed order:
+///   1. `replay_rounds_after_round_1` (recover challenges)
+///   2. `step_2_verify_claimed_composition_polynomial`
+///   3. `step_3_verify_fri`
+///   4. `step_4_verify_trace_and_composition_openings`
+///
+/// We match each sub-routine by name against the recursion ELF's symbol
+/// table, then run a monotonic state machine over the execution stream:
+/// the active bucket only advances 0 → 1 → 2 → 3 → 4 (never backwards),
+/// so cycles inside a step's callees stay attributed to that step.
+///
+/// Bucket 0 ("setup") captures everything before step 1 is entered — the
+/// allocator init, postcard decode, and `VmAirs::new` (which contains the
+/// expensive preprocessed-commitment FFTs).
+///
+/// Streams chunks via `Executor::resume()` so memory stays bounded.
+#[test]
+#[ignore = "diagnostic: ~13 min; buckets the 40B cycles by verifier step"]
+fn test_recursion_step_breakdown() {
+    use executor::elf::{Elf, SymbolTable};
+    use executor::vm::execution::Executor;
+
+    let root = workspace_root();
+    build_elfs(&root);
+    let empty_elf_bytes = read_guest_elf(&root, "empty", "empty-bench");
+    let recursion_elf_bytes = read_guest_elf(&root, "recursion", "recursion-bench");
+
+    let inner_proof_options = stark::proof::options::ProofOptions {
+        blowup_factor: 2,
+        fri_number_of_queries: 1,
+        coset_offset: 3,
+        grinding_factor: 1,
+    };
+
+    eprintln!("[step-bkd] proving inner (empty, blowup=2, fri_queries=1) ...");
+    let inner_proof = crate::prove_with_options_and_inputs(
+        &empty_elf_bytes,
+        &[],
+        &inner_proof_options,
+        &crate::MaxRowsConfig::default(),
+    )
+    .expect("inner prove should succeed");
+
+    let elf_for_vkey = executor::elf::Elf::load(&empty_elf_bytes).expect("ELF load failed");
+    let page_configs = crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime(
+        &elf_for_vkey,
+        &inner_proof.runtime_page_ranges,
+        inner_proof.num_private_input_pages,
+    );
+    let vkey = crate::VmVerifyingKey::from_elf_and_options(
+        &elf_for_vkey,
+        &inner_proof_options,
+        &page_configs,
+    );
+    let blob =
+        postcard::to_allocvec(&(&inner_proof, &empty_elf_bytes, &inner_proof_options, &vkey))
+            .expect("postcard encode failed");
+    eprintln!("[step-bkd] postcard blob: {} bytes", blob.len());
+
+    // Build a per-step "advance bucket to N" lookup. The verifier's step
+    // functions get inlined by LLVM in release mode, so we can't rely on
+    // matching their entry PCs directly. Instead we anchor on closures the
+    // compiler emits *inside* each step's body — iterator combinators like
+    // `.fold(|...|)` keep the step's method name as a substring in their
+    // mangled symbol. Any PC that resolves to a symbol containing step N's
+    // keyword advances the bucket to N (monotonically).
+    //
+    // If step N has no matching symbol at all (e.g. step 4 is fully inlined
+    // with no closure children of its own), its cycles get attributed to the
+    // previous bucket. We report that explicitly in the per-step log lines
+    // printed below, before execution starts.
+    let symbols = SymbolTable::parse(&recursion_elf_bytes);
+    assert!(
+        !symbols.is_empty(),
+        "recursion ELF has no symbol table — was it stripped?"
+    );
+
+    // Index i holds the keyword for step i + 1.
+    let step_keywords = [
+        "replay_rounds_after_round_1",
+        "step_2_verify_claimed_composition_polynomial",
+        "step_3_verify_fri",
+        "step_4_verify_trace_and_composition_openings",
+    ];
+    // `step_found[i]` is true when at least one function symbol's name
+    // contains keyword i.
+    let step_found: [bool; 4] = std::array::from_fn(|i| {
+        symbols
+            .functions()
+            .iter()
+            .any(|f| f.name.contains(step_keywords[i]))
+    });
+    for (i, found) in step_found.iter().enumerate() {
+        let n_matches = symbols
+            .functions()
+            .iter()
+            .filter(|f| f.name.contains(step_keywords[i]))
+            .count();
+        eprintln!(
+            "[step-bkd] step {}: keyword={:?} -> {} symbol(s) {}",
+            i + 1,
+            step_keywords[i],
+            n_matches,
+            if *found {
+                ""
+            } else {
+                "(fully inlined; will merge into the previous bucket)"
+            }
+        );
+    }
+
+    // Monotonic state machine: 0=setup, 1..=4=inside step N (or its callees /
+    // inlined-step-N-cycles attributed here because step N+1 is missing).
+    let mut bucket: u8 = 0;
+    let mut buckets = [0u64; 5];
+
+    eprintln!("[step-bkd] executing recursion guest (streaming) ...");
+    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+
+    // Cache the last symbol-table hit so we only do a binary search on
+    // function transitions, not on every cycle. Functions are typically
+    // long-running (>>1 instruction), so this cache hits ~all of the time.
+    // `last_range` is the half-open address range [start, end) of that hit;
+    // `last_advance` is the step number its name maps to (0 = no step).
+    let mut last_range: Option<(u64, u64)> = None;
+    let mut last_advance: u8 = 0;
+
+    let start = std::time::Instant::now();
+    let mut total_cycles: u64 = 0;
+    let mut chunks: usize = 0;
+    while let Some(logs) = executor.resume().expect("executor resume failed") {
+        for log in logs {
+            let pc = log.current_pc;
+            let in_cached = matches!(last_range, Some((s, e)) if pc >= s && pc < e);
+            if !in_cached {
+                // Slow path: refresh the cache from the symbol table.
+                if let Some(sym) = symbols.lookup(pc) {
+                    // SymbolTable accepts size=0 symbols as "any address >="; for
+                    // those we'd need the next symbol's start for a real upper
+                    // bound. Cheapest workaround: set a tiny range so we re-resolve
+                    // soon enough that wrong attribution is bounded.
+                    let end = sym.address + sym.size.max(1);
+                    last_range = Some((sym.address, end));
+                    last_advance = 0;
+                    // No early exit: if several keywords match, the highest
+                    // step number wins.
+                    for (i, kw) in step_keywords.iter().enumerate() {
+                        if sym.name.contains(kw) {
+                            last_advance = (i + 1) as u8;
+                        }
+                    }
+                } else {
+                    // PC is outside every known symbol: nothing to cache, and
+                    // the next cycle takes the slow path again.
+                    last_range = None;
+                    last_advance = 0;
+                }
+            }
+            // The bucket only ever moves forward.
+            if bucket < last_advance {
+                bucket = last_advance;
+            }
+            buckets[bucket as usize] += 1;
+        }
+        total_cycles += logs.len() as u64;
+        chunks += 1;
+        if chunks.is_multiple_of(500) {
+            eprintln!(
+                "[step-bkd] ... {chunks} chunks, {total_cycles} cycles, bucket={bucket}, {:?}",
+                start.elapsed()
+            );
+        }
+    }
+    let exec_time = start.elapsed();
+
+    // One label per entry of `buckets`, in the same order.
+    let labels = [
+        "0. setup (alloc + postcard decode + VmAirs::new + pre-step-1)",
+        "1. step 1: replay_rounds_after_round_1",
+        "2. step 2: verify_claimed_composition_polynomial",
+        "3. step 3: verify_fri",
+        "4. step 4: verify_trace_and_composition_openings (+ wrap-up)",
+    ];
+
+    eprintln!();
+    eprintln!("============================================================");
+    eprintln!(" RECURSION GUEST PER-STEP CYCLE BREAKDOWN");
+    eprintln!("============================================================");
+    eprintln!(" Total cycles : {total_cycles}");
+    eprintln!(" Exec time : {exec_time:?}");
+    eprintln!();
+    eprintln!(" {:<60} {:>14} {:>7}", "bucket", "cycles", "%");
+    for (label, cycles) in labels.iter().zip(buckets.iter()) {
+        // Guard the division so an empty run prints 0.00% instead of NaN.
+        let pct = if total_cycles > 0 {
+            100.0 * (*cycles as f64) / (total_cycles as f64)
+        } else {
+            0.0
+        };
+        eprintln!(" {:<60} {:>14} {:>6.2}%", label, cycles, pct);
+    }
+    eprintln!("============================================================");
+}
+
+/// Inner program: fibonacci(10).
+#[test]
+#[ignore = "slow: runs the full STARK verifier inside the VM"]
+fn test_recursion_smoke() {
+    let root = workspace_root();
+    build_elfs(&root);
+    let fib_elf_bytes = read_guest_elf(&root, "fibonacci", "fibonacci-bench");
+
+    // The inner program's private input is `n` encoded as 8 little-endian bytes.
+    let n: u64 = 10;
+    let inner_private_input = n.to_le_bytes().to_vec();
+
+    run_recursion_pipeline("recursion-smoke", &fib_elf_bytes, &inner_private_input);
+}
diff --git a/prover/src/tests/trace_builder_tests.rs b/prover/src/tests/trace_builder_tests.rs
index b3c1e1514..9a5da7bfb 100644
--- a/prover/src/tests/trace_builder_tests.rs
+++ b/prover/src/tests/trace_builder_tests.rs
@@ -6,8 +6,11 @@ use crate::tables::lt;
 use crate::tables::memw_register;
 use crate::tables::trace_builder::Traces;
 use crate::tables::types::FE;
+#[cfg(feature = "prove")]
 use executor::vm::instruction::decoding::{ArithOp, Comparison, Instruction};
+#[cfg(feature = "prove")]
 use executor::vm::logs::Log;
+#[cfg(feature = "prove")]
 use executor::vm::memory::U64HashMap;
 
 fn make_log(pc: u64, rs1_val: u64, rs2_val: u64, dst_val: u64, taken: bool, offset: i32) -> Log {
diff --git a/prover/src/tests/vkey_tests.rs b/prover/src/tests/vkey_tests.rs
new file mode 100644
index 
000000000..aba3420d0
--- /dev/null
+++ b/prover/src/tests/vkey_tests.rs
@@ -0,0 +1,180 @@
+//! Tests for [`crate::VmVerifyingKey`] and the vkey-aware verify path.
+
+use executor::elf::Elf;
+use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions};
+
+use crate::VmVerifyingKey;
+use crate::tables::page::PageConfig;
+use crate::tables::trace_builder::Traces;
+use crate::test_utils::asm_elf_bytes;
+use crate::vkey::VKEY_VERSION;
+use crate::{VmProof, prove};
+
+fn default_options() -> ProofOptions {
+    GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid")
+}
+
+/// Derive the same `page_configs` slice the verifier would reconstruct from
+/// `vm_proof`. This is exactly what `verify_with_options_with_vkey` does
+/// internally, lifted into the test so the test-side and verifier-side
+/// `vkey.pages` indexing line up.
+fn page_configs_from_proof(elf: &Elf, vm_proof: &VmProof) -> Vec<PageConfig> {
+    Traces::page_configs_from_elf_and_runtime(
+        elf,
+        &vm_proof.runtime_page_ranges,
+        vm_proof.num_private_input_pages,
+    )
+}
+
+#[test]
+fn test_vkey_roundtrip() {
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+
+    let vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, &page_configs);
+    assert_eq!(vkey.version, VKEY_VERSION, "version field must be set");
+    assert_eq!(
+        vkey.pages.len(),
+        page_configs.len(),
+        "vkey.pages must have one entry per page config",
+    );
+    let digest_before = vkey.compute_digest();
+
+    // Two host derivations on the same inputs must produce the same vkey;
+    // the per-table commitment caches should not change between calls.
+    let vkey_again = VmVerifyingKey::from_elf_and_options(&elf, &options, &page_configs);
+    assert_eq!(vkey, vkey_again, "vkey derivation must be deterministic");
+
+    // postcard round-trip preserves every field.
+    let encoded = postcard::to_allocvec(&vkey).expect("postcard encode");
+    let decoded: VmVerifyingKey = postcard::from_bytes(&encoded).expect("postcard decode");
+    assert_eq!(vkey, decoded, "postcard round-trip must preserve the vkey");
+    assert_eq!(
+        decoded.compute_digest(),
+        digest_before,
+        "digest must be stable across serialization"
+    );
+}
+
+#[test]
+fn test_vkey_verify_equivalence() {
+    // Prove a tiny program once with the full (non-minimal) bitwise table,
+    // then verify it both ways: with and without a precomputed vkey.
+    // Both paths must accept the proof. This is the core correctness
+    // guarantee — the vkey shortcut produces identical results to the
+    // recompute-from-scratch path.
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, &page_configs);
+
+    let baseline = crate::verify_with_options(&vm_proof, &elf_bytes, &options, None, None)
+        .expect("baseline verify errored");
+    assert!(baseline, "baseline verify must accept the proof");
+
+    let with_vkey =
+        crate::verify_with_options_with_vkey(&vm_proof, &elf_bytes, &options, None, None, Some(&vkey))
+            .expect("vkey verify errored");
+    assert!(with_vkey, "vkey verify must accept the same proof");
+}
+
+#[test]
+fn test_vkey_mismatch_rejects() {
+    // Tamper with vkey.bitwise. Without an explicit `vk_digest` field on
+    // VmProof (deferred to a later PR), rejection comes from Fiat-Shamir:
+    // the verifier feeds the tampered commitment into the transcript,
+    // derives different challenges from what the prover used, and the
+    // proof's openings stop matching.
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, &page_configs);
+
+    vkey.bitwise[0] ^= 0xFF;
+
+    let result = crate::verify_with_options_with_vkey(&vm_proof, &elf_bytes, &options, None, None, Some(&vkey))
+        .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
+    assert!(!result, "tampered bitwise commitment must cause rejection");
+}
+
+#[test]
+fn test_vkey_page_mismatch_rejects() {
+    // Same shape as `test_vkey_mismatch_rejects`, but tampers with the page
+    // commitment in the first non-private-input slot. Fiat-Shamir rejects
+    // the same way: the page commitment is in the verifier's transcript
+    // exactly like the bitwise one.
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, &page_configs);
+
+    let target = page_configs
+        .iter()
+        .position(|c| !c.is_private_input)
+        .expect("test ELF must produce at least one non-private-input page");
+    vkey.pages[target][0] ^= 0xFF;
+
+    let result = crate::verify_with_options_with_vkey(&vm_proof, &elf_bytes, &options, None, None, Some(&vkey))
+        .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
+    assert!(!result, "tampered page commitment must cause rejection");
+}
+
+#[test]
+fn test_vkey_decode_mismatch_rejects() {
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, &page_configs);
+
+    vkey.decode[0] ^= 0xFF;
+
+    let result = crate::verify_with_options_with_vkey(&vm_proof, &elf_bytes, &options, None, None, Some(&vkey))
+        .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
+    assert!(!result, "tampered decode commitment must cause rejection");
+}
+
+#[test]
+fn test_vkey_register_mismatch_rejects() {
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, &page_configs);
+
+    vkey.register[0] ^= 0xFF;
+
+    let result = crate::verify_with_options_with_vkey(&vm_proof, &elf_bytes, &options, None, None, Some(&vkey))
+        .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
+    assert!(!result, "tampered register commitment must cause rejection");
+}
+
+#[test]
+fn test_vkey_keccak_rc_mismatch_rejects() {
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, &page_configs);
+
+    vkey.keccak_rc[0] ^= 0xFF;
+
+    let result = crate::verify_with_options_with_vkey(&vm_proof, &elf_bytes, &options, None, None, Some(&vkey))
+        .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
+    assert!(
+        !result,
+        "tampered keccak_rc commitment must cause rejection"
+    );
+}
diff --git a/prover/src/vkey.rs b/prover/src/vkey.rs
new file mode 100644
index 000000000..a81d31bb3
--- /dev/null
+++ b/prover/src/vkey.rs
@@ -0,0 +1,126 @@
+//! Verifying key for the lambda-vm STARK verifier.
+//!
+//! Caches preprocessed-table Merkle commitments that the verifier would
+//! otherwise recompute on every call. Mirrors the SP1 `MachineVerifyingKey`
+//! pattern (preprocessed commitments derived once at setup, never recomputed
+//! per-proof) and the prover-side companion in
+//! (which caches the
+//! same data on the prover side).
+//!
+//! ## Current scope
+//!
+//! All five preprocessed tables — BITWISE, DECODE, REGISTER, KECCAK_RC, and
+//! every non-private-input PAGE — are cached here. `VmAirs::new_with_vkey`
+//! prefers the vkey-supplied commitment over recomputing when a vkey is
+//! provided. The `version` field exists so a vkey serialized against an
+//! older layout produces a different `compute_digest()` and stops
+//! validating.
+//!
+//! ## Security
+//!
+//! 
For this PR the verifying key is only a performance shortcut. The
+//! verifier still relies on Fiat-Shamir: every preprocessed commitment the
+//! prover used is bound into the proof's challenges, so a verifier that
+//! consumes a tampered `vkey` field derives different challenges, the
+//! openings stop matching, and verification fails. A future PR will
+//! additionally embed `vkey.compute_digest()` in `VmProof` so vkey
+//! substitution surfaces as an explicit error before any STARK work runs.
+
+use alloc::vec::Vec;
+
+use executor::elf::Elf;
+use sha3::{Digest, Keccak256};
+use stark::config::Commitment;
+use stark::proof::options::ProofOptions;
+
+use crate::tables::bitwise;
+use crate::tables::decode;
+use crate::tables::keccak_rc;
+use crate::tables::page::{self, PageConfig};
+use crate::tables::register;
+
+/// Current `VmVerifyingKey` layout version. Bump whenever fields are added,
+/// removed, or reordered so that vkeys serialized against an older layout
+/// produce a different `compute_digest()` and stop validating.
+pub const VKEY_VERSION: u32 = 3;
+
+/// Placeholder commitment stored in [`VmVerifyingKey::pages`] for
+/// private-input page slots, where there is no preprocessed commitment to
+/// cache. The verifier never reads these slots (private-input pages have no
+/// `with_preprocessed(...)` call in `VmAirs::new`).
+const PRIVATE_INPUT_PAGE_PLACEHOLDER: Commitment = [0u8; 32];
+
+/// Cached preprocessed-table commitments the verifier would otherwise
+/// recompute on every call.
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub struct VmVerifyingKey {
+    /// Layout version. See [`VKEY_VERSION`].
+    pub version: u32,
+    /// Merkle root over the LDE of the bitwise preprocessed columns.
+    /// Program-independent; depends only on `ProofOptions`.
+    pub bitwise: Commitment,
+    /// Merkle root over the LDE of the decode preprocessed columns.
+    /// Program-dependent: derived from the inner ELF's instruction stream.
+    pub decode: Commitment,
+    /// Merkle root over the LDE of the register preprocessed columns.
+    /// Program-dependent via the ELF's entry point.
+    pub register: Commitment,
+    /// Merkle root over the LDE of the keccak round-constants preprocessed
+    /// columns. Program-independent; depends only on `ProofOptions`.
+    pub keccak_rc: Commitment,
+    /// Per-page preprocessed Merkle roots, indexed parallel to the
+    /// `page_configs` slice the verifier reconstructs from the proof via
+    /// [`crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime`].
+    /// Private-input slots hold a zero placeholder and are never read by the
+    /// verifier — they exist only to keep the index aligned with
+    /// `page_configs`, which interleaves preprocessed and private-input pages.
+    pub pages: Vec<Commitment>,
+}
+
+impl VmVerifyingKey {
+    /// Derive the verifying key on the host.
+    ///
+    /// `elf` is read to derive the program-dependent commitments (DECODE
+    /// from the instruction stream, REGISTER from `elf.entry_point`).
+    ///
+    /// `page_configs` must match exactly what the verifier will reconstruct
+    /// from the proof — i.e. the output of
+    /// `Traces::page_configs_from_elf_and_runtime(elf, runtime_page_ranges,
+    /// num_private_input_pages)`. The host can call that helper with the
+    /// values it already has after producing the inner proof.
+    pub fn from_elf_and_options(
+        elf: &Elf,
+        options: &ProofOptions,
+        page_configs: &[PageConfig],
+    ) -> Self {
+        let pages = page_configs
+            .iter()
+            .map(|config| {
+                if config.is_private_input {
+                    PRIVATE_INPUT_PAGE_PLACEHOLDER
+                } else {
+                    page::precomputed_commitment_cached(config, options)
+                }
+            })
+            .collect();
+        Self {
+            version: VKEY_VERSION,
+            bitwise: bitwise::preprocessed_commitment(options),
+            decode: decode::commitment_from_elf(elf, options)
+                .expect("decode commitment must compute"),
+            register: register::preprocessed_commitment(options, elf.entry_point),
+            keccak_rc: keccak_rc::preprocessed_commitment(options),
+            pages,
+        }
+    }
+
+    /// Keccak256 fingerprint of the postcard-serialized vkey. Stable as long
+    /// as the field layout (and [`VKEY_VERSION`]) does not change.
+    pub fn compute_digest(&self) -> [u8; 32] {
+        let bytes = postcard::to_allocvec(self)
+            .expect("postcard serialization of VmVerifyingKey must succeed");
+        let mut hasher = Keccak256::new();
+        hasher.update(&bytes);
+        hasher.finalize().into()
+    }
+}