diff --git a/.github/workflows/pr_main.yaml b/.github/workflows/pr_main.yaml index bc0560acb..ae675d770 100644 --- a/.github/workflows/pr_main.yaml +++ b/.github/workflows/pr_main.yaml @@ -102,21 +102,43 @@ jobs: cargo test --release -p executor test_ethrex -- --ignored cargo test --release -p executor test_ckzg -- --ignored + test-cli: + name: CLI tests + runs-on: ubuntu-latest + if: github.event_name != 'push' || github.actor != 'github-merge-queue[bot]' + steps: + - name: Checkout sources + uses: actions/checkout@v4 + + - name: Setup Rust Environment + uses: ./.github/actions/setup-rust + + - name: Cache cargo build artifacts + uses: Swatinem/rust-cache@v2 + with: + shared-key: "lambda-vm-cli-test" + cache-all-crates: "true" + + - name: Run CLI tests + run: cargo test -p cli + # "Test" is a required check — keep this name to avoid branch protection changes. - # This gate job passes only when executor tests AND all prover shards succeed. + # This gate job passes only when CLI, executor, disk-spill, and prover tests succeed. test: name: Test if: always() - needs: [test-executor, test-prover, test-disk-spill] + needs: [test-executor, test-cli, test-prover, test-disk-spill] runs-on: ubuntu-latest steps: - name: Check results run: | executor="${{ needs.test-executor.result }}" + cli="${{ needs.test-cli.result }}" prover="${{ needs.test-prover.result }}" disk_spill="${{ needs.test-disk-spill.result }}" echo "test-executor: $executor" + echo "test-cli: $cli" echo "test-prover: $prover" echo "test-disk-spill: $disk_spill" @@ -124,6 +146,9 @@ jobs: if [[ "$executor" != "success" && "$executor" != "skipped" ]]; then exit 1 fi + if [[ "$cli" != "success" && "$cli" != "skipped" ]]; then + exit 1 + fi if [[ "$prover" != "success" && "$prover" != "skipped" ]]; then exit 1 fi diff --git a/bin/cli/README.md b/bin/cli/README.md index c784ff6c7..bc5eb9d53 100644 --- a/bin/cli/README.md +++ b/bin/cli/README.md @@ -57,8 +57,10 @@ cargo run -p cli --release -- prove -o proof.bin [flags] | `--private-input ` | Pass private input bytes to the guest. | | `--blowup ` | FRI blowup factor (power of 2). Higher = fewer queries, smaller proof, slower proving. [default: 2] | | `--time` | Print total proving time. | -| `--cycles` | Run one extra pre-pass outside the timer and print the dynamic instruction count. | -| `--elements` | Build traces and print main-trace and aux-trace field element counts. | +| `--cycles` | Run one extra execution outside the timer and print the dynamic instruction count. Also supported with `--continuations`. | +| `--elements` | Build traces and print main-trace and aux-trace field element counts. Monolithic proving only; conflicts with `--continuations`. | +| `--continuations` | Prove as a continuation bundle split into fixed-size epochs. | +| `--epoch-size-log2 ` | Continuation epoch size as `2^N` cycles. Requires `--continuations`. Defaults to `20`; values below `18` are rejected. | ### Verify @@ -72,8 +74,10 @@ cargo run -p cli --release -- verify [flags] |---|---| | `--blowup ` | FRI blowup factor used during proving. Must match. [default: 2] | | `--time` | Print verification time. | +| `--continuations` | Verify a continuation proof bundle produced by `prove --continuations`. | -Returns exit code `0` on successful verification, `1` on failure. +Returns exit code `0` on successful verification, `1` on failure. `--blowup` must +match the value used during proving. ### Count Elements @@ -96,10 +100,29 @@ cargo run -p cli --release -- execute executor/program_artifacts/asm/add.elf cargo run -p cli --release -- prove executor/program_artifacts/asm/add.elf -o /tmp/proof.bin cargo run -p cli --release -- verify /tmp/proof.bin executor/program_artifacts/asm/add.elf +# Generate and verify a continuation proof +cargo run -p cli --release -- prove program.elf -o /tmp/cont.bin --continuations --epoch-size-log2 20 +cargo run -p cli --release -- verify /tmp/cont.bin program.elf --continuations + +# Generate a continuation proof and print total dynamic instruction count +cargo run -p cli --release -- prove program.elf -o /tmp/cont.bin --continuations --cycles + # Prove with private input and print metrics cargo run -p cli --release -- prove program.elf -o /tmp/proof.bin --private-input input.bin --time --cycles ``` +For continuation proofs, `--epoch-size-log2` is the power in `2^N` cycles. Larger +values reduce epoch count and fixed per-epoch overhead, but increase peak memory. +As rough ethrex 10-transfer distinct-account reference points from a local sweep: +`19` used about 6.9 GB peak heap, `20` about 9.5 GB, `21` about 15.8 GB, and `22` +about 26.8 GB. For a new workload, use the highest value the machine can run +without swapping. + +Continuation proof bundles are self-contained for standalone verification. When +`--private-input` is used, the serialized continuation proof includes the raw +private input bytes so the verifier can rebuild the genesis memory commitment. +Do not treat continuation proof files as confidential-input hiding artifacts. + ## Guest Program Flamegraphs Generate flamegraphs showing where the guest RISC-V program spends its execution time (by instruction count). diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index 5c9719650..2b053755c 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -18,6 +18,9 @@ use executor::{ use prover::VmProof; use stark::proof::options::GoldilocksCubicProofOptions; +const DEFAULT_CONTINUATION_EPOCH_SIZE_LOG2: u32 = 20; +const MIN_CONTINUATION_EPOCH_SIZE_LOG2: u32 = 18; + /// Polls jemalloc `stats.allocated` every 10ms from a background thread, /// tracking the high-water mark. Near-zero overhead because jemalloc uses /// thread-local caches — `epoch::advance()` just merges cached counters. @@ -130,20 +133,34 @@ enum Commands { /// Blowup factor (power of 2). Higher = fewer queries, smaller proof, slower proving. #[arg(long, default_value = "2")] - blowup: Option, + blowup: u8, /// Print proving time #[arg(long)] time: bool, - /// Execute one pre-pass outside the timer and print dynamic instruction count + /// Execute once outside the timer and print dynamic instruction count #[arg(long)] cycles: bool, /// Build traces and print total main-trace field elements (rows × columns summed across /// all tables) and aux-trace field elements (committed EF columns × rows) - #[arg(long)] + #[arg(long, conflicts_with = "continuations")] elements: bool, + + /// Prove with continuations (split execution into epochs; flat peak memory) + #[arg(long)] + continuations: bool, + + /// Continuation epoch size as log2(cycles); e.g. 20 means 1,048,576 cycles. + #[arg( + long, + value_name = "N", + requires = "continuations", + value_parser = parse_epoch_size_log2, + long_help = "Continuation epoch size as log2(cycles); e.g. 20 means 1,048,576 cycles.\n\nDefault when omitted: 20. Values below 18 are rejected for the CLI because tiny epochs are dominated by fixed overhead. Indicative ethrex 10-transfer distinct-account peak heap from a local sweep: 19 ~= 6.9 GB, 20 ~= 9.5 GB, 21 ~= 15.8 GB, 22 ~= 26.8 GB. Higher values reduce epoch count, continuation bundle size, and fixed per-epoch overhead, but increase peak memory. For a new workload, try the highest value your machine can run without swapping." + )] + epoch_size_log2: Option, }, /// Verify a proof bundle @@ -158,11 +175,15 @@ enum Commands { /// Blowup factor used during proving (must match) #[arg(long, default_value = "2")] - blowup: Option, + blowup: u8, /// Print verification time #[arg(long)] time: bool, + + /// Verify a continuation proof bundle (produced by `prove --continuations`) + #[arg(long)] + continuations: bool, }, /// Count main-trace and aux-trace field elements without proving @@ -196,13 +217,36 @@ fn main() -> ExitCode { time, cycles, elements, - } => cmd_prove(elf, output, private_input, blowup, time, cycles, elements), + continuations, + epoch_size_log2, + } => { + if continuations { + cmd_prove_continuation( + elf, + output, + private_input, + epoch_size_log2, + blowup, + time, + cycles, + ) + } else { + cmd_prove(elf, output, private_input, blowup, time, cycles, elements) + } + } Commands::Verify { proof, elf, blowup, time, - } => cmd_verify(proof, elf, blowup, time), + continuations, + } => { + if continuations { + cmd_verify_continuation(proof, elf, blowup, time) + } else { + cmd_verify(proof, elf, blowup, time) + } + } Commands::CountElements { elf, private_input } => cmd_count_elements(elf, private_input), } } @@ -217,6 +261,17 @@ fn read_private_input(path: Option<&PathBuf>) -> Result, String> { } } +fn count_cycles(elf_data: &[u8], private_inputs: &[u8]) -> Result { + let program = + Elf::load(elf_data).map_err(|e| format!("Failed to load ELF for cycle count: {e:?}"))?; + let executor = Executor::new(&program, private_inputs.to_vec()) + .map_err(|e| format!("Failed to create executor for cycle count: {e:?}"))?; + executor + .run() + .map(|result| result.logs.len() as u64) + .map_err(|e| format!("Execution failed during cycle count: {e:?}")) +} + fn cmd_execute( elf_path: PathBuf, private_input_path: Option, @@ -324,7 +379,7 @@ fn cmd_prove( elf_path: PathBuf, output_path: PathBuf, private_input_path: Option, - blowup: Option, + blowup: u8, time: bool, cycles: bool, elements: bool, @@ -350,24 +405,10 @@ fn cmd_prove( // Mirrors SP1's cycle-count pass so both provers report the same kind of // number without inflating the measured proving time. let cycle_count = if cycles { - let program = match Elf::load(&elf_data) { - Ok(p) => p, + match count_cycles(&elf_data, &private_inputs) { + Ok(count) => Some(count), Err(e) => { - eprintln!("Failed to load ELF for cycle count: {:?}", e); - return ExitCode::FAILURE; - } - }; - let executor = match Executor::new(&program, private_inputs.clone()) { - Ok(e) => e, - Err(e) => { - eprintln!("Failed to create executor for cycle count: {:?}", e); - return ExitCode::FAILURE; - } - }; - match executor.run() { - Ok(result) => Some(result.logs.len() as u64), - Err(e) => { - eprintln!("Execution failed during cycle count: {:?}", e); + eprintln!("{e}"); return ExitCode::FAILURE; } } @@ -398,31 +439,23 @@ fn cmd_prove( }); let start = Instant::now(); - let proof = match blowup { - Some(b) => { - let opts = match GoldilocksCubicProofOptions::with_blowup(b) { - Ok(opts) => opts, - Err(e) => { - eprintln!("Invalid proof options: {e}"); - return ExitCode::FAILURE; - } - }; - eprintln!( - "Generating proof (blowup={b}, queries={})...", - opts.fri_number_of_queries - ); - prover::prove_with_options_and_inputs( - &elf_data, - &private_inputs, - &opts, - &Default::default(), - ) - } - None => { - eprintln!("Generating proof..."); - prover::prove_with_inputs(&elf_data, &private_inputs) + let opts = match GoldilocksCubicProofOptions::with_blowup(blowup) { + Ok(opts) => opts, + Err(e) => { + eprintln!("Invalid proof options: {e}"); + return ExitCode::FAILURE; } }; + eprintln!( + "Generating proof (blowup={blowup}, queries={})...", + opts.fri_number_of_queries + ); + let proof = prover::prove_with_options_and_inputs( + &elf_data, + &private_inputs, + &opts, + &Default::default(), + ); let prove_elapsed = start.elapsed(); let proof = match proof { Ok(proof) => proof, @@ -474,7 +507,7 @@ fn cmd_prove( ExitCode::SUCCESS } -fn cmd_verify(proof_path: PathBuf, elf_path: PathBuf, blowup: Option, time: bool) -> ExitCode { +fn cmd_verify(proof_path: PathBuf, elf_path: PathBuf, blowup: u8, time: bool) -> ExitCode { eprintln!("Reading ELF file..."); let elf_data = match std::fs::read(&elf_path) { Ok(data) => data, @@ -503,19 +536,14 @@ fn cmd_verify(proof_path: PathBuf, elf_path: PathBuf, blowup: Option, time: eprintln!("Verifying proof..."); let start = Instant::now(); - let result = match blowup { - Some(b) => { - let opts = match GoldilocksCubicProofOptions::with_blowup(b) { - Ok(opts) => opts, - Err(e) => { - eprintln!("Invalid proof options: {e}"); - return ExitCode::FAILURE; - } - }; - prover::verify_with_options(&proof, &elf_data, &opts, None, None) + let opts = match GoldilocksCubicProofOptions::with_blowup(blowup) { + Ok(opts) => opts, + Err(e) => { + eprintln!("Invalid proof options: {e}"); + return ExitCode::FAILURE; } - None => prover::verify(&proof, &elf_data), }; + let result = prover::verify_with_options(&proof, &elf_data, &opts, None, None); let verify_elapsed = start.elapsed(); let result = match result { Ok(valid) => valid, @@ -532,11 +560,181 @@ fn cmd_verify(proof_path: PathBuf, elf_path: PathBuf, blowup: Option, time: } ExitCode::SUCCESS } else { - eprintln!("Verification failed!"); + eprintln!("Verification failed! Ensure --blowup matches the value used for proving."); ExitCode::FAILURE } } +fn cmd_prove_continuation( + elf_path: PathBuf, + output_path: PathBuf, + private_input_path: Option, + epoch_size_log2: Option, + blowup: u8, + time: bool, + cycles: bool, +) -> ExitCode { + eprintln!("Reading ELF file..."); + let elf_data = match std::fs::read(&elf_path) { + Ok(data) => data, + Err(e) => { + eprintln!("Failed to read ELF file: {}", e); + return ExitCode::FAILURE; + } + }; + + let private_inputs = match read_private_input(private_input_path.as_ref()) { + Ok(inputs) => inputs, + Err(e) => { + eprintln!("{e}"); + return ExitCode::FAILURE; + } + }; + + let cycle_count = if cycles { + match count_cycles(&elf_data, &private_inputs) { + Ok(count) => Some(count), + Err(e) => { + eprintln!("{e}"); + return ExitCode::FAILURE; + } + } + } else { + None + }; + + let epoch_size_log2 = epoch_size_log2.unwrap_or(DEFAULT_CONTINUATION_EPOCH_SIZE_LOG2); + let epoch_size = match continuation_epoch_size(epoch_size_log2) { + Ok(size) => size, + Err(e) => { + eprintln!("{e}"); + return ExitCode::FAILURE; + } + }; + + let opts = match GoldilocksCubicProofOptions::with_blowup(blowup) { + Ok(opts) => opts, + Err(e) => { + eprintln!("Invalid proof options: {e}"); + return ExitCode::FAILURE; + } + }; + + eprintln!( + "Generating continuation proof (blowup={blowup}, epoch_size_log2={epoch_size_log2}, epoch_size={epoch_size})...", + ); + let start = Instant::now(); + let bundle = match prover::continuation::prove_continuation( + &elf_data, + &private_inputs, + epoch_size_log2, + &opts, + ) { + Ok(b) => b, + Err(e) => { + eprintln!("Continuation proof generation failed: {}", e); + return ExitCode::FAILURE; + } + }; + let prove_elapsed = start.elapsed(); + + eprintln!("Writing proof..."); + let file = match File::create(&output_path) { + Ok(f) => f, + Err(e) => { + eprintln!("Failed to create output file: {}", e); + return ExitCode::FAILURE; + } + }; + let mut writer = BufWriter::new(file); + let bytes = match bincode::serialize(&bundle) { + Ok(b) => b, + Err(e) => { + eprintln!("Failed to serialize proof: {}", e); + return ExitCode::FAILURE; + } + }; + if let Err(e) = writer.write_all(&bytes) { + eprintln!("Failed to write proof: {}", e); + return ExitCode::FAILURE; + } + + eprintln!("Proof written to {:?}", output_path); + if let Some(c) = cycle_count { + println!("Cycles: {}", c); + } + println!("Epochs: {}", bundle.num_epochs()); + if time { + println!("Proving time: {:.3}s", prove_elapsed.as_secs_f64()); + } + ExitCode::SUCCESS +} + +fn cmd_verify_continuation( + proof_path: PathBuf, + elf_path: PathBuf, + blowup: u8, + time: bool, +) -> ExitCode { + eprintln!("Reading ELF file..."); + let elf_data = match std::fs::read(&elf_path) { + Ok(data) => data, + Err(e) => { + eprintln!("Failed to read ELF file: {}", e); + return ExitCode::FAILURE; + } + }; + + eprintln!("Reading proof..."); + let proof_bytes = match std::fs::read(&proof_path) { + Ok(b) => b, + Err(e) => { + eprintln!("Failed to read proof file: {}", e); + return ExitCode::FAILURE; + } + }; + let bundle: prover::continuation::ContinuationProof = match bincode::deserialize(&proof_bytes) { + Ok(p) => p, + Err(e) => { + eprintln!("Failed to deserialize proof: {}", e); + return ExitCode::FAILURE; + } + }; + + let opts = match GoldilocksCubicProofOptions::with_blowup(blowup) { + Ok(opts) => opts, + Err(e) => { + eprintln!("Invalid proof options: {e}"); + return ExitCode::FAILURE; + } + }; + + eprintln!("Verifying continuation proof..."); + let start = Instant::now(); + let result = prover::continuation::verify_continuation(&elf_data, &bundle, &opts); + let verify_elapsed = start.elapsed(); + + match result { + Ok(Some(output)) => { + eprintln!("Verification succeeded!"); + let hex: String = output.iter().map(|b| format!("{:02x}", b)).collect(); + println!("Output: {}", hex); + if time { + println!("Verification time: {:.3}s", verify_elapsed.as_secs_f64()); + } + ExitCode::SUCCESS + } + Ok(None) => { + eprintln!("Verification failed! Ensure --blowup matches the value used for proving."); + ExitCode::FAILURE + } + Err(e) => { + eprintln!("Verification error: {}", e); + ExitCode::FAILURE + } + } +} + fn cmd_count_elements(elf_path: PathBuf, private_input_path: Option) -> ExitCode { let elf_data = match std::fs::read(&elf_path) { Ok(data) => data, @@ -566,3 +764,160 @@ fn cmd_count_elements(elf_path: PathBuf, private_input_path: Option) -> } } } + +fn continuation_epoch_size(epoch_size_log2: u32) -> Result { + if epoch_size_log2 < MIN_CONTINUATION_EPOCH_SIZE_LOG2 { + return Err(format!( + "--epoch-size-log2 must be at least {MIN_CONTINUATION_EPOCH_SIZE_LOG2} for CLI proving" + )); + } + 1usize.checked_shl(epoch_size_log2).ok_or_else(|| { + format!("--epoch-size-log2 {epoch_size_log2} is too large for this platform") + }) +} + +fn parse_epoch_size_log2(value: &str) -> Result { + let epoch_size_log2 = value + .parse::() + .map_err(|_| format!("--epoch-size-log2 must be an integer, got `{value}`"))?; + continuation_epoch_size(epoch_size_log2)?; + Ok(epoch_size_log2) +} + +#[cfg(test)] +mod tests { + use super::*; + use clap::CommandFactory; + + // The arg graph is well-formed (e.g. `requires`/`conflicts_with` reference real args). + #[test] + fn cli_command_is_valid() { + Cli::command().debug_assert(); + } + + // The continuation epoch flag requires --continuations. + #[test] + fn epoch_size_log2_requires_continuations() { + let r = Cli::command().try_get_matches_from([ + "cli", + "prove", + "prog.elf", + "-o", + "out", + "--epoch-size-log2", + "20", + ]); + assert!(r.is_err()); + } + + #[test] + fn epoch_size_log2_accepts_continuations() { + let r = Cli::command().try_get_matches_from([ + "cli", + "prove", + "prog.elf", + "-o", + "out", + "--continuations", + "--epoch-size-log2", + "20", + ]); + assert!(r.is_ok()); + } + + #[test] + fn cycles_accepts_continuations() { + let r = Cli::command().try_get_matches_from([ + "cli", + "prove", + "prog.elf", + "-o", + "out", + "--continuations", + "--cycles", + ]); + assert!(r.is_ok()); + } + + #[test] + fn elements_conflicts_with_continuations() { + let r = Cli::command().try_get_matches_from([ + "cli", + "prove", + "prog.elf", + "-o", + "out", + "--continuations", + "--elements", + ]); + assert!(r.is_err()); + } + + #[test] + fn epoch_size_log2_rejects_tiny_cli_values() { + let r = Cli::command().try_get_matches_from([ + "cli", + "prove", + "prog.elf", + "-o", + "out", + "--continuations", + "--epoch-size-log2", + "17", + ]); + assert!(r.is_err()); + } + + #[test] + fn old_epoch_size_flag_is_rejected() { + let r = Cli::command().try_get_matches_from([ + "cli", + "prove", + "prog.elf", + "-o", + "out", + "--continuations", + "--epoch-size", + "1048576", + ]); + assert!(r.is_err()); + } + + #[test] + fn old_num_epochs_flag_is_rejected() { + let r = Cli::command().try_get_matches_from([ + "cli", + "prove", + "prog.elf", + "-o", + "out", + "--continuations", + "--num-epochs", + "4", + ]); + assert!(r.is_err()); + } + + #[test] + fn prove_help_omits_removed_epoch_flags() { + let mut cmd = Cli::command(); + let prove = cmd.find_subcommand_mut("prove").unwrap(); + let mut help = Vec::new(); + prove.write_long_help(&mut help).unwrap(); + let help = String::from_utf8(help).unwrap(); + + assert!(help.contains("--epoch-size-log2 ")); + assert!(!help.contains("--num-epochs")); + assert!(!help.contains("--epoch-size <")); + } + + #[test] + fn continuation_epoch_size_rejects_tiny_cli_values() { + assert!(continuation_epoch_size(17).is_err()); + } + + #[test] + fn continuation_epoch_size_uses_exact_power_of_two() { + assert_eq!(continuation_epoch_size(20).unwrap(), 1 << 20); + } +} diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index e8f27b631..8ba066462 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -17,6 +17,7 @@ - [Provable security and conjectured security](./cryptography/security.md) - [Lookup argument](./cryptography/lookup.md) - [Virtual machine](./virtual_machine/introduction.md) +- [Continuations design](./continuations_design.md) ## Getting started diff --git a/docs/continuations_design.md b/docs/continuations_design.md new file mode 100644 index 000000000..9c3f54747 --- /dev/null +++ b/docs/continuations_design.md @@ -0,0 +1,564 @@ +# Continuations (Approach 2) — design + +This is the single design document for the "continuations" prover (Approach 2, +"prove-epoch" from the streaming spec). It covers the things a continuation must +carry across epoch boundaries — **memory** (the bulk of the doc: §1–§5, including +the cross-epoch local-to-global table and the Design X vs Design Y decision), +**registers** including the commit index x254 (§6), and the **Fiat-Shamir statement +binding** (§7) that ties each epoch proof to its program and position — plus the +soundness mechanisms that make each safe. §8 describes the **standalone (split) +prover/verifier** that checks a proof bundle with only the ELF. + +It is written to be read by a human picking this up cold. + +--- + +## 1. Why continuations + +A monolithic proof builds the trace for the **whole** execution in memory at +once; for large programs that exhausts RAM. Continuations split the execution +into fixed-size **epochs** and prove each independently, so peak memory stays +flat as program size grows. + +Almost every constraint in a proof is local to its slice of cycles — *except +memory*. A load in a late epoch may read what an early epoch wrote. So the only +thing that must be stitched across epoch boundaries is **memory consistency**. + +``` + one execution (e.g. 4,000,000 cycles) + ┌───────────────────────────────────────────────┐ + │ split into epochs of N cycles │ + └───────────────────────────────────────────────┘ + │ │ │ │ + ▼ ▼ ▼ ▼ + ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ + │ Epoch 0 │ │ Epoch 1 │ │ Epoch 2 │ │ Epoch 3 │ each proven on its own + │ CPU MEMW│ │ CPU MEMW│ │ CPU MEMW│ │ CPU MEMW│ (tables dropped from RAM + │ ... L2G │ │ ... L2G │ │ ... L2G │ │ ... L2G │ after each epoch) + └────┬────┘ └────┬────┘ └────┬────┘ └────┬────┘ + └────────────┴─────┬──────┴────────────┘ + ▼ + ┌────────────────────────┐ + │ ONE global proof │ links the epochs together + │ (cross-epoch memory) │ + └────────────────────────┘ +``` + +--- + +## 2. The pieces + +A **bus** is a LogUp channel: tables *send* and *receive* tokens, and the proof +checks that everything sent is received (the bus "balances"). An unmatched token +makes the proof fail. + +- **MEMW** — the actual loads/stores, driven by the CPU executing the program. +- **L2G** (local-to-global) — one row per memory cell an epoch *touches*. Two roles: + - inside an epoch, on the **Memory bus**, it is the *bookend* — it supplies a + cell's starting value (seed at timestamp 0) and collects its ending value. + It **replaces the PAGE table**, which is switched off inside continuation + epochs. + - across epochs, on the **GlobalMemory bus**, it carries each cell's + "where did this value come from / where is it going" claims. +- **global_memory** — the *anchors* on the GlobalMemory bus: + - **genesis**: a cell's starting value, read from the **ELF** (preprocessed, + so the verifier recomputes it — the prover cannot choose initial memory). + - **finalization**: a cell's final value after the last epoch that touched it. + +### A single L2G row + +``` + ┌──────────┬───────────────────────────┬───────────────────────────┐ + │ address │ init: value, epoch │ fini: value, time │ + └──────────┴───────────────────────────┴───────────────────────────┘ + which what it was when this what it is at this + cell epoch first saw it, and epoch's end (its last + which epoch wrote it access timestamp) +``` + +Column layout (9 columns): `address_lo/hi` (32-bit), `init_value` (byte), +`init_epoch` (two 16-bit halfwords), `fini_value` (byte), +`fini_timestamp_lo/hi` (32-bit), `MU` (selector). + +Note: **`fini_epoch` is NOT a column** — it is supplied as a per-table constant +(see §4.2). + +Note: there is **no `init_timestamp`**. Timestamps are epoch-local (each epoch's +clock restarts; the Memory-bus seed is `ts = 0`) and order accesses only *within* +an epoch. The cross-epoch chain is ordered by the **epoch number** (§3.3), so the +GlobalMemory bus carries no timestamp at all (see §2 telescoping). `fini_timestamp` +stays only because the epoch-local **Memory bus** needs it (matched against MEMW). + +### Cross-epoch telescoping + +For a cell touched in epochs 1, 2, 3, the GlobalMemory bus checks: + +``` + global_memory L2G(ep1) L2G(ep2) L2G(ep3) global_memory + GENESIS ───────► init + (value v0, fini ───────► init + from ELF) fini ───────► init + fini ───────► FINAL + (last value) + + each "fini ───► init" is one matched token: + epoch i's fini == epoch (i+1)'s init (same address, value, epoch — no timestamp) +``` + +The bus balances **iff** every `fini` is consumed by the next-touching epoch's +`init`, anchored by GENESIS (the one source) and FINAL (the one sink). That +chain *is* "memory stayed consistent across epochs." Inside each epoch, ordinary +memory checking (MEMW + timestamp ordering) handles consistency; L2G only +provides the seam at the edges. + +--- + +## 3. Soundness, by component + +The skeleton above is correct but not *sound* on its own — a cheating prover +could make the buses balance while lying. Four mechanisms close the gaps. + +### 3.1 Range checks on the L2G columns + +Raw field columns must be forced into their intended ranges, or a prover can +stuff out-of-range junk into them. + +Principle: **only check what nothing else already checks.** + +- `address`, `fini_timestamp`, the value bytes — these travel on the Memory bus + and are matched against **MEMW**, which already range-checks them (exactly how + PAGE relied on MEMW). No extra check. +- The **cross-epoch-only** field `init_epoch` has no MEMW partner, so L2G checks it + itself: store as 16-bit halfwords, check each with the `IsHalfword` lookup, and + rebuild the value as `lo + 2^16·hi`. Because only the range-checked halfwords feed + the reconstruction, no extra AIR constraint is needed. (There is no + `init_timestamp` to check — the GlobalMemory bus carries no timestamp; see §2.) + +The value bytes get PAGE's batched `AreBytes` check (the `init` value is a +trusted source and must be checked). + +### 3.2 `fini_epoch` as a per-table constant + +Inside epoch *i*'s table, **every** row's `fini_epoch` is just *i*. So it does +not need to be a per-row committed column — it is supplied to the AIR as a +constant `epoch_label`, computed by the verifier from the epoch's position. + +This is *strictly more sound* than a column: the prover cannot choose it. The +genesis sentinel is `0` and real epochs are labelled `1, 2, 3, …` +(`epoch_label(i) = i + 1`), so genesis is below every real epoch. + +### 3.3 Cross-epoch ordering (the subtle one) + +The GlobalMemory bus only proves the tokens **match as a set** — not that they +are chained in increasing-epoch order. Without that, a cheater can make a row's +`init` and `fini` cancel each other (point `init` at its own epoch), so the row +**vanishes** from the chain — letting an epoch read a *forged* value for a cell +while a later epoch absorbs that cell's real genesis. The bus balances; the +program ran on a lie. + +Fix: force every row to reference a strictly earlier source — +`init_epoch < fini_epoch`. With genesis `= 0` and 1-based epochs, genesis (`0`) +satisfies it with no special case. + +How `a < b` is checked without a dedicated comparison table (the same trick +MEMW uses for timestamps): in the field, `a < b` ⟺ `b − 1 − a` is a small, +in-range number. If `a ≥ b`, that subtraction wraps to a huge field element that +fails the range check. So we range-check `fini_epoch − 1 − init_epoch` with the +`IsB20` (20-bit) lookup — reusing the bit-table already present, near-zero cost. + +``` + honest: init=2, fini=5 → 5-1-2 = 2 small ✓ passes + cheat: init=5, fini=5 → 5-1-5 = -1 wraps ✗ fails (self-reference) + cheat: init=9, fini=5 → 5-1-9 = -5 wraps ✗ fails (future reference) +``` + +Strict `<` (not `≤`) is required: `≤` would permit `init_epoch == fini_epoch`, +which is exactly the self-cancel that enables the forgery. Strict `<` guarantees +a real row's `init` and `fini` epochs always differ, so a real row can never +self-cancel. + +Cost: this bounds the **number** of epochs to `< 2^20` (~1M) — *not* their size. +Unreachable in practice (optimal epochs are millions of cycles → thousands of +epochs even for a billion-cycle run) and fails closed. If ever needed, widen the +gap check to 32-bit or switch to the LT table. + +### 3.4 The `MU` selector + +Traces are padded with blank rows to a power of two (an FFT requirement). Those +padding rows must not disturb any bus. + +Originally padding was harmless because a blank row's `init` and `fini` tokens +were identical and self-cancelled. **Two** of the changes above broke that, each +on its own: + +- §3.2 (constant `fini_epoch`): a padding row's `fini` now carries + `epoch = the constant` while its `init` carries `epoch = 0`, so the tokens + differ and no longer cancel. +- §3.3 (the ordering check): a padding row has `init_epoch == fini_epoch` (both + `0`), which fails the strict `<` check. + +So `MU` is required by *either* change. + +Fix: a selector column `MU` (1 on real rows, 0 on padding). Interactions gated by +`Multiplicity::Column(MU)` contribute nothing on padding rows. + +`MU` is itself constrained boolean (`MU·(1−MU)=0`), and pinned to the right +rows by bus balance (a real row with `MU=0` drops its telescoping link → +imbalance). + +### 3.5 CPU padding and the power-of-two epoch size + +The CPU table is padded to a power of two (the same FFT requirement). After the +inline-PC rework, padding rows are **not** inert: each carries `pc = 1` and +reads/writes it on the inline-PC `memory` chain, and that chain is anchored only by +the HALT chip's `consume_pc`/`emit_pc` — which converts the last real `next_pc` +into the `pc = 1` sentinel the padding rows expect. + +An **intermediate** continuation epoch excludes HALT (only the *final* epoch +halts). So if an intermediate epoch had padding rows, their `pc = 1` tokens would +dangle — no HALT to anchor them, and the REGISTER FINI carries the real next PC, +not `1` — and the Memory bus would not balance. The honest prover could not produce +a verifying proof. + +Fix: **epoch size is expressed as `epoch_size_log2`**, so the driver slices at +exactly `2^epoch_size_log2` cycles. An intermediate epoch runs that exact +power-of-two number of cycles, so its CPU table already has a power-of-two row +count and therefore **zero padding rows** — nothing to dangle. The final epoch +keeps its remainder *and* its HALT, so its padding chain is anchored as usual. A +program shorter than one epoch runs as a single final (monolithic-style) epoch. + +This is a **completeness** fix: it changes no constraint and nothing the verifier +accepts — only how the driver slices cycles. A debug-assert enforces the +"intermediate epoch ⟹ power-of-two cycle count" invariant. + +--- + +## 4. Design X vs Design Y — *where* `MU` is applied + +`MU` is needed to neutralize padding, but **which** interactions should it gate? + +``` + GlobalMemory Memory range + + (telescoping) (bookend) ordering + Design X (SOUND): MU MU MU ← MU gates everything + Design Y (UNSOUND): MU One One ← MU only on GlobalMemory +``` + +**Conclusion up front: Design X is sound; Design Y is *not*.** We initially +believed Y was a cleaner equivalent (and two adversarial reviews agreed). They +were wrong — Y opens a chain-truncation attack. Below is X, then Y, then the +attack and why X blocks it. + +### Design X + +`MU` gates **every** L2G interaction (matches the standard table pattern — +LT/MUL/MEMW each gate all their interactions with one multiplicity column). + +The crucial consequence — which we first mistook for redundancy — is that gating +the **Memory bus bookend** with `MU` forces `MU = 1` on every *touched* cell: +a touched cell's MEMW accesses need the L2G seed/fini on the Memory bus (PAGE is +off), so `MU = 0` would dangle them → the epoch proof fails. This is **Statement +S** below. Forcing `MU = 1` on every touched cell forces every touching epoch +**into the global chain** — so the chain is **complete**, and cannot be truncated. + +### Design Y (rejected — unsound) + +`MU` gates **only the GlobalMemory bus**; the Memory bus and range/ordering checks +use `Multiplicity::One`. The intended win was that the ordering check then fires +unconditionally so `MU` can't skip it. But decoupling the Memory bookend from `MU` +**broke Statement S**: a touched cell's bookend now fires regardless of `MU` +(`Multiplicity::One`), so the epoch proof passes even with `MU = 0`. Nothing then +forces `MU = 1` on a *non-first-touch* row — and that is exploitable. + +### The attack Design Y allows: orphan a touched epoch + +Cell A, touched by epochs e1 then e2. Honest: genesis `v0` → e1 writes `f1` → +e2 writes `f2` → final `f2`. A cheating prover sets **`MU = 0` on e2's L2G row** +and sets `global_memory`'s finalization for A to `f1`: + +``` + genesis(v0) ──► e1.init ✓ (genesis must be consumed — forces e1 only) + e1.fini(f1) ──► FINAL(f1) ✓ (prover-chosen finalization absorbs it) + e2.init / e2.fini ✗ MU=0 — orphaned, don't fire +``` + +- The GlobalMemory bus **balances** (every fired token matched). +- e2's **epoch proof still passes** — in Design Y its Memory bookend is + `Multiplicity::One`, so it fires regardless of `MU`; e2 ran internally-consistently. +- **Nothing forces `MU_e2 = 1`:** e2 isn't first-touch (genesis went to e1), and + the finalization is a *prover column*, so it just absorbs whatever the last fired + fini was. + +Result: e2's write to A is silently dropped — A's final value is claimed `f1` +when it's really `f2`. A false statement, proven. (For a *middle* epoch, reroute +the later init to consume the earlier fini, skipping the middle one.) + +The root cause is the **input/output asymmetry** of the anchors: genesis is the +*input* and is ELF-bound (fixed), but the finalization is the *output* — a prover +column. The finalization is only trustworthy if the chain is **complete** so that +the last fini is *forced* to be consumed by it. A complete chain pins the +finalization; a truncatable chain leaves it free. Design X forces completeness +(via `MU=1` on every touched cell); Design Y does not. + +### Statement S (why Design X is sound, and what Y broke) + +> In a continuation epoch, the only table that provides a RAM cell's seed (its +> value at timestamp 0) on the Memory bus is L2G (PAGE is off). If a cell is +> accessed by MEMW during the epoch, the memory argument requires that seed; with +> `MU = 0` the seed is absent and the Memory bus cannot balance. Therefore any +> accessed cell is forced to `MU = 1`. + +S rests on three checkable facts: (1) PAGE is off in continuation epochs; +(2) MEMW enforces timestamp ordering, so a cell's access chain must bottom out at +the seed; (3) no other table provides a RAM seed (REGISTER is registers only, a +disjoint token subspace). + +**S requires the Memory bookend to be `MU`-gated** — that is exactly what Design X +has and Design Y removed. So the "redundant" `MU` on the Memory bus in Design X is +in fact load-bearing: it's what forces every touched epoch into the chain, making +the chain complete and the finalization trustworthy. + +### The anchoring chain (why a real access cannot be dropped at all) + +`MU = 1` being forced bottoms out at the program itself: + +``` + ELF ─DECODE(preprocessed)─► each row's instruction (LOAD/STORE flags) is fixed + PC-continuity ───────────► every executed instruction is present, in order + │ + ▼ a real load/store row has its flag = 1 (DECODE match + IsBit) ⟹ CPU sends Memw req + ▼ MEMW must receive it (MU_READ/MU_WRITE) — dropping it ⟹ Memw-bus imbalance + ▼ MEMW's bookend pairing needs the L2G seed/fini — in Design X (MU-gated) ⟹ MU=1 + ▼ MU=1 ⟹ the cell is in the global chain ⟹ chain complete ⟹ finalization pinned +``` + +This is the VM's core execution soundness (DECODE + PC-continuity + IsBit flags, +verified in `cpu.rs` / `constraints/cpu.rs`), extended one link at a time up to +cross-epoch memory. Design X keeps every link; Design Y cut the MEMW→L2G link. + +### How `global_memory`'s finalization is constrained — and the parallel with `main` + +The finalization is **not** checked against an external value (it's the computed +output, not a known input). It is pinned **internally** by the bus: it must consume +the last fini of each cell's chain, which (with a complete chain) is the cell's +real last-written value. This is exactly how **PAGE** works in the monolithic +prover — PAGE's `fini` is pinned by the (single, complete) Memory bus to the last +MEMW write. Design X is the faithful cross-epoch extension; Design Y silently +dropped the "chain is complete" property both rely on. + +--- + +## 5. Adversarial review summary + +1. **`MU` safety (Design X).** Could `MU=0` on a real row, or a non-boolean `MU`, + skip the ordering or forge a balance? No — caught by the Memory bus (Statement + S) and the boolean constraint. **Holds.** +2. **Design Y.** Two adversarial reviews concluded Y was sound (padding harmless, + ordering unconditional, "ghost row" attack defeated). **They were wrong.** Both + only tested *first-touch* `MU=0` (genesis dangles → caught) and added/forged + rows; neither tested **truncating the chain at a non-first-touch row** while + pointing the prover-controlled finalization at the truncation. That attack (§4) + makes Y unsound. Lesson: a review that misses an attack class proves nothing + about it — the truncation/orphan class was the gap. +3. **`fini_epoch` as a constant.** Sound — strictly more so than a column. Labels + are verifier-computed from epoch position (unforgeable); prove/verify use + identical labels (no off-by-one); the free `init_epoch` column and + `global_memory`'s `FINI_EPOCH` column are pinned by bus balance **when the chain + is complete** (Design X). Independent of the X/Y choice. + +--- + +## 6. Registers (cross-epoch) + +Registers must also carry across epochs: epoch *i+1* must start from epoch *i*'s +final register file. Unlike memory, the register file is **small and fixed** (34 +registers / 67 word-addresses, all present every epoch), so it needs no L2G / +global telescoping — we bind the whole snapshot directly. + +**Mechanism (no new bus).** The REGISTER table is the register analog of PAGE — it +already puts each register's init/fini tokens on the epoch-local Memory bus +(REG-C1 init, REG-C2 fini, matched against MEMW). For continuation epochs we +**also preprocess the FINI column** = the epoch's final register file `R_{i+1}` +(on top of the already-preprocessed INIT = `R_i`). "Preprocessed" means +*verifier-known*: the verifier recomputes the column's commitment, so the prover +cannot choose it. The verifier reuses the **same** `R_{i+1}` as epoch *i*'s FINI +and epoch *i+1*'s INIT, so `init(i+1) == fini(i)` **by construction** — no equality +check and no bus. Genesis is epoch 0's INIT = the ELF entry-point registers +(verifier-derived). + +``` + epoch i REGISTER epoch i+1 REGISTER + INIT = R_i (pre) INIT = R_{i+1} (pre) ← same R_{i+1} + FINI = R_{i+1} (pre) ────────┘ reused both sides +``` + +### Register soundness (two locks) + +For `R_{i+1}` to be the *real* final registers (not a free prover claim), two +locks compose: + +1. **Preprocessing** pins the trace's FINI column = the public `R_{i+1}` (the + verifier recomputes the commitment; the proof's FINI openings must authenticate + against it, so the prover can't deviate). +2. **REG-C2 on the Memory bus** pins that FINI column = MEMW's true last write to + each register (or the Memory bus doesn't balance). + +Compose them: public `R_{i+1}` = trace FINI = real last write. So the value handed +to the next epoch is pinned to real execution. + +The **monolithic prover is unchanged**: it keeps FINI as a main-trace column (it +has no verifier-known final state) and preprocesses 2 columns, not 3. + +### Commit index (x254) + +The COMMIT chip's running output index lives in a synthetic single-word register +**x254** (word-address 508), so it rides the **same** register binding above — +epoch *i*'s `FINI[x254]` becomes epoch *i+1*'s `INIT[x254]`, pinned by the two +locks like any register. Each epoch therefore indexes its committed bytes from the +*carried* value, not from `0`: + +- the COMMIT trace seeds `current_commit_index` from x254 + (`register_state.read_index()` in `trace_builder.rs`), with a debug-assert + pinning the two in sync every step; +- the verifier's commit-bus offset (`compute_commit_bus_offset`'s `start_index`) + starts at the same carried x254. + +The driver concatenates each epoch's committed slice into the run-wide output. +Because every slice is commit-bus-bound *and* the x254 indices are forced +contiguous (`init(i+1) == fini(i)`), the concatenation equals the true output +stream — no separate global "commit output" bus is needed. + +--- + +## 7. Fiat-Shamir statement binding + +Each epoch proof and the global proof seed their Fiat-Shamir transcript with a +**statement** before the challenges are drawn (they previously started empty). The +seeding only *adds* input to the transcript, so it can strengthen binding but never +weaken soundness — and it pins every proof to its program and position, so a proof +can't be replayed elsewhere: + +- Each **epoch** absorbs: a domain tag, the ELF digest, the public output, the + table layout, and the **epoch label** (its position). +- The **global** proof absorbs: a (distinct) domain tag, the ELF digest, and the + **epoch count**. + +The monolithic encoding is unchanged (same function, monolithic tag, no label). +The genesis / register / memory anchor values are *additionally* bound via the +preprocessed commitments absorbed during proving. + +The standalone *split* verifier (§8) carries these statement fields in the proof +bundle and takes the epoch label / count from its own trusted enumeration, so the +binding holds there too — not just on the integrated path. + +--- + +## 8. Standalone (split) prover/verifier + +The continuation can be proved and verified by separate parties. `prove_continuation` +emits a self-contained `ContinuationProof` bundle; `verify_continuation(elf, &bundle)` +checks it using **only the bundle and the ELF** — nothing from the prover's memory. +The integrated `prove_and_verify_continuation` is now a thin wrapper +(`prove_continuation` then `verify_continuation`), and `prove_verify_epoch` is +likewise split into `prove_epoch` + `verify_epoch`. + +The bundle is prover-supplied and therefore **untrusted**. Per epoch it carries the +`MultiProof`, the `public_output` slice, `table_counts`, +`num_private_input_pages`, `runtime_page_ranges`, the bound `reg_fini` (`R_{i+1}`), +the epoch `l2g_root`, and the touched-cell `boundary`; plus the global `MultiProof` +and the `private_inputs`. Everything the integrated path reused from prover memory +becomes an **explicit verifier action**: + +- **Enumerate, don't trust.** The verifier assigns each epoch's `label` and the + `is_final` flag **by position** (`0..N-1`; the last is final), so the prover can't + relabel, reorder, truncate, or append epochs — a wrong label diverges that epoch's + Fiat-Shamir challenges, and a wrong `is_final` builds the HALT table in/out and + mismatches the committed proof. +- **Derive the register / x254 chain.** Epoch 0's register INIT is derived from the + ELF entry point; epoch *i+1*'s INIT is derived from epoch *i*'s bundle `reg_fini` + (incl. x254 @ 508). So `init(i+1) == fini(i)` is now *enforced by the verifier + rebuilding the AIR from the previous FINI* (via the shared `build_epoch_airs`), + not merely true-by-construction. The commit-bus `start_index` is taken from the + carried `register_init[508]`, not a free scalar. +- **Genesis from the ELF.** `verify_global` rebuilds the memory genesis from the ELF + (+ bundle private inputs) and closes the GlobalMemory bus; + `verify_l2g_commitment_binding` ties each epoch's `l2g_root` to the corresponding + global-proof sub-table root — which is what makes the prover-supplied `boundary` + trustworthy. +- **Reconstruct the output** by concatenating the per-epoch commit slices (each + commit-bus-bound, contiguous via the x254 chain). +- The verifier also `validate()`s `table_counts` and never trusts a prover-supplied + page config (continuation epochs have none — PAGE is skipped under the L2G + bookend, so `page_configs` is always empty). + +A single `build_epoch_airs` helper builds the AIR set identically on both sides, so +prove and verify cannot diverge. + +**Reviewed.** An adversarial "construct-a-break" audit (Phase-3 dismissal audit with +fresh agents) of the register/x254 chain, the L2G root binding, and +completeness-by-enumeration found no false-accept: each forgery is caught by a +Merkle/hash collision, a bus imbalance, or a Fiat-Shamir divergence. + +The bundle derives serde and round-trips through `bincode` (exactly like a +monolithic `VmProof`); the CLI drives it via `prove --continuations` (writes the +bundle) and `verify --continuations` (checks bundle + ELF only). `prove` picks the +epoch size from `--epoch-size-log2 N` (`N=20` means 1,048,576 cycles), defaulting +to `20`. A local ethrex 10-transfer distinct-account +sweep measured peak heap at roughly 6.9 GB (`19`), 9.5 GB (`20`), 15.8 GB (`21`), +and 26.8 GB (`22`); pick the highest value the workload and machine can run +without swapping. + +**Limitation — not succinct.** The bundle carries, and the verifier checks, all *N* +epoch proofs plus the global proof. Continuations keep peak *prover* memory flat; +they do **not** shrink proof size or verify time. A single succinct proof needs a +recursion/aggregation layer (deferred). + +--- + +## 9. Status and open items + +- Implemented and tested: range checks (§3.1), `fini_epoch` constant (§3.2), + ordering check (§3.3), the `MU` selector (§3.4), the **power-of-two epoch size** + (§3.5), **cross-epoch registers** (§6), the **commit index x254** across epochs + (§6), the **Fiat-Shamir statement binding** (§7), and the **standalone split + prover/verifier** (§8) — bundle serialized with `bincode` and driven from the CLI + (`prove`/`verify --continuations`). +- **The committed code implements Design X** (`MU` gates every L2G interaction), + which is the sound design. Design Y was implemented briefly, then found unsound + (§4, the chain-truncation attack) and **reverted**. Do not re-introduce the + Design Y wiring: gating only the GlobalMemory bus reopens the orphan attack. +- Deferred: + - **Succinctness.** The split verifier is non-succinct (N+1 proofs, §8). A single + small proof needs a recursion/aggregation layer — a separate, larger effort. + - **Private-input binding.** The genesis image depends on `private_inputs`, which + the bundle carries in the clear; binding them into the statement (so "which input + produced this output" is pinned) is a follow-up that also touches the monolithic + proof. + +--- + +## 10. Where the code lives + +- `prover/src/tables/local_to_global.rs` — L2G columns, trace generation, the + Memory/GlobalMemory bus interactions, range checks, the ordering lookup, and + the per-row selector. +- `prover/src/tables/global_memory.rs` — the genesis (ELF-bound) and + finalization anchors. +- `prover/src/tables/register.rs` — the REGISTER table: REG-C1/REG-C2 Memory-bus + tokens, the preprocessed FINI commitment (`compute_precomputed_commitment_with_fini`, + `NUM_PREPROCESSED_COLS_WITH_FINI`), and `fini_from_trace`. +- `prover/src/statement.rs` — the Fiat-Shamir statement absorbers + (`absorb_statement` with `StatementKind`, `absorb_continuation_global_statement`). +- `prover/src/continuation.rs` — the split prover/verifier: `prove_continuation` / + `verify_continuation` and the `ContinuationProof` bundle; the per-epoch + `prove_epoch` / `verify_epoch` with the shared `build_epoch_airs` helper; the + global proof (`prove_global` / `verify_global`); the per-epoch AIRs + (`l2g_memory_air` / `l2g_global_air`); the power-of-two epoch sizing from + `epoch_size_log2`; the register-FINI preprocessing; the transcript seeding; and + `prove_and_verify_continuation` (the thin integrated wrapper). +- `prover/src/lib.rs` — `verify_l2g_commitment_binding` (epoch L2G root ↔ global + sub-table root) and the commit-bus offset/balance helpers + (`compute_commit_bus_offset`, `compute_expected_commit_bus_balance`) that take the + carried x254 as `start_index`. +- `prover/src/tables/trace_builder.rs` — seeds `current_commit_index` from x254 + (`read_index`) so committed-byte indexing carries across epochs. diff --git a/executor/programs/asm/array_multipass_20M.s b/executor/programs/asm/array_multipass_20M.s new file mode 100644 index 000000000..9d5fab40a --- /dev/null +++ b/executor/programs/asm/array_multipass_20M.s @@ -0,0 +1,36 @@ + .attribute 5, "rv64i2p1" + .globl main +main: + # Multi-pass array: P passes over an N-word array, each element + # load+add+store. Touches a LARGE distinct RAM footprint (N words) + # and REUSES it every pass (so each cell is touched in multiple + # epochs) -> worst-case stress for the local-to-global table. + # + # Footprint = N words = 4*N bytes (here 262144 words = 1 MiB). + # Steps ~= P * N * 6 (here 13 * 262144 * 6 ~= 20.4M). + # + # Tuning knobs: + # t5 init (N) -> distinct footprint (bytes = 4*N) + # t6 init (P) -> number of passes (cross-epoch reuse) + # keep P*N*6 ~= target step count. + + li t3, 1 # increment k + li t6, 13 # P = passes + li t0, 0x40000000 # BASE = array address (free RAM) + +.outer: + mv t1, t0 # ptr = BASE + li t5, 262144 # N = words per pass +.inner: + lw t4, 0(t1) # t4 = a[i] + add t4, t4, t3 # a[i] += k + sw t4, 0(t1) # a[i] = t4 + addi t1, t1, 4 # ptr += 4 + addi t5, t5, -1 # i-- + bnez t5, .inner + addi t6, t6, -1 # pass-- + bnez t6, .outer + + li a0, 0 + li a7, 93 + ecall diff --git a/executor/programs/asm/test_commit_split.s b/executor/programs/asm/test_commit_split.s new file mode 100644 index 000000000..1b8dab7f0 --- /dev/null +++ b/executor/programs/asm/test_commit_split.s @@ -0,0 +1,47 @@ + .attribute 5, "rv64i2p1" + .globl main +main: + # Commit [0xAA,0xBB] early, do filler work, then commit [0xCC,0xDD] later — + # so with a small epoch size the two commits fall in DIFFERENT epochs and the + # second commit's epoch starts with x254 (commit index) already = 2. + addi sp, sp, -16 # allocate stack + + # --- first commit: bytes [0xAA, 0xBB] --- + addi t0, zero, 0xAA + sb t0, 0(sp) + addi t0, zero, 0xBB + sb t0, 1(sp) + li a0, 1 # fd = 1 + mv a1, sp # buf = sp + li a2, 2 # count = 2 + li a7, 64 # syscall = Commit + ecall + + # --- filler work (room for an epoch boundary between the two commits) --- + addi t1, zero, 0 + addi t1, t1, 1 + addi t1, t1, 1 + addi t1, t1, 1 + addi t1, t1, 1 + addi t1, t1, 1 + addi t1, t1, 1 + addi t1, t1, 1 + addi t1, t1, 1 + addi t1, t1, 1 + + # --- second commit: bytes [0xCC, 0xDD] --- + addi t0, zero, 0xCC + sb t0, 2(sp) + addi t0, zero, 0xDD + sb t0, 3(sp) + li a0, 1 # fd = 1 + addi a1, sp, 2 # buf = sp+2 + li a2, 2 # count = 2 + li a7, 64 # syscall = Commit + ecall + + # --- halt --- + addi sp, sp, 16 + li a0, 0 + li a7, 93 # syscall = Halt + ecall diff --git a/executor/programs/asm/test_ecsm_split.s b/executor/programs/asm/test_ecsm_split.s new file mode 100644 index 000000000..e0e1666ae --- /dev/null +++ b/executor/programs/asm/test_ecsm_split.s @@ -0,0 +1,49 @@ + .attribute 5, "rv64i2p1_m2p0_zmmul1p0" + .globl main +main: + # Like test_ecsm.s, but the ECSM pointer registers (a0=&xR, a1=&xG, a2=&k) + # are set at the very START and never rewritten before the ecall. With a small + # continuation epoch size the ecall lands in a LATER epoch than the one that set + # the pointers, so the per-epoch touched-cell pass must carry registers across + # the boundary to compute the right addresses. + addi sp, sp, -96 + addi a0, sp, 64 + addi a1, sp, 0 + addi a2, sp, 32 + li a7, -11 + + # xG = secp256k1 Gx, little-endian (4 doublewords). The heavy 64-bit immediates + # act as natural filler between the pointer setup and the ecall. + li t0, 0x59F2815B16F81798 + sd t0, 0(sp) + li t0, 0x029BFCDB2DCE28D9 + sd t0, 8(sp) + li t0, 0x55A06295CE870B07 + sd t0, 16(sp) + li t0, 0x79BE667EF9DCBBAC + sd t0, 24(sp) + + # k = 5 (little-endian); exercises double, double, add. + li t0, 5 + sd t0, 32(sp) + sd zero, 40(sp) + sd zero, 48(sp) + sd zero, 56(sp) + + # ECSM ecall: a0/a1/a2 were set far above (possibly in an earlier epoch). + ecall + + # Commit the 32-byte result xR so the test can check it equals x(5G). + li a0, 1 + addi a1, sp, 64 + li a2, 32 + li a7, 64 + ecall + + # Restore stack and halt. + addi sp, sp, 96 + li a0, 0 + li a7, 93 + ecall +.Lfunc_end1: + .size main, .Lfunc_end1-main diff --git a/executor/src/vm/execution.rs b/executor/src/vm/execution.rs index 614aad649..99eb0a00f 100644 --- a/executor/src/vm/execution.rs +++ b/executor/src/vm/execution.rs @@ -30,6 +30,17 @@ pub struct ExecutionResult { /// Size of each log chunk - balances memory usage vs callback overhead const CHUNK_SIZE: usize = 100_000; +/// Result of executing one continuation epoch: the logs produced during the +/// epoch and the VM state at the epoch boundary. The boundary state is the +/// starting state of the next epoch. +#[derive(Debug)] +pub struct EpochExecution { + pub logs: Vec, + pub end_pc: u64, + pub end_registers: Registers, + pub end_memory: Memory, +} + /// Executor state for chunked execution pub struct Executor { memory: Memory, @@ -57,13 +68,34 @@ impl Executor { /// Resume execution and return next logs. Returns None when program is finished. pub fn resume(&mut self) -> Result, ExecutorError> { + self.resume_with_limit(CHUNK_SIZE) + } + + /// Current program counter (0 once the program has halted). + pub fn pc(&self) -> u64 { + self.pc + } + + /// Current register state. + pub fn registers(&self) -> &Registers { + &self.registers + } + + /// Current memory state. + pub fn memory(&self) -> &Memory { + &self.memory + } + + /// Resume execution, running at most `limit` cycles, and return the logs + /// produced. Returns None when the program is finished. + pub fn resume_with_limit(&mut self, limit: usize) -> Result, ExecutorError> { if self.pc == 0 { return Ok(None); } self.logs.clear(); - while self.pc != 0 && self.logs.len() < CHUNK_SIZE { + while self.pc != 0 && self.logs.len() < limit { if !self.pc.is_multiple_of(4) { return Err(ExecutorError::InstructionAddressMisaligned(self.pc)); } @@ -117,6 +149,29 @@ impl Executor { instructions: self.instructions.into_instruction_map(), }) } + + /// Run to completion, splitting execution into epochs of at most `epoch_size` + /// cycles. Each epoch captures its logs and the VM state at the epoch + /// boundary, which is the starting state of the next epoch. Consumes the + /// executor. + /// + /// Test/bench helper — the production continuation prover streams epochs via + /// `resume_with_limit` directly. + pub fn run_epochs(mut self, epoch_size: usize) -> Result, ExecutorError> { + assert!(epoch_size > 0, "epoch_size must be greater than zero"); + + let mut epochs = Vec::new(); + while let Some(logs) = self.resume_with_limit(epoch_size)? { + let logs = logs.to_vec(); + epochs.push(EpochExecution { + logs, + end_pc: self.pc, + end_registers: self.registers.clone(), + end_memory: self.memory.clone(), + }); + } + Ok(epochs) + } } fn load_program(segments: &[crate::elf::Segment], memory: &mut Memory) -> Result<(), MemoryError> { diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs index ea84e2620..1bc4549fd 100644 --- a/executor/src/vm/memory.rs +++ b/executor/src/vm/memory.rs @@ -50,7 +50,7 @@ pub const MAX_PRIVATE_INPUT_SIZE: u64 = 6700000; /// Must match `PRIVATE_INPUT_START` in `syscalls/src/syscalls.rs`. pub const PRIVATE_INPUT_START_INDEX: u64 = 0xFF000000; -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone)] pub struct Memory { cells: U64HashMap<[u8; 4]>, /// Bytes committed to public output via `commit_public_output`. The @@ -80,6 +80,18 @@ impl Memory { entry[(address % 4) as usize] = value; } + /// Iterate over all stored bytes as `(address, value)` pairs. Cells are + /// stored as 4-byte words; each word expands into its four byte addresses. + /// Used to snapshot memory at an epoch boundary. + pub fn iter_bytes(&self) -> impl Iterator + '_ { + self.cells.iter().flat_map(|(&addr, bytes)| { + bytes + .iter() + .enumerate() + .map(move |(i, &b)| (addr + i as u64, b)) + }) + } + pub fn load_word(&self, address: u64) -> Result { if address.is_multiple_of(4) { let bytes = self.cells.get(&address).cloned().unwrap_or_default(); diff --git a/executor/src/vm/registers.rs b/executor/src/vm/registers.rs index 61945b732..a82ef44f1 100644 --- a/executor/src/vm/registers.rs +++ b/executor/src/vm/registers.rs @@ -2,7 +2,7 @@ use std::fmt::Display; pub const STACK_TOP: u64 = 0xFFFFFFFFFFFFFFF0; // 64-bit max (Multiple of 16 for RV64 ABI) -#[derive(Debug)] +#[derive(Debug, Clone)] /// Holds the current value of all 32 registers /// Register zero is implicit as it cannot hold any value other than zero pub struct Registers([u64; 31]); diff --git a/executor/tests/asm.rs b/executor/tests/asm.rs index e9c9c08dd..a1c9baf2b 100644 --- a/executor/tests/asm.rs +++ b/executor/tests/asm.rs @@ -923,3 +923,44 @@ fn test_keccak() { assert_eq!(result.return_values.memory_values, expected_bytes); assert_eq!(result.return_values.register_values.0, 0); } + +#[test] +fn test_run_epochs_splits_execution_into_n_cycle_epochs() { + let elf_data = std::fs::read("./program_artifacts/asm/basic_program.elf").unwrap(); + let program = Elf::load(&elf_data).unwrap(); + + // Reference: full single-pass run. + let full = Executor::new(&program, vec![]).unwrap().run().unwrap(); + + // Pick an epoch size that splits this program into a few epochs, whatever + // its exact length. + let total_cycles = full.logs.len(); + assert!(total_cycles >= 2); + let epoch_size = (total_cycles / 3).max(1); + + let epochs = Executor::new(&program, vec![]) + .unwrap() + .run_epochs(epoch_size) + .unwrap(); + + // The program is long enough to span several epochs. + assert!(epochs.len() >= 2); + + // Concatenated epoch logs reproduce the full run's instruction stream. + let concat: Vec = epochs + .iter() + .flat_map(|e| e.logs.iter().map(|l| l.current_pc)) + .collect(); + let expected: Vec = full.logs.iter().map(|l| l.current_pc).collect(); + assert_eq!(concat, expected); + + // Every epoch except the last runs exactly `epoch_size` cycles. + for epoch in &epochs[..epochs.len() - 1] { + assert_eq!(epoch.logs.len(), epoch_size); + } + let last = epochs.last().unwrap(); + assert!(!last.logs.is_empty() && last.logs.len() <= epoch_size); + + // The program finished, so the final epoch's boundary pc is 0. + assert_eq!(last.end_pc, 0); +} diff --git a/prover/Cargo.toml b/prover/Cargo.toml index da9ceb9af..61d2aa61a 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -43,3 +43,7 @@ harness = false [[bench]] name = "profile_vm_prover" harness = false + +[[bench]] +name = "bench_continuation" +harness = false diff --git a/prover/benches/bench_continuation.rs b/prover/benches/bench_continuation.rs new file mode 100644 index 000000000..c8638346f --- /dev/null +++ b/prover/benches/bench_continuation.rs @@ -0,0 +1,141 @@ +//! Peak-memory benchmark: monolithic proving vs continuation (streaming-epoch) +//! proving, for large programs. +//! +//! This is a plain one-shot binary (`harness = false`), not a Criterion bench: +//! Criterion measures time over many iterations, whereas the point here is the +//! peak resident set of a SINGLE prove. Wrap it in the OS timer to capture RSS, +//! on Linux: +//! /usr/bin/time -v main +//! /usr/bin/time -v cont 65536 +//! +//! Build + locate the binary: +//! cargo build --release --bench bench_continuation +//! ls target/release/deps/bench_continuation-* # the executable (no .d) +//! +//! Args: +//! "count", "main" (monolithic prove) or "cont" (continuation) +//! path to a compiled ELF artifact +//! [epoch_size] epoch length in cycles for "cont" (default 65536) +//! +//! Env: +//! BENCH_PRIVATE_INPUT optional path to a private-input file (e.g. an +//! ethrex ProgramInput .bin). Empty if unset. + +use std::time::Instant; + +fn main() { + let args: Vec = std::env::args().collect(); + if args.len() < 3 { + eprintln!("usage: bench_continuation [epoch_size]"); + std::process::exit(2); + } + let mode = args[1].as_str(); + let elf_path = &args[2]; + let elf = std::fs::read(elf_path).expect("failed to read ELF"); + let private_inputs: Vec = match std::env::var("BENCH_PRIVATE_INPUT") { + Ok(path) if !path.is_empty() => { + std::fs::read(&path).expect("failed to read BENCH_PRIVATE_INPUT file") + } + _ => Vec::new(), + }; + + let start = Instant::now(); + match mode { + "count" => { + // Count cycles by running the executor to completion (no proving). + // Cycle count is a linear proxy for monolithic proving memory. + use executor::elf::Elf; + use executor::vm::execution::Executor; + let program = Elf::load(&elf).expect("bad ELF"); + let result = Executor::new(&program, private_inputs) + .expect("executor") + .run() + .expect("execution failed"); + println!("cycles = {}", result.logs.len()); + } + "footprint" => { + // Run to completion, then classify the touched memory by region so we + // can see how much of the footprint is stack (contiguous, near + // STACK_TOP) vs the rest (ELF data / heap / private input, low + // addresses). Tells us whether a stack-specific Vec store would help. + use executor::elf::Elf; + use executor::vm::execution::Executor; + use executor::vm::registers::STACK_TOP; + let program = Elf::load(&elf).expect("bad ELF"); + let mut ex = Executor::new(&program, private_inputs).expect("executor"); + while ex.pc() != 0 { + match ex.resume_with_limit(usize::MAX).expect("execution failed") { + Some(_) => {} + None => break, + } + } + // Stack lives in the top half of the address space (grows down from + // STACK_TOP); ELF data / heap / input are in the low addresses. + const STACK_THRESHOLD: u64 = 1 << 63; + let (mut stack, mut other) = (0u64, 0u64); + let (mut min_stack, mut min_other, mut max_other) = (u64::MAX, u64::MAX, 0u64); + for (addr, _) in ex.memory().iter_bytes() { + if addr >= STACK_THRESHOLD { + stack += 1; + min_stack = min_stack.min(addr); + } else { + other += 1; + min_other = min_other.min(addr); + max_other = max_other.max(addr); + } + } + let total = stack + other; + let pct = |n: u64| 100.0 * n as f64 / total.max(1) as f64; + println!("footprint: {total} touched bytes"); + if stack > 0 { + let span = STACK_TOP - min_stack + 1; + println!( + " stack: {stack} bytes ({:.1}%), range [{:#x}..={:#x}], span {span} bytes, density {:.1}%", + pct(stack), + min_stack, + STACK_TOP, + 100.0 * stack as f64 / span as f64, + ); + } + if other > 0 { + println!( + " other (data/heap/input): {other} bytes ({:.1}%), range [{:#x}..={:#x}]", + pct(other), + min_other, + max_other, + ); + } + } + "main" => { + lambda_vm_prover::prove_with_inputs(&elf, &private_inputs) + .expect("monolithic prove failed"); + println!("main prove ok ({} bytes ELF)", elf.len()); + } + "cont" => { + let epoch_size_log2: u32 = args + .get(3) + .map(|s| s.parse().expect("bad epoch_size_log2")) + .unwrap_or(16); + // Match the monolithic `main` mode's options (blowup 2) for a fair comparison. + let opts = stark::proof::options::GoldilocksCubicProofOptions::with_blowup(2) + .expect("blowup=2 is always valid"); + let output = lambda_vm_prover::continuation::prove_and_verify_continuation( + &elf, + &private_inputs, + epoch_size_log2, + &opts, + ) + .expect("continuation failed"); + assert!(output.is_some(), "continuation did not verify"); + println!( + "cont prove+verify ok (epoch_size_log2={epoch_size_log2}, epoch_size={})", + 1usize << epoch_size_log2 + ); + } + other => { + eprintln!("unknown mode {other:?}; use count|footprint|main|cont"); + std::process::exit(2); + } + } + println!("elapsed {:.2}s", start.elapsed().as_secs_f64()); +} diff --git a/prover/src/continuation.rs b/prover/src/continuation.rs new file mode 100644 index 000000000..15b56a1e0 --- /dev/null +++ b/prover/src/continuation.rs @@ -0,0 +1,1210 @@ +//! First production implementation of continuations (Approach 2). +//! +//! Splits an execution into fixed-size epochs, proves each epoch independently +//! (its memory is initialized/finalized by the per-epoch local-to-global table), +//! and proves one cross-epoch "global memory" LogUp that links every epoch's +//! `fini` to the next epoch's `init` (so `fini(epoch i) == init(epoch i+1)`). +//! +//! The global proof's genesis anchor is bound to the ELF: the verifier +//! recomputes the per-page preprocessed init commitment from the ELF in +//! `verify_global`, so the starting memory cannot be prover-supplied. +//! +//! The local-to-global columns are range-checked in the epoch proof (which +//! carries the BITWISE provider): values are bytes, and the cross-epoch-only +//! `init_epoch` is built from `IsHalfword`-checked halfwords. Address and +//! fini-timestamp need no extra check — they are matched against MEMW on the +//! epoch-local Memory bus, exactly as PAGE relies on MEMW. The global proof +//! commits the identical trace, so it inherits the guarantee via the commitment +//! binding. There is no cross-epoch timestamp; the chain is ordered by epoch. +//! +//! Cross-epoch registers are bound the same way: each continuation epoch +//! preprocesses its REGISTER `FINI` column to the epoch's final register file +//! `R_{i+1}` (alongside `INIT = R_i`), and the driver reuses the same `R_{i+1}` +//! as the next epoch's preprocessed `INIT` — so `init(epoch i+1) == fini(epoch i)` +//! by construction, with the REG-C2 Memory bus binding `FINI` to the true final +//! registers. No extra bus. +//! +//! The x254 commit index is carried across epochs by that same register binding, +//! so a continuation epoch indexes its commits from the carried value: both the +//! COMMIT trace (`current_commit_index` seeded from x254) and the verifier's +//! `compute_commit_bus_offset` (a `start_index` parameter) count from it, and the +//! driver concatenates each epoch's committed bytes into the run-wide output. +//! +//! The prover and verifier are split: `prove_continuation` emits a self-contained +//! `ContinuationProof` bundle and `verify_continuation` checks it from the bundle +//! and ELF alone (`prove_and_verify_continuation` is a thin wrapper over both). + +use std::collections::HashMap; + +use crypto::fiat_shamir::default_transcript::DefaultTranscript; +use executor::elf::Elf; +use executor::vm::execution::Executor; +use executor::vm::memory::MAX_PRIVATE_INPUT_SIZE; +use math::field::element::FieldElement; +use stark::config::Commitment; +use stark::lookup::{AirWithBuses, AuxiliaryTraceBuildData, NullBoundaryConstraintBuilder}; +use stark::proof::options::ProofOptions; +use stark::proof::stark::MultiProof; +use stark::prover::{IsStarkProver, Prover}; +use stark::trace::TraceTable; +use stark::traits::AIR; +use stark::verifier::{IsStarkVerifier, Verifier}; + +use crate::statement::{StatementKind, absorb_continuation_global_statement, absorb_statement}; +use crate::tables::local_to_global::{self, CellBoundary}; +use crate::tables::page::{self, PageConfig}; +use crate::tables::register; +use crate::tables::trace_builder::{Traces, build_init_page_data, build_initial_image_paged}; +use crate::tables::types::{GoldilocksExtension, GoldilocksField}; +use crate::tables::{MaxRowsConfig, global_memory}; +use crate::{ + Error, FIXED_TABLE_COUNT, RuntimePageRange, TableCounts, VmAirs, + compute_expected_commit_bus_balance, verify_l2g_commitment_binding, +}; + +type F = GoldilocksField; +type E = GoldilocksExtension; +type AirRef<'a> = &'a dyn AIR; + +fn empty_constraints() +-> Vec>> { + vec![] +} + +/// Fresh transcript seeded with the epoch's statement (ELF, public output, table +/// layout) and `epoch_label` (its position). The epoch's prove, verify, and +/// bus-balance replay all seed via this so their challenges match; the seeding +/// pins each epoch proof to its program and position (replay protection). +fn epoch_transcript( + elf_bytes: &[u8], + public_output: &[u8], + table_counts: &TableCounts, + num_private_input_pages: usize, + runtime_page_ranges: &[RuntimePageRange], + epoch_label: u64, +) -> DefaultTranscript { + let mut transcript = DefaultTranscript::::new(&[]); + absorb_statement( + &mut transcript, + StatementKind::ContinuationEpoch { epoch_label }, + elf_bytes, + public_output, + table_counts, + num_private_input_pages, + runtime_page_ranges, + ); + transcript +} + +/// Fresh transcript seeded with the global proof's statement (ELF + epoch count). +/// `prove_global` and `verify_global` both seed via this so their challenges match. +fn global_transcript(elf_bytes: &[u8], num_epochs: usize) -> DefaultTranscript { + let mut transcript = DefaultTranscript::::new(&[]); + absorb_continuation_global_statement(&mut transcript, elf_bytes, num_epochs); + transcript +} + +/// The L2G table's AIR constraint: the `MU` selector column is boolean. +/// +/// The Memory bus already pins `MU = 1` on real rows and `MU = 0` on padding — +/// it's anchored to MEMW's own bit-constrained multiplicity, since a non-1 `MU` +/// leaves the cell's seed/fini tokens unmatched. This constraint makes +/// "`MU ∈ {0,1}`" explicit on the table itself rather than relying on that +/// cross-bus argument. Lives on the epoch-local air; the global proof commits the +/// identical trace (root-bound), so it inherits it. +fn l2g_constraints() +-> Vec>> { + use crate::constraints::templates::IsBitConstraint; + use stark::constraints::transition::TransitionConstraint; + vec![IsBitConstraint::unconditional(local_to_global::cols::MU, 0).boxed()] +} + +/// Local-to-global AIR on the cross-epoch GlobalMemory bus (used in the global proof). +/// +/// `epoch_label` is this epoch's 1-based label; it is the `fini_epoch` constant +/// the fini token carries (not a trace column, since it's the same for every row). +fn l2g_global_air( + opts: &ProofOptions, + epoch_label: u64, +) -> AirWithBuses { + AirWithBuses::new( + local_to_global::cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: local_to_global::bus_interactions(epoch_label), + }, + opts, + 1, + empty_constraints(), + ) +} + +/// Local-to-global AIR on the epoch-local Memory bus (used inside an epoch proof). +/// +/// Carries the column range checks and the `init_epoch < fini_epoch` ordering +/// check too: this proof has the BITWISE provider, and the global proof commits +/// the identical trace (the commitment binding compares roots), so checking here +/// covers both. `epoch_label` is the `fini_epoch` constant used by both. +fn l2g_memory_air( + opts: &ProofOptions, + epoch_label: u64, +) -> AirWithBuses { + let interactions = [ + local_to_global::memory_bus_interactions(), + local_to_global::range_check_interactions(epoch_label), + ] + .concat(); + AirWithBuses::new( + local_to_global::cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { interactions }, + opts, + 1, + l2g_constraints(), + ) +} + +/// GLOBAL_MEMORY AIR for one touched page (the cross-epoch analog of PAGE). +/// +/// It sends each cell's genesis init and receives its finalization on the +/// GlobalMemory bus. The genesis `init` column is preprocessed, so the verifier +/// recomputes its commitment from the ELF — exactly PAGE's binding mechanism: +/// ELF-data pages via `page::compute_precomputed_commitment`, zero-init pages +/// (stack/heap) via the static zero-page commitment. The prover cannot choose +/// the genesis values. +fn global_memory_air( + opts: &ProofOptions, + config: &PageConfig, +) -> AirWithBuses { + let air = AirWithBuses::new( + global_memory::cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: global_memory::bus_interactions(config.page_base), + }, + opts, + 1, + empty_constraints(), + ); + let commitment = if config.init_values.is_some() { + page::compute_precomputed_commitment(config, opts) + } else { + page::zero_init_preprocessed_commitment(opts) + }; + air.with_preprocessed(commitment, global_memory::NUM_PREPROCESSED_COLS) +} + +/// The touched pages (sorted) and their ELF-derived genesis configs, rebuilt +/// identically by prover and verifier from the ELF + private input. Each cell +/// the program touched lives on one of these pages; a page in the ELF/input +/// image carries its bytes as `init`, every other (stack/heap) page is zero-init. +fn global_memory_configs( + boundaries: &[Vec], + elf: &Elf, + private_inputs: &[u8], +) -> Vec { + let image = build_initial_image_paged(elf, private_inputs); + let init_page_data = build_init_page_data(&image); + global_memory_configs_from_init_page_data(boundaries, &init_page_data) +} + +fn global_memory_configs_from_init_page_data( + boundaries: &[Vec], + init_page_data: &HashMap>, +) -> Vec { + let touched_pages: std::collections::BTreeSet = boundaries + .iter() + .flatten() + .map(|b| page::page_base_for_address(b.address)) + .collect(); + touched_pages + .into_iter() + .map(|page_base| match init_page_data.get(&page_base) { + Some(data) => PageConfig::with_data(page_base, data.clone()), + None => PageConfig::zero_init(page_base), + }) + .collect() +} + +/// Per-epoch register state and label. +struct EpochStart<'a> { + register_init: &'a [u32], + /// This epoch's 1-based table label (the `fini_epoch` constant). + label: u64, +} + +/// One epoch's proof plus everything a standalone verifier needs to re-check it +/// using ONLY the bundle (never the prover's in-memory traces). Each field is a +/// public value the verifier re-binds: a wrong value either makes the proof's +/// transcript challenges diverge or the AIRs not match the committed trace, so the +/// proof fails to verify. +/// +/// Note: continuation epochs use the L2G memory bookend, so PAGE is skipped and the +/// per-epoch page config set is empty — the verifier builds the AIRs with no PAGE +/// tables rather than trusting any prover-supplied page config. +#[derive(serde::Serialize, serde::Deserialize)] +struct EpochProof { + /// The epoch's STARK proof (its tables + the epoch-local L2G sub-table last). + proof: MultiProof, + /// Bytes this epoch committed — the COMMIT-bus receiver reference. + public_output: Vec, + /// Statement values the epoch transcript is seeded with (re-derived on verify). + table_counts: TableCounts, + /// Always zero for continuation epochs: PAGE is replaced by L2G, and private + /// input genesis is carried by the continuation bundle for global verification. + num_private_input_pages: usize, + /// Always empty for continuation epochs: PAGE tables are skipped, so runtime + /// pages are not part of the epoch AIR statement. + runtime_page_ranges: Vec, + /// The epoch's final register file `R_{i+1}` (its preprocessed FINI), which the + /// driver/verifier reuses as the next epoch's derived INIT — the cross-epoch + /// register binding. x254 (commit index) rides along at address 508. + reg_fini: Vec, + /// The committed L2G table root, tied to the global proof by + /// [`verify_l2g_commitment_binding`]. + l2g_root: Commitment, + /// Touched-cell boundaries; the verifier rebuilds the global AIRs (touched-page + /// set) from these. Values are redundant with the committed L2G trace. + boundary: Vec, +} + +/// A self-contained continuation proof: the per-epoch proofs in execution order, +/// the one cross-epoch global-memory proof, and the private inputs (needed to +/// rebuild the genesis image — bound by the global proof's genesis-from-ELF check). +/// +/// `verify_continuation` checks this using only the bundle and the ELF. It derives +/// serde, so it round-trips through `bincode` exactly like a monolithic `VmProof`. +#[derive(serde::Serialize, serde::Deserialize)] +pub struct ContinuationProof { + epochs: Vec, + global: MultiProof, + private_inputs: Vec, +} + +impl ContinuationProof { + /// Number of epochs the execution was split into. + pub fn num_epochs(&self) -> usize { + self.epochs.len() + } +} + +/// Build an epoch's AIRs identically on the prove and verify sides — the single +/// source of truth for the AIR set, so the two halves can never diverge. Mirrors +/// the old integrated path: `VmAirs` (HALT included iff `is_final`), with REGISTER +/// preprocessed to INIT = `register_init` and FINI = `reg_fini`. Continuation epochs +/// use the L2G bookend, so PAGE is skipped and `page_configs` is empty. The +/// epoch-local L2G air is built separately by the caller (it needs the `label`). +fn build_epoch_airs( + elf: &Elf, + opts: &ProofOptions, + page_configs: &[PageConfig], + table_counts: &TableCounts, + register_init: &[u32], + reg_fini: &[u32], + is_final: bool, +) -> VmAirs { + // Continuation epochs preprocess FINI = R_{i+1} too (not just INIT = R_i), so the + // final register file is a verifier-known public value bound by the REG-C2 + // Memory-bus token; reusing the same R_{i+1} as the next epoch's INIT binds + // init(epoch i+1) == fini(epoch i). + let register_preprocessed = Some(( + register::compute_precomputed_commitment_with_fini(opts, register_init, reg_fini), + register::NUM_PREPROCESSED_COLS_WITH_FINI, + )); + VmAirs::new( + elf, + opts, + false, + page_configs, + table_counts, + None, + is_final, + None, + None, + register_preprocessed, + ) +} + +/// Prove one epoch (prove half only). Commits its local-to-global table (built from +/// `boundary`) on the epoch-local Memory bus and its REGISTER table with FINI +/// preprocessed to the epoch's final register file. Returns the [`EpochProof`] the +/// standalone verifier later re-checks; does NOT verify here. +#[allow(clippy::too_many_arguments)] +fn prove_epoch( + elf: &Elf, + elf_bytes: &[u8], + start: &EpochStart, + mut traces: Traces, + is_final: bool, + boundary: &[CellBoundary], + opts: &ProofOptions, +) -> Result { + // Count this L2G table's range-check lookups into the BITWISE table so its + // AreBytes/IsHalfword multiplicities balance the range-check senders. + crate::tables::bitwise::update_multiplicities( + &mut traces.bitwise, + &local_to_global::collect_bitwise_from_l2g(boundary), + ); + + // Continuation epochs use the L2G bookend, so PAGE is skipped: page_configs is + // empty. The verifier hard-codes this (passes `&[]`); check the prover agrees so + // the two sides build identical AIRs. + if !traces.page_configs.is_empty() { + return Err(Error::ContinuationInvariant( + "continuation epoch must have no PAGE configs (L2G bookend replaces PAGE)".to_string(), + )); + } + + // R_{i+1}, read from the committed REGISTER trace (FINI, bound to the last write). + let reg_fini = register::fini_from_trace(&traces.register); + + let table_counts = traces.table_counts(); + let public_output = traces.public_output_bytes.clone(); + let runtime_page_ranges = traces.runtime_page_ranges(); + let num_private_input_pages = traces + .page_configs + .iter() + .filter(|c| c.is_private_input) + .count(); + + let airs = build_epoch_airs( + elf, + opts, + &[], + &table_counts, + start.register_init, + ®_fini, + is_final, + ); + + let label = start.label; + let seed = || { + epoch_transcript( + elf_bytes, + &public_output, + &table_counts, + num_private_input_pages, + &runtime_page_ranges, + label, + ) + }; + + let l2g_air = l2g_memory_air(opts, label); + // Build this epoch's L2G table from the cross-epoch boundary so it is identical + // to the one the global proof commits (the commitment binding compares their + // roots). It is appended to the proof below, not through `air_trace_pairs`. + let mut l2g_trace = local_to_global::generate_local_to_global_trace(boundary); + + let mut pairs = airs.air_trace_pairs(&mut traces); + pairs.push((&l2g_air, &mut l2g_trace, &())); + let proof = Prover::multi_prove( + pairs, + &mut seed(), + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .map_err(|e| Error::Prover(format!("{e:?}")))?; + + let l2g_root = proof + .proofs + .last() + .ok_or_else(|| { + Error::ContinuationInvariant("epoch proof is missing the L2G sub-table".to_string()) + })? + .lde_trace_main_merkle_root; + + Ok(EpochProof { + proof, + public_output, + table_counts, + num_private_input_pages, + runtime_page_ranges, + reg_fini, + l2g_root, + boundary: boundary.to_vec(), + }) +} + +/// Verify one epoch using ONLY the [`EpochProof`] bundle plus the verifier-derived +/// `register_init` (epoch 0: from the ELF; epoch i>0: from the previous epoch's +/// `reg_fini`), `is_final`, and `label`. Rebuilds the AIRs and transcript +/// from the bundle's statement values and indexes commits from the carried x254 +/// (`register_init[X254_INDEX]`), never from the prover's memory. PAGE is skipped for +/// continuation epochs, so the AIRs are built with no page configs (the bundle does +/// not get to supply any). Returns `true` iff the proof verifies and its committed +/// L2G root matches the claimed one. +fn verify_epoch( + elf: &Elf, + elf_bytes: &[u8], + epoch: &EpochProof, + register_init: &[u32], + is_final: bool, + label: u64, + opts: &ProofOptions, +) -> bool { + // Reject degenerate table counts (mirrors the monolithic verifier). + if epoch.table_counts.validate().is_err() { + return false; + } + + // Cross-check table_counts before building AIRs from bundle data. Continuation + // epochs have no PAGE proofs, and append one epoch-local L2G proof after the VM + // tables. HALT is present only on the final epoch. + let fixed_tables = if is_final { + FIXED_TABLE_COUNT + } else { + FIXED_TABLE_COUNT - 1 + }; + let expected_proof_count = epoch.table_counts.total() + fixed_tables + 1; + if expected_proof_count != epoch.proof.proofs.len() { + return false; + } + + let airs = build_epoch_airs( + elf, + opts, + &[], + &epoch.table_counts, + register_init, + &epoch.reg_fini, + is_final, + ); + let l2g_air = l2g_memory_air(opts, label); + let mut refs = airs.air_refs(); + refs.push(&l2g_air); + + let seed = || { + epoch_transcript( + elf_bytes, + &epoch.public_output, + &epoch.table_counts, + epoch.num_private_input_pages, + &epoch.runtime_page_ranges, + label, + ) + }; + + // Start the commit index from the carried x254 (the derived INIT), not a free + // input — this is what binds the per-epoch commit slice to its global position. + let commit_start_index = register_init + .get(register::X254_INDEX) + .copied() + .unwrap_or(0) as u64; + + let expected = match compute_expected_commit_bus_balance( + &refs, + &epoch.proof, + &epoch.public_output, + commit_start_index, + &mut seed(), + ) { + Some(expected) => expected, + None => return false, + }; + + if !Verifier::multi_verify(&refs, &epoch.proof, &mut seed(), &expected) { + return false; + } + + // The claimed L2G root must be the one this proof actually committed (it is what + // verify_l2g_commitment_binding later ties to the global proof). + epoch + .proof + .proofs + .last() + .map(|p| p.lde_trace_main_merkle_root) + == Some(epoch.l2g_root) +} + +/// Build the cross-epoch global memory proof: every epoch's L2G sub-table on the +/// GlobalMemory bus, plus one GLOBAL_MEMORY table per touched page that sends each +/// cell's genesis init (preprocessed from the ELF, so the verifier recomputes it) +/// and receives its final value. The bus balances iff every `fini` matches the next +/// epoch's `init` and every genesis value matches the ELF. +fn prove_global( + boundaries: &[Vec], + elf_bytes: &[u8], + init_page_data: &HashMap>, + opts: &ProofOptions, +) -> Result, Error> { + // Each cell's final state (boundaries are in epoch order, so the last fini wins). + let mut final_state: global_memory::FiniStateMap = HashMap::new(); + for epoch in boundaries { + for b in epoch { + final_state.insert( + b.address, + global_memory::FiniState { + value: (b.fini.value & 0xFF) as u8, + epoch: b.fini.epoch, + }, + ); + } + } + + let gm_configs = global_memory_configs_from_init_page_data(boundaries, init_page_data); + + let mut l2g_traces: Vec> = boundaries + .iter() + .map(|epoch| local_to_global::generate_local_to_global_trace(epoch)) + .collect(); + let mut gm_traces: Vec> = gm_configs + .iter() + .map(|config| global_memory::generate_global_trace(config, &final_state)) + .collect(); + + // One L2G air per epoch, each carrying its own 1-based `fini_epoch` constant. + let l2g_airs: Vec<_> = (0..boundaries.len()) + .map(|i| l2g_global_air(opts, local_to_global::epoch_label(i as u64))) + .collect(); + let gm_airs: Vec<_> = gm_configs + .iter() + .map(|config| global_memory_air(opts, config)) + .collect(); + + let mut pairs: Vec<(AirRef, &mut TraceTable, &())> = l2g_airs + .iter() + .zip(l2g_traces.iter_mut()) + .map(|(air, t)| (air as AirRef, t, &())) + .collect(); + for (air, trace) in gm_airs.iter().zip(gm_traces.iter_mut()) { + pairs.push((air as AirRef, trace, &())); + } + + Prover::multi_prove( + pairs, + &mut global_transcript(elf_bytes, boundaries.len()), + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .map_err(|e| Error::Prover(format!("{e:?}"))) +} + +fn verify_global( + boundaries: &[Vec], + proof: &MultiProof, + elf: &Elf, + elf_bytes: &[u8], + private_inputs: &[u8], + opts: &ProofOptions, +) -> bool { + // One L2G air per epoch, each with its own 1-based `fini_epoch` constant — + // must match the order/labels the global proof committed in `prove_global`. + let l2g_airs: Vec<_> = (0..boundaries.len()) + .map(|i| l2g_global_air(opts, local_to_global::epoch_label(i as u64))) + .collect(); + // Rebuild the genesis configs FROM THE ELF and recompute their commitments: + // this is the binding — a prover that claimed different genesis values would + // commit a different root and fail to verify. + let gm_configs = global_memory_configs(boundaries, elf, private_inputs); + let gm_airs: Vec<_> = gm_configs + .iter() + .map(|config| global_memory_air(opts, config)) + .collect(); + + let mut refs: Vec = l2g_airs.iter().map(|a| a as AirRef).collect(); + for air in &gm_airs { + refs.push(air as AirRef); + } + + Verifier::multi_verify( + &refs, + proof, + &mut global_transcript(elf_bytes, boundaries.len()), + &FieldElement::zero(), + ) +} + +/// Prove a full continuation and return a self-contained [`ContinuationProof`] +/// (prove half only — no verification). Splits the execution into `2^epoch_size_log2` +/// cycle epochs, proves each, and proves the one cross-epoch global-memory linkage. +/// +/// Intermediate epochs run exactly `2^epoch_size_log2` cycles, so their CPU tables +/// have power-of-two row counts and therefore zero padding rows — important because +/// CPU padding rows participate in the inline-PC `memory` chain (carrying pc=1) +/// which is only anchored by the HALT chip's emit_pc/consume_pc, and intermediate +/// epochs exclude HALT. With padding rows present and no HALT their pc=1 tokens +/// dangle and the Memory bus fails to balance; zero padding rows sidestep that. The +/// final epoch keeps its remainder and its HALT, so its padding chain is anchored as +/// usual. A program that fits in one epoch runs as a single final (monolithic-style) +/// epoch. +pub fn prove_continuation( + elf_bytes: &[u8], + private_inputs: &[u8], + epoch_size_log2: u32, + opts: &ProofOptions, +) -> Result { + if epoch_size_log2 < 2 { + return Err(Error::InvalidContinuationEpochSize( + "epoch_size_log2 must be at least 2 (4 cycles)".to_string(), + )); + } + let epoch_size = 1usize.checked_shl(epoch_size_log2).ok_or_else(|| { + Error::InvalidContinuationEpochSize(format!( + "epoch_size_log2 {epoch_size_log2} is too large for this platform" + )) + })?; + + let elf = Elf::load(elf_bytes).map_err(|e| Error::ElfLoad(format!("{e}")))?; + let mut executor = Executor::new(&elf, private_inputs.to_vec()) + .map_err(|e| Error::Execution(format!("{e}")))?; + + // The cross-epoch memory image, carried forward: epoch i+1's init is epoch i's + // fini, updated in place with each epoch's touched-cell final values. + let mut image = build_initial_image_paged(&elf, private_inputs); + let init_page_data = build_init_page_data(&image); + let mut provenance = + local_to_global::genesis_provenance(image.iter().map(|(a, v)| (a, v as u64))); + + let mut epochs: Vec = Vec::new(); + // The previous epoch's bound final register file R_{i+1}; epoch i+1's init is + // derived from it (the cross-epoch register binding). + let mut prev_fini: Option> = None; + + let mut index: u64 = 0; + loop { + if executor.pc() == 0 { + break; + } + // The cross-epoch ordering check (IsB20 on `fini_epoch - 1 - init_epoch`) + // only spans `local_to_global::MAX_EPOCHS` epochs. Beyond that the IsB20 bus + // cannot balance, so an honest proof is impossible — fail fast with a clear + // error instead of building an unprovable trace. The verifier already + // rejects any such proof; this is a prover-side guard for a clean message. + if index >= local_to_global::MAX_EPOCHS { + return Err(Error::InvalidContinuationEpochSize(format!( + "execution needs more than {} continuation epochs (the IsB20 cross-epoch \ + ordering range); use a larger epoch size", + local_to_global::MAX_EPOCHS + ))); + } + let register_init: Vec = if index == 0 { + register::register_init_from_entry_point(elf.entry_point) + } else { + // Epoch i+1's init is epoch i's bound fini, reused directly (same + // `register_word_address_list` order) — the cross-epoch register binding. + prev_fini.clone().ok_or_else(|| { + Error::ContinuationInvariant( + "previous epoch final registers are missing after the first epoch".to_string(), + ) + })? + }; + + // Run one epoch; `logs` is this epoch's chunk only (the executor clears it). + let logs = match executor + .resume_with_limit(epoch_size) + .map_err(|e| Error::Execution(format!("{e}")))? + { + Some(logs) => logs.to_vec(), + None => break, + }; + let is_final = executor.pc() == 0; + + // Invariant: a non-final epoch ran the full `epoch_size` (a power of two), + // so its CPU table has no padding rows. + if !is_final && logs.len() != epoch_size { + return Err(Error::ContinuationInvariant(format!( + "intermediate epoch ran {} cycles, expected {epoch_size}", + logs.len() + ))); + } + + let label = local_to_global::epoch_label(index); + let traces = Traces::from_image_and_logs( + &elf, + &image, + ®ister_init, + &logs, + &MaxRowsConfig::default(), + private_inputs, + is_final, + true, + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + )?; + let boundary = + local_to_global::epoch_boundary(&mut provenance, label, &traces.touched_memory_cells); + + let start = EpochStart { + register_init: ®ister_init, + label, + }; + let epoch = prove_epoch(&elf, elf_bytes, &start, traces, is_final, &boundary, opts)?; + prev_fini = Some(epoch.reg_fini.clone()); + + // Carry the image forward: this epoch's fini is the next epoch's init. + for cell in &boundary { + image.set(cell.address, (cell.fini.value & 0xFF) as u8); + } + epochs.push(epoch); + + if is_final { + break; + } + index += 1; + } + + // One global LogUp over all the (kept) local-to-global tables. + let all_boundaries: Vec> = + epochs.iter().map(|e| e.boundary.clone()).collect(); + let global = prove_global(&all_boundaries, elf_bytes, &init_page_data, opts)?; + + Ok(ContinuationProof { + epochs, + global, + private_inputs: private_inputs.to_vec(), + }) +} + +/// Verify a [`ContinuationProof`] using ONLY the bundle and the ELF — nothing from +/// the prover's memory. Returns `Ok(Some(public_output))` (the run-wide committed +/// bytes, reconstructed from the per-epoch bound slices) iff every check holds, else +/// `Ok(None)`. +/// +/// The verifier (1) enumerates epochs itself, assigning `epoch_label` and `is_final` +/// by position (a trusted enumeration); (2) verifies each epoch, deriving its +/// `register_init` from the ELF (epoch 0) or the previous epoch's bound `reg_fini` +/// (epoch i>0) — this is the cross-epoch register binding, and forces epoch 0 to start +/// at the genesis register file; (3) closes the cross-epoch GlobalMemory bus with +/// genesis rebuilt from the ELF; (4) ties each epoch's L2G root to the global proof; +/// (5) reconstructs the output by concatenating the per-epoch slices in order. +/// +/// Completeness is forced by the enumeration: epoch 0's INIT must be the ELF genesis +/// (else its preprocessed-INIT commitment mismatches), and the last epoch must be +/// `is_final` (HALT included — so the program actually terminated); a truncated run +/// would have a non-halting last epoch built with HALT and fail. +pub fn verify_continuation( + elf_bytes: &[u8], + bundle: &ContinuationProof, + opts: &ProofOptions, +) -> Result>, Error> { + if bundle.private_inputs.len() as u64 > MAX_PRIVATE_INPUT_SIZE { + return Err(Error::InvalidTableCounts(format!( + "private input size ({}) exceeds max ({MAX_PRIVATE_INPUT_SIZE})", + bundle.private_inputs.len() + ))); + } + + let elf = Elf::load(elf_bytes).map_err(|e| Error::ElfLoad(format!("{e}")))?; + + let n = bundle.epochs.len(); + if n == 0 { + return Ok(None); + } + + // Reject a malformed bundle up front. `reg_fini` is prover-supplied (deserialized, + // untrusted) and is indexed by `NUM_REGISTER_ADDRESSES` when building each epoch's + // preprocessed REGISTER commitment, so a wrong length would otherwise panic the + // verifier instead of cleanly rejecting the proof. + if bundle + .epochs + .iter() + .any(|e| e.reg_fini.len() != register::NUM_REGISTER_ADDRESSES) + { + return Ok(None); + } + + // Derived from the ELF for epoch 0, then from each epoch's bound fini. + let mut register_init = register::register_init_from_entry_point(elf.entry_point); + let mut epoch_roots: Vec = Vec::with_capacity(n); + let mut public_output: Vec = Vec::new(); + + for (index, epoch) in bundle.epochs.iter().enumerate() { + let is_final = index == n - 1; + let label = local_to_global::epoch_label(index as u64); + + if !verify_epoch( + &elf, + elf_bytes, + epoch, + ®ister_init, + is_final, + label, + opts, + ) { + return Ok(None); + } + + epoch_roots.push(epoch.l2g_root); + public_output.extend_from_slice(&epoch.public_output); + // Next epoch's init is this epoch's bound fini — the cross-epoch register + // (and x254) binding. A mismatched fini desyncs the next epoch's AIRs. + register_init = epoch.reg_fini.clone(); + } + + // Cross-epoch global memory: genesis rebuilt FROM THE ELF (+ private inputs), + // so the starting memory cannot be prover-chosen; the bus telescopes fini→init. + let all_boundaries: Vec> = + bundle.epochs.iter().map(|e| e.boundary.clone()).collect(); + if !verify_global( + &all_boundaries, + &bundle.global, + &elf, + elf_bytes, + &bundle.private_inputs, + opts, + ) { + return Ok(None); + } + + // Each epoch's committed L2G table is the same one the global proof used. + if !verify_l2g_commitment_binding(&epoch_roots, &bundle.global) { + return Ok(None); + } + + Ok(Some(public_output)) +} + +/// Convenience wrapper: prove then verify in one call (the original integrated API). +/// Returns `Ok(Some(public_output))` iff the continuation proves and verifies. +pub fn prove_and_verify_continuation( + elf_bytes: &[u8], + private_inputs: &[u8], + epoch_size_log2: u32, + opts: &ProofOptions, +) -> Result>, Error> { + let bundle = prove_continuation(elf_bytes, private_inputs, epoch_size_log2, opts)?; + verify_continuation(elf_bytes, &bundle, opts) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_utils::asm_elf_bytes; + + // `test_commit_split` issues two Commit syscalls, one early and one late, so a + // small epoch puts the second commit in a later epoch. That epoch starts with + // x254 > 0 (the carried commit index), which exercises the cross-epoch commit + // indexing: both the COMMIT trace and the verifier's `compute_commit_bus_offset` + // index from the carried x254 rather than 0. Regression test for that fix. + #[test] + fn test_commit_across_epochs_verifies() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("test_commit_split"); + let expected_output: [u8; 4] = [0xAA, 0xBB, 0xCC, 0xDD]; + + let total = Executor::new(&Elf::load(&elf_bytes).unwrap(), vec![]) + .unwrap() + .run() + .unwrap() + .logs + .len(); + + // Both commits in a single 64-cycle epoch (x254 starts at 0). + let single = prove_and_verify_continuation( + &elf_bytes, + &[], + 6, + &ProofOptions::default_test_options(), + ) + .unwrap(); + assert_eq!(single.as_deref(), Some(&expected_output[..])); + assert!(total <= (1 << 6), "single-epoch log2 must cover the run"); + + // The late commit (only `halt` follows it) lands past the midpoint, so a + // 16-cycle epoch forces it into a later epoch where x254 is already 2. + // Prove first so we can assert the run actually split into >1 epoch — without + // this the test would silently pass even if it degraded to a single epoch. + let bundle = + prove_continuation(&elf_bytes, &[], 4, &ProofOptions::default_test_options()).unwrap(); + assert!( + bundle.num_epochs() > 1, + "16-cycle epochs must split the run into multiple epochs" + ); + let split = verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()) + .unwrap(); + assert_eq!( + split.as_deref(), + Some(&expected_output[..]), + "commit in a later epoch must verify and aggregate to the same output" + ); + } + + // A memory-heavy multi-epoch continuation. `all_loadstore_32` is ~34 cycles, so + // `epoch_size_log2 = 3` (8 cycles) yields several intermediate epochs (each an + // exact power-of-two cycle count → no CPU padding rows) plus a final epoch. + #[test] + fn test_prove_and_verify_continuation() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let epoch_size_log2 = 3; + let epoch_size = 8; + // Guard against silent degradation: the program must be longer than one + // epoch, otherwise this collapses to a single final epoch and stops testing + // the cross-epoch (intermediate-epoch) path. + let total = Executor::new(&Elf::load(&elf_bytes).unwrap(), vec![]) + .unwrap() + .run() + .unwrap() + .logs + .len(); + assert!( + total > epoch_size, + "program too short ({total} cycles) to exercise intermediate epochs" + ); + assert!( + prove_and_verify_continuation( + &elf_bytes, + &[], + epoch_size_log2, + &ProofOptions::default_test_options() + ) + .unwrap() + .is_some() + ); + } + + // Regression for touched-cell prediction from carried registers. A syscall + // whose operand pointers live in registers (ECSM reads a0/a1/a2) can have those + // registers set in an EARLIER epoch than the call. `test_ecsm_split` sets + // a0/a1/a2 at the very start and runs the ECSM ~46 cycles later; + // `epoch_size_log2 = 5` (32 cycles) puts the pointer setup in epoch 0 and the + // ecall in epoch 1. The per-epoch touched-cell pass must carry registers across + // the boundary — otherwise it reads the pointers as 0, mispredicts the touched + // cells (and the ECSM operands), and the epoch cannot verify. + #[test] + fn test_ecsm_across_epochs_verifies() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("test_ecsm_split"); + let total = Executor::new(&Elf::load(&elf_bytes).unwrap(), vec![]) + .unwrap() + .run() + .unwrap() + .logs + .len(); + assert!(total > 32, "the ECSM ecall must fall past the first epoch"); + let out = prove_and_verify_continuation( + &elf_bytes, + &[], + 5, + &ProofOptions::default_test_options(), + ) + .unwrap(); + assert!( + out.is_some(), + "an ECSM whose pointer registers were set in an earlier epoch must still verify" + ); + } + + // Guards that the continuation API takes `epoch_size_log2` directly. A log2 of + // 4 produces 16-cycle epochs over the 33-cycle `test_commit_split`, putting its + // two commits in different epochs and exercising the cross-epoch x254 carry. + #[test] + fn test_continuation_epoch_size_log2() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("test_commit_split"); + let out = prove_and_verify_continuation( + &elf_bytes, + &[], + 4, + &ProofOptions::default_test_options(), + ) + .unwrap(); + assert_eq!(out.as_deref(), Some(&[0xAA, 0xBB, 0xCC, 0xDD][..])); + } + + #[test] + fn test_continuation_rejects_too_small_epoch_size_log2() { + assert!(matches!( + prove_continuation(&[], &[], 1, &ProofOptions::default_test_options()), + Err(Error::InvalidContinuationEpochSize(_)) + )); + } + + // ---- Standalone (split) prover/verifier ---- + + // Round-trip: a bundle from prove_continuation verifies on its own (only the + // bundle + ELF) and reconstructs the exact run-wide output. + #[test] + fn test_split_verify_roundtrip() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("test_commit_split"); + let bundle = + prove_continuation(&elf_bytes, &[], 4, &ProofOptions::default_test_options()).unwrap(); + let out = verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()) + .unwrap(); + assert_eq!(out.as_deref(), Some(&[0xAA, 0xBB, 0xCC, 0xDD][..])); + } + + // A bundle survives a bincode round-trip and still verifies to the same output — + // the serialization path the CLI's `prove`/`verify --continuations` relies on. + #[test] + fn test_continuation_bincode_roundtrip() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("test_commit_split"); + let bundle = + prove_continuation(&elf_bytes, &[], 4, &ProofOptions::default_test_options()).unwrap(); + + let bytes = bincode::serialize(&bundle).unwrap(); + let restored: ContinuationProof = bincode::deserialize(&bytes).unwrap(); + + let out = verify_continuation(&elf_bytes, &restored, &ProofOptions::default_test_options()) + .unwrap(); + assert_eq!(out.as_deref(), Some(&[0xAA, 0xBB, 0xCC, 0xDD][..])); + } + + // Negative: dropping the final (halting) epoch must be rejected — the new last + // epoch is non-halting but the verifier builds it as `is_final` (HALT included), + // so it can't verify. Guards completeness / no-truncation. + #[test] + fn test_split_verify_rejects_dropped_last_epoch() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let mut bundle = + prove_continuation(&elf_bytes, &[], 3, &ProofOptions::default_test_options()).unwrap(); + assert!(bundle.epochs.len() >= 3, "need multiple epochs"); + bundle.epochs.pop(); + assert!( + verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()) + .unwrap() + .is_none() + ); + } + + // Negative: reordering epochs must be rejected — each epoch proof is bound to its + // 1-based label (and register chain), so a swapped epoch fails to verify. Guards + // the trusted-enumeration ordering. + #[test] + fn test_split_verify_rejects_reordered_epochs() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let mut bundle = + prove_continuation(&elf_bytes, &[], 3, &ProofOptions::default_test_options()).unwrap(); + assert!(bundle.epochs.len() >= 3, "need multiple epochs"); + bundle.epochs.swap(0, 1); + assert!( + verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()) + .unwrap() + .is_none() + ); + } + + // Negative: corrupting an epoch's bound final register file (R_{i+1}) must be + // rejected — the verifier derives the next epoch's INIT from it, so it no longer + // matches that epoch's committed preprocessed INIT. Guards the cross-epoch + // register binding (incl. x254). + #[test] + fn test_split_verify_rejects_tampered_register_fini() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let mut bundle = + prove_continuation(&elf_bytes, &[], 3, &ProofOptions::default_test_options()).unwrap(); + assert!( + bundle.epochs.len() >= 2, + "need a second epoch to chain into" + ); + bundle.epochs[0].reg_fini[0] ^= 1; + assert!( + verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()) + .unwrap() + .is_none() + ); + } + + // Negative: a malformed bundle whose `reg_fini` has the wrong length must be + // rejected with `Ok(None)`, not panic the verifier. `reg_fini` is deserialized + // (untrusted) and indexed by `NUM_REGISTER_ADDRESSES` when building the + // preprocessed REGISTER commitment, so a short one would otherwise be an + // out-of-bounds panic in release builds. + #[test] + fn test_split_verify_rejects_malformed_register_fini_length() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let mut bundle = + prove_continuation(&elf_bytes, &[], 3, &ProofOptions::default_test_options()).unwrap(); + assert!(!bundle.epochs.is_empty()); + bundle.epochs[0].reg_fini.pop(); + assert!( + verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()) + .unwrap() + .is_none() + ); + } + + // Negative: table_counts are bundle data. Inflating a positive count must be + // rejected before the verifier builds AIRs from the malformed shape. + #[test] + fn test_split_verify_rejects_inflated_epoch_table_count() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let mut bundle = + prove_continuation(&elf_bytes, &[], 8, &ProofOptions::default_test_options()).unwrap(); + bundle.epochs[0].table_counts.cpu += 1; + assert!( + verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()) + .unwrap() + .is_none() + ); + } + + // Negative: the verifier rebuilds private-input genesis from bundle bytes. + // Changing those bytes after proving changes the global-memory preprocessed + // genesis commitment, so the standalone verifier must reject. + #[test] + fn test_split_verify_rejects_tampered_private_input_genesis() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("test_private_input_xpage"); + let private_inputs: Vec = (0u8..16).collect(); + let mut bundle = prove_continuation( + &elf_bytes, + &private_inputs, + 4, + &ProofOptions::default_test_options(), + ) + .unwrap(); + assert!( + verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()) + .unwrap() + .is_some(), + "baseline must verify before tampering" + ); + + bundle.private_inputs[4] ^= 0xFF; + assert!( + verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()) + .unwrap() + .is_none() + ); + } + + // Negative: verifier-side private inputs are deserialized/untrusted, so reject + // oversized bundles before rebuilding genesis page configs from them. + #[test] + fn test_split_verify_rejects_oversized_private_inputs() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let mut bundle = + prove_continuation(&elf_bytes, &[], 8, &ProofOptions::default_test_options()).unwrap(); + bundle.private_inputs = vec![0; MAX_PRIVATE_INPUT_SIZE as usize + 1]; + assert!(matches!( + verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()), + Err(Error::InvalidTableCounts(_)) + )); + } + + // The bundle's `boundary` field is used only to rebuild the global AIRs' touched- + // PAGE set (genesis is recomputed from the ELF). The cross-epoch memory values + // live in the committed L2G traces, tied to the epoch proofs by + // `verify_l2g_commitment_binding` (exercised by test_split_verify_rejects_tampered_l2g_root + // below). Tampering a boundary value is therefore inconsequential; omitting/adding + // a touched page is caught by the GlobalMemory bus (unmatched fini / air count + // mismatch). So there is no meaningful "tamper a boundary value" negative test. + + // Negative: corrupting an epoch's claimed L2G table root must be rejected — + // `verify_l2g_commitment_binding` compares each epoch's `l2g_root` against the + // corresponding sub-proof root in the global proof, so a mismatched root causes + // the binding to fail. Guards the L2G root↔global commitment binding. + #[test] + fn test_split_verify_rejects_tampered_l2g_root() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let mut bundle = + prove_continuation(&elf_bytes, &[], 3, &ProofOptions::default_test_options()).unwrap(); + assert!( + bundle.epochs.len() >= 2, + "need multiple epochs to exercise the binding" + ); + bundle.epochs[0].l2g_root[0] ^= 0xFF; + assert!( + verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()) + .unwrap() + .is_none() + ); + } +} diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 81233d39f..143d1ead6 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -13,10 +13,12 @@ #[cfg(feature = "disk-spill")] pub mod auto_storage; pub mod constraints; +pub mod continuation; #[cfg(feature = "debug-checks")] mod debug_report; #[cfg(feature = "instruments")] pub mod instruments; +mod paged_mem; mod statement; pub mod tables; pub mod test_utils; @@ -37,7 +39,7 @@ use stark::storage_mode::StorageMode; use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; -use crate::statement::absorb_statement; +use crate::statement::{StatementKind, absorb_statement}; pub use crate::tables::MaxRowsConfig; use crate::tables::bitwise; use crate::tables::decode; @@ -184,6 +186,13 @@ pub enum Error { Prover(String), /// Proof contains invalid table_counts (e.g. zero for a required table) InvalidTableCounts(String), + /// Continuation epoch size exponent is invalid. + InvalidContinuationEpochSize(String), + /// Continuation proof construction hit an internal invariant failure. + ContinuationInvariant(String), + /// A non-final continuation epoch contains the program-terminating + /// instruction. The terminating instruction must be in the final epoch. + HaltInNonFinalEpoch, } impl fmt::Display for Error { @@ -197,6 +206,18 @@ impl fmt::Display for Error { Error::Execution(msg) => write!(f, "execution error: {msg}"), Error::Prover(msg) => write!(f, "proving error: {msg}"), Error::InvalidTableCounts(msg) => write!(f, "invalid table_counts: {msg}"), + Error::InvalidContinuationEpochSize(msg) => { + write!(f, "invalid continuation epoch size: {msg}") + } + Error::ContinuationInvariant(msg) => { + write!(f, "continuation invariant failed: {msg}") + } + Error::HaltInNonFinalEpoch => { + write!( + f, + "the program-terminating instruction must be in the final epoch" + ) + } } } } @@ -234,6 +255,9 @@ pub(crate) struct VmAirs { pub register: VmAir, pub pages: Vec, pub memw_registers: Vec, + /// Whether the HALT table participates in this proof. False for intermediate + /// continuation epochs, which do not terminate the program. + pub include_halt: bool, // Auxiliary ALU / memory / CPU32 dispatch chips pub eqs: Vec, pub bytewises: Vec, @@ -247,7 +271,6 @@ impl VmAirs { let mut pairs: Vec> = vec![ (&self.bitwise, &mut traces.bitwise, &()), (&self.decode, &mut traces.decode, &()), - (&self.halt, &mut traces.halt, &()), (&self.commit, &mut traces.commit, &()), (&self.keccak, &mut traces.keccak, &()), (&self.keccak_rnd, &mut traces.keccak_rnd, &()), @@ -257,6 +280,9 @@ impl VmAirs { (&self.ecdas, &mut traces.ecdas, &()), (&self.register, &mut traces.register, &()), ]; + if self.include_halt { + pairs.push((&self.halt, &mut traces.halt, &())); + } for (air, trace) in self.cpus.iter().zip(traces.cpus.iter_mut()) { pairs.push((air, trace, &())); @@ -320,7 +346,6 @@ impl VmAirs { let mut refs: Vec<&dyn AIR> = vec![ &self.bitwise, &self.decode, - &self.halt, &self.commit, &self.keccak, &self.keccak_rnd, @@ -330,6 +355,9 @@ impl VmAirs { &self.ecdas, &self.register, ]; + if self.include_halt { + refs.push(&self.halt); + } for air in &self.cpus { refs.push(air); @@ -410,6 +438,7 @@ impl VmAirs { /// here. A wrong value is rejected, never silently accepted: it either /// mismatches the prover's committed precomputed root (an explicit /// verifier check) or yields diverging Fiat-Shamir challenges. + #[allow(clippy::too_many_arguments)] pub fn new( elf: &Elf, proof_options: &ProofOptions, @@ -417,7 +446,10 @@ impl VmAirs { page_configs: &[crate::tables::page::PageConfig], table_counts: &TableCounts, decode_commitment: Option, + include_halt: bool, + register_init: Option<&[u32]>, page_commitments: Option<&[(u64, Commitment)]>, + register_preprocessed: Option<(Commitment, usize)>, ) -> Self { let cpus: Vec<_> = (0..table_counts.cpu) .map(|i| create_cpu_air(proof_options).with_name(&format!("CPU[{}]", i))) @@ -471,10 +503,17 @@ impl VmAirs { let ecsm = create_ecsm_air(proof_options); let ec_scalar = create_ec_scalar_air(proof_options); let ecdas = create_ecdas_air(proof_options); - let register = create_register_air(proof_options).with_preprocessed( - register::preprocessed_commitment(proof_options, elf.entry_point), - register::NUM_PREPROCESSED_COLS, - ); + let register = if let Some((commitment, num_preprocessed_cols)) = register_preprocessed { + create_register_air(proof_options).with_preprocessed(commitment, num_preprocessed_cols) + } else { + let register_init = register_init + .map(<[u32]>::to_vec) + .unwrap_or_else(|| register::register_init_from_entry_point(elf.entry_point)); + create_register_air(proof_options).with_preprocessed( + register::preprocessed_commitment(proof_options, ®ister_init), + register::NUM_PREPROCESSED_COLS, + ) + }; // Every zero-init page shares one preprocessed commitment: OFFSET is // page-relative and INIT is all-zero, so it depends only on // (blowup, coset) — all fixed here. Compute it once (static const @@ -553,6 +592,7 @@ impl VmAirs { register, pages, memw_registers, + include_halt, eqs, bytewises, stores, @@ -597,6 +637,7 @@ pub(crate) fn replay_transcript_phase_a( /// which the caller should treat as verification failure. pub(crate) fn compute_commit_bus_offset( public_output: &[u8], + start_index: u64, z: &FieldElement, alpha: &FieldElement, ) -> Option> { @@ -607,13 +648,16 @@ pub(crate) fn compute_commit_bus_offset( let bus_id = FieldElement::::from(BusId::Commit as u64); let alpha_sq = alpha * alpha; - // fingerprint_i = z - (BusId::Commit + i·α + value_i·α²) + // fingerprint_i = z - (BusId::Commit + (start_index + i)·α + value_i·α²). + // `start_index` is the carried x254: 0 for a monolithic proof or the first + // epoch, nonzero for a continuation epoch whose commits continue a prior one. let mut fingerprints: Vec> = public_output .iter() .enumerate() .map(|(i, &value)| { + let global_index = start_index + i as u64; let linear_combination = bus_id - + (FieldElement::::from(i as u64) * alpha) + + (FieldElement::::from(global_index) * alpha) + (FieldElement::::from(value as u64) * alpha_sq); z - linear_combination }) @@ -639,10 +683,32 @@ pub(crate) fn compute_expected_commit_bus_balance( airs: &[&dyn AIR], proof: &MultiProof, public_output_bytes: &[u8], + start_index: u64, transcript: &mut DefaultTranscript, ) -> Option> { let (z, alpha) = replay_transcript_phase_a(airs, proof, transcript); - compute_commit_bus_offset(public_output_bytes, &z, &alpha) + compute_commit_bus_offset(public_output_bytes, start_index, &z, &alpha) +} + +/// Bind the final cross-epoch GlobalMemory proof to the per-epoch proofs. +/// +/// The final proof commits one local-to-global sub-table per epoch as its first +/// `N` tables, so `final_proof.proofs[i].lde_trace_main_merkle_root` is epoch +/// `i`'s L2G commitment. `epoch_l2g_roots[i]` is the same root as committed in +/// epoch `i`'s own proof. Equal roots prove the cross-epoch matching ran over +/// the very same L2G tables the epochs committed (shared commitments). +/// +/// Called by `continuation::verify_continuation`; also exercised by the +/// local-to-global bus tests. +pub(crate) fn verify_l2g_commitment_binding( + epoch_l2g_roots: &[Commitment], + final_proof: &MultiProof, +) -> bool { + final_proof.proofs.len() >= epoch_l2g_roots.len() + && epoch_l2g_roots + .iter() + .enumerate() + .all(|(i, root)| final_proof.proofs[i].lde_trace_main_merkle_root == *root) } // ============================================================================= @@ -772,6 +838,9 @@ pub fn prove_with_options_and_inputs( &traces.page_configs, &table_counts, None, + true, + None, + None, None, ); @@ -793,6 +862,7 @@ pub fn prove_with_options_and_inputs( let mut transcript = DefaultTranscript::::new(&[]); absorb_statement( &mut transcript, + StatementKind::Monolithic, elf_bytes, &traces.public_output_bytes, &table_counts, @@ -930,7 +1000,10 @@ pub fn verify_with_options( &page_configs, &vm_proof.table_counts, decode_commitment, + true, + None, page_commitments, + None, ); // Recompute the COMMIT output bus offset from VmProof.public_output. @@ -944,6 +1017,7 @@ pub fn verify_with_options( let mut transcript = DefaultTranscript::::new(&[]); absorb_statement( &mut transcript, + StatementKind::Monolithic, elf_bytes, &vm_proof.public_output, &vm_proof.table_counts, @@ -959,6 +1033,8 @@ pub fn verify_with_options( &air_refs, &vm_proof.proof, &vm_proof.public_output, + // Monolithic proof: commits are indexed from 0. + 0, &mut transcript_for_replay, ) { Some(balance) => balance, diff --git a/prover/src/paged_mem.rs b/prover/src/paged_mem.rs new file mode 100644 index 000000000..196d077bf --- /dev/null +++ b/prover/src/paged_mem.rs @@ -0,0 +1,190 @@ +//! Page-bucketed dense memory store: `page_base -> [T; PAGE_SIZE]`. +//! +//! The prover's per-cell memory bookkeeping (the local-to-global `provenance`, +//! and the carried memory `image`) is `O(footprint)` and held across the whole +//! run. A per-cell `HashMap` is wasteful for that: per cell it also stores the +//! 8-byte address key, hashing metadata, and ~30% empty load-factor slack. +//! +//! Measurement (ethrex 1-tx, `bench_continuation footprint`) showed the touched +//! footprint is ~98% two big *contiguous* blocks — i.e. dense. For dense data a +//! flat array indexed by offset is far cheaper: no keys, no hashing, no slack, +//! and cache-friendly. This stores one dense `[T; PAGE_SIZE]` array per touched +//! 256 KB page, in a small `Vec` sorted by page base (few entries — binary-search +//! lookup + sorted insert, no hashing at all; the bulk lives in the arrays). +//! +//! Unset cells read back as `fill` (the genesis/default value) — pages are +//! allocated filled — so callers that only `get`/`set` need no occupancy map. +//! An occupancy bitmap is tracked so [`PagedMem::iter`] can yield exactly the +//! cells that were explicitly `set`. + +use std::collections::HashMap; + +use crate::tables::page::DEFAULT_PAGE_SIZE; + +const WORD_BITS: usize = 64; +const OCC_WORDS: usize = DEFAULT_PAGE_SIZE / WORD_BITS; + +struct Page { + /// Dense values, length `DEFAULT_PAGE_SIZE`, initialized to `fill`. + data: Box<[T]>, + /// 1 bit per offset: set iff that offset was explicitly written via `set`. + occupied: Box<[u64]>, +} + +/// A dense, page-bucketed `addr -> T` store. Cheaper than a per-cell `HashMap` +/// when the touched addresses are contiguous. `get` on an unset cell returns +/// the `fill` value supplied at construction. +/// +/// The pages themselves are kept in a `Vec` sorted by base address (page bases +/// are sparse across the 64-bit space, so a flat Vec-by-page-number is +/// infeasible, but there are only a handful of touched pages, so binary-search +/// lookup + sorted insert are cheap — and no hashing). The bulk (the cells) +/// lives in each page's dense array. +pub struct PagedMem { + pages: Vec<(u64, Page)>, + fill: T, +} + +impl PagedMem { + /// Create an empty store. Unset cells read back as `fill`. + pub fn new(fill: T) -> Self { + Self { + pages: Vec::new(), + fill, + } + } + + #[inline] + fn split(addr: u64) -> (u64, usize) { + // DEFAULT_PAGE_SIZE is a power of two, so the mask isolates the offset. + let mask = DEFAULT_PAGE_SIZE as u64 - 1; + (addr & !mask, (addr & mask) as usize) + } + + /// Value at `addr`, or `fill` if never `set`. + #[inline] + pub fn get(&self, addr: u64) -> T { + let (base, off) = Self::split(addr); + match self.pages.binary_search_by_key(&base, |(b, _)| *b) { + Ok(i) => self.pages[i].1.data[off], + Err(_) => self.fill, + } + } + + /// Set `addr` to `val`, allocating its page (filled) on first touch. + #[inline] + pub fn set(&mut self, addr: u64, val: T) { + let (base, off) = Self::split(addr); + let i = match self.pages.binary_search_by_key(&base, |(b, _)| *b) { + Ok(i) => i, + Err(i) => { + self.pages.insert( + i, + ( + base, + Page { + data: vec![self.fill; DEFAULT_PAGE_SIZE].into_boxed_slice(), + occupied: vec![0u64; OCC_WORDS].into_boxed_slice(), + }, + ), + ); + i + } + }; + let page = &mut self.pages[i].1; + page.data[off] = val; + page.occupied[off / WORD_BITS] |= 1u64 << (off % WORD_BITS); + } + + /// Base addresses of the pages that hold at least one `set` cell, ascending. + /// (For a `DEFAULT_PAGE_SIZE`-aligned page, this equals `page_base_for_address` + /// of every cell in it, so it replaces `cells.keys().map(page_base)`.) + pub fn page_bases(&self) -> impl Iterator + '_ { + self.pages.iter().map(|(b, _)| *b) + } + + /// Number of cells that were explicitly `set`. + pub fn len(&self) -> usize { + self.pages + .iter() + .map(|(_, p)| { + p.occupied + .iter() + .map(|w| w.count_ones() as usize) + .sum::() + }) + .sum() + } + + /// True if no cell was ever `set`. + pub fn is_empty(&self) -> bool { + self.pages + .iter() + .all(|(_, p)| p.occupied.iter().all(|&w| w == 0)) + } + + /// Iterate `(addr, value)` over exactly the cells that were `set`. + pub fn iter(&self) -> impl Iterator + '_ { + self.pages.iter().flat_map(|(base, page)| { + let base = *base; + page.occupied + .iter() + .enumerate() + .flat_map(move |(w, &bits)| { + BitIter { bits }.map(move |b| { + let off = w * WORD_BITS + b; + (base + off as u64, page.data[off]) + }) + }) + }) + } +} + +/// A read-only initial-memory image: `addr -> byte`, with an iterator over the +/// bytes it holds. Implemented for both `HashMap` (the monolithic +/// prover's image) and [`PagedMem`] (the continuation's carried image), so +/// trace generation can consume either without changing the monolithic path. +pub trait ImageSource { + /// Byte at `addr`, or 0 if absent. + fn image_get(&self, addr: u64) -> u8; + /// Iterate `(addr, byte)` over every byte present in the image. + fn image_iter(&self) -> impl Iterator + '_; +} + +impl ImageSource for HashMap { + #[inline] + fn image_get(&self, addr: u64) -> u8 { + self.get(&addr).copied().unwrap_or(0) + } + fn image_iter(&self) -> impl Iterator + '_ { + self.iter().map(|(&addr, &byte)| (addr, byte)) + } +} + +impl ImageSource for PagedMem { + #[inline] + fn image_get(&self, addr: u64) -> u8 { + self.get(addr) + } + fn image_iter(&self) -> impl Iterator + '_ { + self.iter() + } +} + +/// Yields the set-bit indices of a 64-bit word, low to high. +struct BitIter { + bits: u64, +} + +impl Iterator for BitIter { + type Item = usize; + fn next(&mut self) -> Option { + if self.bits == 0 { + None + } else { + let b = self.bits.trailing_zeros() as usize; + self.bits &= self.bits - 1; // clear lowest set bit + Some(b) + } + } +} diff --git a/prover/src/statement.rs b/prover/src/statement.rs index 7935abe66..cca961be5 100644 --- a/prover/src/statement.rs +++ b/prover/src/statement.rs @@ -24,15 +24,34 @@ fn elf_digest(elf: &[u8]) -> [u8; 32] { h.finalize().into() } +/// Which statement is being bound. Selects the leading domain tag and whether an +/// epoch label is appended, so monolithic and continuation-epoch proofs share one +/// function while each starts with its own tag. `Monolithic` reproduces the +/// original encoding byte-for-byte (no label), so existing proofs are unaffected. +#[derive(Clone, Copy)] +pub(crate) enum StatementKind { + /// Whole-program (monolithic) proof. + Monolithic, + /// One continuation epoch proof, pinned to its position by `epoch_label`. + ContinuationEpoch { epoch_label: u64 }, +} + pub(crate) fn absorb_statement( t: &mut impl IsTranscript, + kind: StatementKind, elf_bytes: &[u8], public_output: &[u8], table_counts: &TableCounts, num_private_input_pages: usize, runtime_page_ranges: &[RuntimePageRange], ) { - t.append_bytes(DOMAIN_TAG); + // Leading domain tag — distinct per statement kind, so a monolithic proof and + // a continuation epoch proof can never share a transcript prefix. + let domain_tag = match kind { + StatementKind::Monolithic => DOMAIN_TAG, + StatementKind::ContinuationEpoch { .. } => CONTINUATION_EPOCH_TAG, + }; + t.append_bytes(domain_tag); // ELF: fixed 32-byte digest — no length prefix needed. t.append_bytes(&elf_digest(elf_bytes)); @@ -90,4 +109,29 @@ pub(crate) fn absorb_statement( t.append_bytes(&base.to_le_bytes()); t.append_bytes(&count.to_le_bytes()); } + + // Continuation epochs additionally bind their position (replay protection). + // Monolithic proofs append nothing here, so their encoding is unchanged. + if let StatementKind::ContinuationEpoch { epoch_label } = kind { + t.append_bytes(&epoch_label.to_le_bytes()); + } +} + +/// Continuation domain tags. Distinct from the monolithic `DOMAIN_TAG` so a +/// monolithic proof and a continuation proof can never share a transcript prefix. +const CONTINUATION_EPOCH_TAG: &[u8] = b"LAMBDAVM_CONTINUATION_EPOCH_V1"; +const CONTINUATION_GLOBAL_TAG: &[u8] = b"LAMBDAVM_CONTINUATION_GLOBAL_V1"; + +/// Statement bound into the cross-epoch **global** proof's transcript before +/// Phase A: the ELF (so the global proof is program-bound) and the epoch count +/// (so a global proof from a run with a different number of epochs cannot be +/// spliced in). Prove and verify must call this with identical arguments. +pub(crate) fn absorb_continuation_global_statement( + t: &mut impl IsTranscript, + elf_bytes: &[u8], + num_epochs: usize, +) { + t.append_bytes(CONTINUATION_GLOBAL_TAG); + t.append_bytes(&elf_digest(elf_bytes)); + t.append_bytes(&(num_epochs as u64).to_le_bytes()); } diff --git a/prover/src/tables/global_memory.rs b/prover/src/tables/global_memory.rs new file mode 100644 index 000000000..b3f22b172 --- /dev/null +++ b/prover/src/tables/global_memory.rs @@ -0,0 +1,203 @@ +//! GLOBAL_MEMORY table for cross-epoch memory initialization and finalization. +//! +//! The cross-epoch analog of PAGE (`page.rs`): one dense table instance per +//! touched page, bookending the `GlobalMemory` bus that links each epoch's +//! local-to-global (`local_to_global.rs`) boundary claims. For every byte of the +//! page it **sends** a genesis token (the cell's program-start value) and +//! **receives** a finalization token (the cell's value after the last epoch that +//! touched it). Untouched bytes send and receive the identical token, so they +//! cancel — exactly as PAGE's init/fini bookend does on the epoch-local bus. +//! +//! Because the genesis value lives in a PREPROCESSED column (OFFSET + INIT, +//! byte-for-byte identical to PAGE's), the verifier recomputes the same +//! commitment from the ELF via [`page::compute_precomputed_commitment`]. This +//! binds the program's initial memory to the ELF binary. +//! +//! ## Columns +//! +//! | Column | Type | Description | +//! |--------|------|-------------| +//! | offset | RowIndex | 0, 1, ..., page_size-1 (preprocessed) | +//! | init | Byte | Genesis value (from ELF or 0) (preprocessed) | +//! | fini | Byte | Value after the last touching epoch | +//! | fini_epoch | Epoch | Last touching epoch (`GENESIS_EPOCH` if untouched) | +//! +//! Virtual: `address = page_base + offset`, `page_base` constant per instance. +//! +//! ## Bus Interactions +//! +//! GlobalMemory token: `[address_lo, address_hi, value, epoch]` (same order as +//! `local_to_global::bus_interactions`; no timestamp — the chain is ordered by epoch). +//! +//! | Tag | Bus | Token | Multiplicity | +//! |-----|-----|-------|--------------| +//! | GM-GENESIS | GlobalMemory | `[address, init, GENESIS]` | 1 (sender) | +//! | GM-FINAL | GlobalMemory | `[address, fini, fini_epoch]` | 1 (receiver) | + +use std::collections::HashMap; + +use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity}; +use stark::trace::TraceTable; + +use super::local_to_global::{GENESIS_EPOCH, direct}; +use super::page::{DEFAULT_PAGE_SIZE, PageConfig}; +use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; + +// ========================================================================= +// Column indices +// ========================================================================= + +/// Column definitions for the GLOBAL_MEMORY table. +/// +/// `address` is virtual, computed as `page_base + offset`; it is NOT a column. +pub mod cols { + /// offset: Row index (0, 1, ..., page_size-1) - preprocessed + pub const OFFSET: usize = 0; + + /// init: Genesis byte value (from ELF or 0) - preprocessed + pub const INIT: usize = 1; + + // Note: there is no init-epoch column. The genesis token always carries + // `GENESIS_EPOCH`, so the GM-GENESIS sender emits it as a constant (like L2G's + // `fini_epoch`), saving a column and removing a prover-chosen value. + + /// fini: Final byte value after the last touching epoch + pub const FINI: usize = 2; + + /// fini_epoch: Last epoch that touched the cell (`GENESIS_EPOCH` if untouched) + pub const FINI_EPOCH: usize = 3; + + // Note: no fini-timestamp column. The GlobalMemory bus carries no timestamp + // (the cross-epoch chain is ordered by epoch); timestamps are epoch-local. + + /// Total number of columns + pub const NUM_COLUMNS: usize = 4; +} + +/// Number of preprocessed columns (OFFSET, INIT). Identical to PAGE's preprocessed +/// columns, so the preprocessed commitment is shared with PAGE — compute it with +/// [`page::compute_precomputed_commitment`]. +pub const NUM_PREPROCESSED_COLS: usize = 2; + +// ========================================================================= +// Types +// ========================================================================= + +/// Final state for a single byte address after the last epoch that touched it. +#[derive(Debug, Clone, Copy, Default)] +pub struct FiniState { + /// Final byte value. + pub value: u8, + /// Index of the last epoch that touched the cell. + pub epoch: u64, +} + +/// Map from byte address to final state, for the bytes touched across all epochs. +pub type FiniStateMap = HashMap; + +// ========================================================================= +// Trace generation +// ========================================================================= + +/// Generates a GLOBAL_MEMORY trace for a single page. +/// +/// `config` supplies `page_base` and the genesis `init_values` (from the ELF); +/// `final_state` maps each touched byte to its final value and last-touch epoch. +pub fn generate_global_trace( + config: &PageConfig, + final_state: &FiniStateMap, +) -> TraceTable { + let page_size = DEFAULT_PAGE_SIZE; + let page_base = config.page_base; + + assert!( + page_base.is_multiple_of(page_size as u64), + "Page base must be page-aligned" + ); + + let num_rows = page_size; // One row per byte in the page + let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS]; + + for offset in 0..page_size { + let byte_addr = page_base + (offset as u64); + let base = offset * cols::NUM_COLUMNS; + + // Offset (preprocessed) - address is virtual: page_base + offset + data[base + cols::OFFSET] = FE::from(offset as u64); + + // Genesis value (init_values may be shorter than the page → trailing zeros) + let init_value = config + .init_values + .as_ref() + .and_then(|v| v.get(offset).copied()) + .unwrap_or(0); + data[base + cols::INIT] = FE::from(init_value as u64); + + // Final state: if touched use it, otherwise the cell stays at genesis + // (fini=init, epoch=GENESIS) so its genesis/finalization tokens cancel. + let (fini_value, fini_epoch) = match final_state.get(&byte_addr) { + Some(state) => (state.value, state.epoch), + None => (init_value, GENESIS_EPOCH), + }; + + data[base + cols::FINI] = FE::from(fini_value as u64); + data[base + cols::FINI_EPOCH] = FE::from(fini_epoch); + } + + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) +} + +// ========================================================================= +// Bus interactions +// ========================================================================= + +/// Creates the GlobalMemory bus interactions for a GLOBAL_MEMORY table. +/// +/// The token order matches `local_to_global::bus_interactions` exactly: +/// `[address_lo, address_hi, value, epoch]` (no timestamp — the cross-epoch chain +/// is ordered by epoch). The address is computed as `page_base + offset` via a +/// linear combination, like PAGE. +/// +/// - GM-GENESIS: sends `[address, init, GENESIS]` — the token an L2G +/// init-receiver consumes for a genesis-origin cell. +/// - GM-FINAL: receives `[address, fini, fini_epoch]` — the token the +/// last touching epoch's L2G fini-sender produces. +pub fn bus_interactions(page_base: u64) -> Vec { + let page_base_lo = page_base & 0xFFFF_FFFF; + let page_base_hi = page_base >> 32; + + let address_lo = BusValue::linear(vec![ + LinearTerm::Constant(page_base_lo as i64), + LinearTerm::Column { + coefficient: 1, + column: cols::OFFSET, + }, + ]); + let address_hi = BusValue::constant(page_base_hi); + + vec![ + // GM-GENESIS: send the genesis token [address, init, GENESIS]. No timestamp: + // the GlobalMemory chain is ordered by epoch (timestamps are epoch-local). + BusInteraction::sender( + BusId::GlobalMemory, + Multiplicity::One, + vec![ + address_lo.clone(), + address_hi.clone(), + direct(cols::INIT), + BusValue::constant(GENESIS_EPOCH), + ], + ), + // GM-FINAL: receive the finalization token [address, fini, fini_epoch]. + BusInteraction::receiver( + BusId::GlobalMemory, + Multiplicity::One, + vec![ + address_lo, + address_hi, + direct(cols::FINI), + direct(cols::FINI_EPOCH), + ], + ), + ] +} diff --git a/prover/src/tables/local_to_global.rs b/prover/src/tables/local_to_global.rs new file mode 100644 index 000000000..3f9221d57 --- /dev/null +++ b/prover/src/tables/local_to_global.rs @@ -0,0 +1,831 @@ +//! Local-to-global memory boundary claims for cross-epoch continuations. +//! +//! Each epoch, for every memory cell it touches, +//! makes an `init` claim (the cell's value when first touched this epoch, which +//! earlier epoch last wrote it, and that write's timestamp) and a `fini` claim +//! (the cell's value at this epoch's end, this epoch's index, and the last +//! access timestamp). A final LogUp matches each `fini` against the `init` of the +//! next epoch that touches the same cell, proving global memory consistency. +//! +//! ## Epoch labels +//! +//! Epochs are labelled 1-based (epoch index `i` → label `i+1`) and the genesis +//! sentinel is `0` ([`GENESIS_EPOCH`]). This makes "the originating epoch is +//! strictly earlier" a plain `init_epoch < fini_epoch` — genesis (`0`) is below +//! every real epoch, so it needs no special case. +//! +//! ## Ordering constraint +//! +//! The GlobalMemory LogUp only proves the init/fini tokens *match as a set*; it +//! does not by itself force the chain to be consumed in increasing-epoch order. +//! Without that, a prover could let an init consume a *later* epoch's fini (a +//! backward/self edge), seeding a cell with an unjustified value. So each real +//! row also proves `init_epoch < fini_epoch` via an `IsB20` lookup on +//! `fini_epoch − 1 − init_epoch` (it must be a valid 20-bit value). This bounds +//! the number of epochs to `< 2^20` (~1M) — unreachable in practice (optimal +//! epochs are millions of cycles, so thousands of epochs) and fails closed. +//! +//! ## Range-checked columns +//! +//! A column needs an explicit range check only if nothing else already pins it. +//! Most L2G columns travel on the epoch-local `Memory` bus and are matched there +//! against MEMW, which already range/order-checks address, timestamp and value — +//! exactly how PAGE relies on MEMW in the monolithic prover. So `address` and +//! `fini_timestamp` are plain 32-bit columns with no extra check, and the value +//! bytes get the same batched `AreBytes` check PAGE uses (the `init` value is a +//! trusted source, so it must be checked). `fini_epoch` is the same constant for +//! every row of an epoch's table, so it is supplied as a per-table constant (not +//! a column) by [`bus_interactions`]. +//! +//! The only column that lives ONLY on the cross-epoch `GlobalMemory` bus has no +//! MEMW partner: `init_epoch`. It is stored as two 16-bit halfword columns, each +//! checked via `IsHalfword`, and the 32-bit bus value is rebuilt from them by a +//! linear combination (see [`word`]). The checks are emitted on the epoch-local +//! table (which has the BITWISE provider); the global proof commits the identical +//! trace (the commitment binding compares their roots), so it inherits the same +//! guarantee. There is no `init_timestamp` column: timestamps are epoch-local, and +//! the cross-epoch chain is ordered by epoch. +//! +//! ## Padding +//! +//! Real rows carry `MU = 1`; the power-of-two padding rows carry `MU = 0`. Every +//! interaction uses `Multiplicity::Column(MU)`, so padding rows fire nothing — +//! we never rely on token self-cancellation (this is the standard pattern used +//! by every variable-length table). `MU` is self-enforced: dropping a real row +//! (`MU = 0`) breaks its telescoping link → bus imbalance. + +use std::collections::HashMap; + +use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; +use stark::trace::TraceTable; + +use super::bitwise::{BitwiseOperation, BitwiseOperationType}; +use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; +use crate::paged_mem::PagedMem; + +/// Per-cell provenance: `(last_writer_epoch, value, timestamp)`. Unset cells read +/// back as the genesis default `(GENESIS_EPOCH, 0, 0)`. +type Provenance = PagedMem<(u64, u64, u64)>; + +/// Sentinel `originating_epoch` for cells whose value comes from the program's +/// initial memory — no prior epoch wrote them. Chosen as `0`, below every real +/// (1-based) epoch label, so `init_epoch < fini_epoch` holds for genesis cells. +pub const GENESIS_EPOCH: u64 = 0; + +/// Maximum number of epochs a continuation run may have. +/// +/// The cross-epoch ordering check proves `init_epoch < fini_epoch` via an `IsB20` +/// (20-bit) lookup on `fini_epoch - 1 - init_epoch`. A genesis-sourced cell +/// finalized in epoch `index` (0-based) has gap `index`, so every epoch must +/// satisfy `index < 2^20`. A run needing more epochs cannot be proved — the +/// IsB20 bus would not balance — so the driver rejects it up front (see +/// `prove_continuation`). +pub const MAX_EPOCHS: u64 = 1 << 20; + +/// A cell's state when an epoch first touches it. +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub struct InitClaim { + /// Value the cell held when this epoch first touched it. + pub value: u64, + /// Epoch that last wrote the cell (or [`GENESIS_EPOCH`]). + pub originating_epoch: u64, + /// Timestamp of that originating write. + pub timestamp: u64, +} + +/// A cell's state at the end of the epoch that touched it. +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub struct FiniClaim { + /// Value the cell holds at this epoch's end. + pub value: u64, + /// This epoch's label (1-based). + pub epoch: u64, + /// Last access timestamp for the cell this epoch. + pub timestamp: u64, +} + +/// The init/fini boundary claims for a single touched cell. +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub struct CellBoundary { + pub address: u64, + pub init: InitClaim, + pub fini: FiniClaim, +} + +/// One epoch's touched cells, each as `(address, end_value, end_timestamp)`. +pub type EpochTouches = Vec<(u64, u64, u64)>; + +/// Convert a 0-based epoch index into its 1-based table label. +pub fn epoch_label(epoch_index: u64) -> u64 { + epoch_index + 1 +} + +/// Compute the sparse per-epoch boundary claims. +/// +/// `initial_memory` maps each address to its program-start value (originating +/// epoch [`GENESIS_EPOCH`], timestamp 0). `epochs[e]` lists the cells touched in +/// epoch `e` with their end value and end timestamp. Returns, per epoch, the +/// boundary claims for exactly the cells that epoch touched (sparse): each +/// cell's `init` is taken from the previous epoch that wrote it, and its `fini` +/// records this epoch (1-based label) as the new writer. +pub fn epoch_boundaries( + initial_memory: &HashMap, + epochs: &[EpochTouches], +) -> Vec> { + // provenance[addr] = (last_writer_epoch, value, timestamp) + let mut provenance = genesis_provenance(initial_memory.iter().map(|(&a, &v)| (a, v))); + + let mut result = Vec::with_capacity(epochs.len()); + for (epoch, touched) in epochs.iter().enumerate() { + result.push(epoch_boundary( + &mut provenance, + epoch_label(epoch as u64), + touched, + )); + } + result +} + +/// One epoch's boundaries, taking `init` from the running `provenance` (the cell's +/// last writer) and updating `provenance` with this epoch's `fini`. `epoch` is the +/// 1-based label. This is the per-epoch step of [`epoch_boundaries`], exposed so +/// the streaming continuation prover can build each epoch's table incrementally +/// without all epochs at once. +pub fn epoch_boundary( + provenance: &mut Provenance, + epoch: u64, + touched: &[(u64, u64, u64)], +) -> Vec { + let mut boundaries = Vec::with_capacity(touched.len()); + for &(address, end_value, end_timestamp) in touched { + // Unset cells read back as the genesis default `(GENESIS_EPOCH, 0, 0)`. + let (originating_epoch, init_value, init_timestamp) = provenance.get(address); + boundaries.push(CellBoundary { + address, + init: InitClaim { + value: init_value, + originating_epoch, + timestamp: init_timestamp, + }, + fini: FiniClaim { + value: end_value, + epoch, + timestamp: end_timestamp, + }, + }); + provenance.set(address, (epoch, end_value, end_timestamp)); + } + boundaries +} + +/// Seed the provenance store from the program's initial memory (genesis cells), +/// supplied as an `(address, value)` iterator. The continuation prover feeds the +/// paged genesis image directly, avoiding an intermediate address→value map. +pub fn genesis_provenance(genesis: impl IntoIterator) -> Provenance { + let mut provenance = Provenance::new((GENESIS_EPOCH, 0, 0)); + for (addr, value) in genesis { + provenance.set(addr, (GENESIS_EPOCH, value, 0)); + } + provenance +} + +// ========================================================================= +// AIR trace columns +// ========================================================================= + +/// Column indices for the local-to-global table: one row per touched cell. +/// +/// `address` and `fini_timestamp` are plain 32-bit columns (matched on the Memory +/// bus against MEMW). The cross-epoch-only `init_epoch` is stored as 16-bit +/// halfword columns ([`RANGE_CHECKED_HALFWORDS`]), checked via `IsHalfword`, and +/// rebuilt into its 32-bit bus value via [`word`]. The value bytes get the +/// batched `AreBytes` check. `fini_epoch` is a per-table constant (not a column). +/// `MU` is the real-row selector / multiplicity. +pub mod cols { + /// address_lo: 32-bit; matched on the Memory bus against MEMW. + pub const ADDRESS_LO: usize = 0; + /// address_hi: 32-bit; matched on the Memory bus against MEMW. + pub const ADDRESS_HI: usize = 1; + + /// Init value: a single byte, like PAGE's `value`. + pub const INIT_VALUE: usize = 2; + + // Init epoch — GlobalMemory-bus only, range-checked: two halfwords + // (`init_epoch = INIT_EPOCH_0 + 2^16·INIT_EPOCH_1`). + pub const INIT_EPOCH_0: usize = 3; + pub const INIT_EPOCH_1: usize = 4; + + // Note: there is no init-timestamp column. Timestamps are epoch-local ordering + // scratch (the Memory-bus init token is seeded at ts=0); across epochs the chain + // is ordered by `init_epoch < fini_epoch`, so the GlobalMemory bus carries no + // timestamp at all (see `bus_interactions`). + + /// Fini value: a single byte. + pub const FINI_VALUE: usize = 5; + + /// fini_timestamp_lo: 32-bit; matched on the Memory bus against MEMW. + pub const FINI_TIMESTAMP_LO: usize = 6; + /// fini_timestamp_hi: 32-bit; matched on the Memory bus against MEMW. + pub const FINI_TIMESTAMP_HI: usize = 7; + + /// MU: real-row selector / LogUp multiplicity (1 on real rows, 0 on padding). + pub const MU: usize = 8; + + pub const NUM_COLUMNS: usize = 9; + + /// The halfword columns (cross-epoch-only quantities), in order — every column + /// that is `IsHalfword`-checked. + pub const RANGE_CHECKED_HALFWORDS: [usize; 2] = [INIT_EPOCH_0, INIT_EPOCH_1]; +} + +/// The two halfwords of an epoch label (genesis `0` or a small 1-based index, all +/// well under 2^32). +fn epoch_halfwords(epoch: u64) -> [u64; 2] { + debug_assert!(epoch < (1 << 32), "epoch label exceeds 32 bits"); + [epoch & 0xFFFF, (epoch >> 16) & 0xFFFF] +} + +// ========================================================================= +// Trace generation +// ========================================================================= + +/// Build the local-to-global trace: one row per touched cell's boundary claims, +/// padded up to a power of two. Real rows set `MU = 1`; padding rows stay all-zero +/// (`MU = 0`), so they fire no interactions. +pub fn generate_local_to_global_trace( + boundaries: &[CellBoundary], +) -> TraceTable { + let num_rows = boundaries.len().next_power_of_two().max(1); + let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS]; + + for (row, b) in boundaries.iter().enumerate() { + let base = row * cols::NUM_COLUMNS; + let init_epoch = epoch_halfwords(b.init.originating_epoch); + + // Plain 32-bit columns (MEMW-checked on the Memory bus). + data[base + cols::ADDRESS_LO] = FE::from(b.address & 0xFFFF_FFFF); + data[base + cols::ADDRESS_HI] = FE::from(b.address >> 32); + data[base + cols::FINI_TIMESTAMP_LO] = FE::from(b.fini.timestamp & 0xFFFF_FFFF); + data[base + cols::FINI_TIMESTAMP_HI] = FE::from(b.fini.timestamp >> 32); + // Byte values (AreBytes-checked). + data[base + cols::INIT_VALUE] = FE::from(b.init.value & 0xFF); + data[base + cols::FINI_VALUE] = FE::from(b.fini.value & 0xFF); + // Cross-epoch-only quantity as IsHalfword-checked halfwords. + data[base + cols::INIT_EPOCH_0] = FE::from(init_epoch[0]); + data[base + cols::INIT_EPOCH_1] = FE::from(init_epoch[1]); + // Real-row selector. + data[base + cols::MU] = FE::one(); + } + + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) +} + +// ========================================================================= +// Bus interactions +// ========================================================================= + +/// A 32-bit value reconstructed from its two halfword columns: `lo + 2^16·hi`. +fn word(lo_col: usize, hi_col: usize) -> BusValue { + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: lo_col, + }, + LinearTerm::Column { + coefficient: 1 << 16, + column: hi_col, + }, + ]) +} + +/// A column read directly as a single field element (a 32-bit word or a byte). +pub(crate) fn direct(column: usize) -> BusValue { + BusValue::Packed { + start_column: column, + packing: Packing::Direct, + } +} + +fn mu() -> Multiplicity { + Multiplicity::Column(cols::MU) +} + +/// Cross-epoch memory bus interactions, two per row (one touched cell): +/// - **receive** the `init` token `(address, value, originating_epoch)` left by the +/// epoch that last wrote the cell; +/// - **send** the `fini` token `(address, value, epoch_label)` for the next epoch +/// that touches the cell. +/// +/// `fini_epoch` is the per-table constant `epoch_label`; `init_epoch` comes from the +/// range-checked halfword columns via [`word`]; `address` is direct 32-bit columns. +/// No timestamp is carried: the chain is ordered by epoch, and timestamps are +/// epoch-local (only the Memory bus, not this one, uses them). +/// +/// These tokens are matched ACROSS epochs by the final aggregation LogUp (step 4), +/// so within a single epoch's table the GlobalMemory bus is deliberately +/// unbalanced (real rows have `init != fini`). Padding rows fire nothing (`MU = 0`). +pub fn bus_interactions(epoch_label: u64) -> Vec { + vec![ + // init: receive the token left by the originating epoch. No timestamp: the + // chain is ordered by epoch, and timestamps are epoch-local (see cols). + BusInteraction::receiver( + BusId::GlobalMemory, + mu(), + vec![ + direct(cols::ADDRESS_LO), + direct(cols::ADDRESS_HI), + direct(cols::INIT_VALUE), + word(cols::INIT_EPOCH_0, cols::INIT_EPOCH_1), + ], + ), + // fini: send the token for the next epoch to consume. + BusInteraction::sender( + BusId::GlobalMemory, + mu(), + vec![ + direct(cols::ADDRESS_LO), + direct(cols::ADDRESS_HI), + direct(cols::FINI_VALUE), + BusValue::constant(epoch_label), + ], + ), + ] +} + +/// Epoch-LOCAL memory bus interactions, mirroring PAGE-C3/C4 (`page.rs`). +/// +/// Inside an epoch proof the L2G table bookends the epoch's `Memory` bus for the +/// RAM bytes it touches: it receives each cell's initial token at timestamp 0 +/// (the epoch-start seed, matching the first MEMW read's `old_timestamp`) and +/// sends its final token at the last access timestamp. This replaces PAGE's +/// init/fini bookend for touched bytes. The `Memory` token layout is +/// `[is_register, address_lo, address_hi, timestamp_lo, timestamp_hi, value]`; +/// RAM only, so `is_register = 0`, and the byte value is the LO column. +/// +/// Address, fini timestamp and the values appear here, so MEMW range-checks them +/// for us — they need no L2G range check (see [`range_check_interactions`]). +pub fn memory_bus_interactions() -> Vec { + vec![ + // init: receive the cell's initial token at the epoch-start seed (ts = 0). + BusInteraction::receiver( + BusId::Memory, + mu(), + vec![ + BusValue::constant(0), + direct(cols::ADDRESS_LO), + direct(cols::ADDRESS_HI), + BusValue::constant(0), + BusValue::constant(0), + direct(cols::INIT_VALUE), + ], + ), + // fini: send the cell's final token at the last access timestamp. + BusInteraction::sender( + BusId::Memory, + mu(), + vec![ + BusValue::constant(0), + direct(cols::ADDRESS_LO), + direct(cols::ADDRESS_HI), + direct(cols::FINI_TIMESTAMP_LO), + direct(cols::FINI_TIMESTAMP_HI), + direct(cols::FINI_VALUE), + ], + ), + ] +} + +/// Range-check + ordering bus interactions for the columns nothing else +/// constrains, all with multiplicity `MU` (so padding fires none): +/// - one `AreBytes` for the two value bytes (the `init` value is a trusted source); +/// - one `IsHalfword` per cross-epoch-only halfword column; +/// - one `IsB20` proving `init_epoch < fini_epoch` (the ordering constraint), via +/// `fini_epoch − 1 − init_epoch` being a valid 20-bit value. With genesis epoch +/// `0` this also covers genesis cells (`0 < fini_epoch`) with no special case. +/// +/// Address and fini timestamp are NOT here — MEMW checks them on the Memory bus. +/// These are committed only on the epoch-local table (`l2g_memory_air`), whose +/// proof carries the BITWISE provider; the global proof commits the identical +/// trace, so its columns inherit the same guarantee via the commitment binding. +/// Keep this in sync with [`collect_bitwise_from_l2g`]. +pub fn range_check_interactions(epoch_label: u64) -> Vec { + let mut interactions = Vec::with_capacity(2 + cols::RANGE_CHECKED_HALFWORDS.len()); + interactions.push(BusInteraction::sender( + BusId::AreBytes, + mu(), + vec![direct(cols::INIT_VALUE), direct(cols::FINI_VALUE)], + )); + for &column in &cols::RANGE_CHECKED_HALFWORDS { + interactions.push(BusInteraction::sender( + BusId::IsHalfword, + mu(), + vec![direct(column)], + )); + } + // Ordering: IsB20[epoch_label - 1 - init_epoch], where + // init_epoch = INIT_EPOCH_0 + 2^16·INIT_EPOCH_1. + interactions.push(BusInteraction::sender( + BusId::IsB20, + mu(), + vec![BusValue::linear(vec![ + LinearTerm::Constant(epoch_label as i64 - 1), + LinearTerm::Column { + coefficient: -1, + column: cols::INIT_EPOCH_0, + }, + LinearTerm::Column { + coefficient: -(1 << 16), + column: cols::INIT_EPOCH_1, + }, + ])], + )); + interactions +} + +/// The BITWISE lookups the L2G range checks + ordering check send, so the BITWISE +/// table's multiplicities balance the [`range_check_interactions`] senders. Emits, +/// per real row, one `AreBytes`, one `IsHalfword` per cross-epoch halfword, and one +/// `IsB20` for the ordering difference. Padding rows fire nothing (`MU = 0`), so +/// none are emitted for them. +pub fn collect_bitwise_from_l2g(boundaries: &[CellBoundary]) -> Vec { + let per_row = 2 + cols::RANGE_CHECKED_HALFWORDS.len(); + let mut ops = Vec::with_capacity(boundaries.len() * per_row); + + let push_halfword = |ops: &mut Vec, v16: u64| { + ops.push(BitwiseOperation::halfword( + BitwiseOperationType::IsHalf, + (v16 & 0xFF) as u8, + ((v16 >> 8) & 0xFF) as u8, + )); + }; + + for b in boundaries { + ops.push(BitwiseOperation::byte_op( + BitwiseOperationType::AreBytes, + (b.init.value & 0xFF) as u8, + (b.fini.value & 0xFF) as u8, + )); + let init_epoch = epoch_halfwords(b.init.originating_epoch); + for v in init_epoch { + push_halfword(&mut ops, v); + } + // Ordering: IsB20[fini_epoch - 1 - init_epoch]. Honest rows have + // init_epoch < fini_epoch, so the difference is a small non-negative value. + let diff = b.fini.epoch - 1 - b.init.originating_epoch; + debug_assert!(diff < MAX_EPOCHS, "epoch gap exceeds IsB20 range"); + ops.push(BitwiseOperation::b20( + (diff & 0xFF) as u8, + ((diff >> 8) & 0xFF) as u8, + ((diff >> 16) & 0xF) as u8, + )); + } + + ops +} + +#[cfg(test)] +mod tests { + use super::*; + + fn find(epoch: &[CellBoundary], address: u64) -> &CellBoundary { + epoch + .iter() + .find(|b| b.address == address) + .expect("address not found in epoch boundaries") + } + + #[test] + fn test_sparse_only_touched_cells() { + let initial_memory = HashMap::from([(10, 5)]); + let epochs = vec![ + vec![(10, 7, 3), (20, 9, 4)], // epoch 0 touches 10 and 20 + vec![(10, 8, 10)], // epoch 1 touches only 10 + vec![(20, 9, 20)], // epoch 2 touches only 20 + ]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + + assert_eq!(boundaries.len(), 3); + // Only touched cells appear, nothing else. + assert_eq!(boundaries[0].len(), 2); + assert_eq!(boundaries[1].len(), 1); + assert_eq!(boundaries[2].len(), 1); + assert_eq!(boundaries[1][0].address, 10); + assert_eq!(boundaries[2][0].address, 20); + } + + #[test] + fn test_genesis_init_for_first_touch() { + let initial_memory = HashMap::from([(10, 5)]); + let epochs = vec![vec![(10, 7, 3), (20, 9, 4)]]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + + // Cell 10 starts from program memory: value 5, genesis epoch, ts 0. + let c10 = find(&boundaries[0], 10); + assert_eq!( + c10.init, + InitClaim { + value: 5, + originating_epoch: GENESIS_EPOCH, + timestamp: 0, + } + ); + // Cell 20 was never in initial memory: genesis, value 0. + let c20 = find(&boundaries[0], 20); + assert_eq!( + c20.init, + InitClaim { + value: 0, + originating_epoch: GENESIS_EPOCH, + timestamp: 0, + } + ); + } + + #[test] + fn test_fini_records_current_epoch_label_and_timestamp() { + let initial_memory = HashMap::from([(10, 5)]); + let epochs = vec![vec![(10, 7, 3)], vec![(10, 8, 10)]]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + + // Labels are 1-based: epoch index 0 → label 1, index 1 → label 2. + assert_eq!( + find(&boundaries[0], 10).fini, + FiniClaim { + value: 7, + epoch: 1, + timestamp: 3, + } + ); + assert_eq!( + find(&boundaries[1], 10).fini, + FiniClaim { + value: 8, + epoch: 2, + timestamp: 10, + } + ); + } + + #[test] + fn test_telescoping_consecutive_epochs() { + let initial_memory = HashMap::from([(10, 5)]); + let epochs = vec![vec![(10, 7, 3)], vec![(10, 8, 10)]]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + + // Epoch 0's fini for cell 10 is consumed as epoch 1's init. + let fini0 = find(&boundaries[0], 10).fini; + let init1 = find(&boundaries[1], 10).init; + assert_eq!(fini0.value, init1.value); + assert_eq!(fini0.epoch, init1.originating_epoch); + assert_eq!(fini0.timestamp, init1.timestamp); + // Concretely: epoch 0 (label 1) left (7, label 1, ts 3). + assert_eq!( + init1, + InitClaim { + value: 7, + originating_epoch: 1, + timestamp: 3, + } + ); + // And init_epoch (1) < fini_epoch (2), the ordering invariant. + assert!(init1.originating_epoch < find(&boundaries[1], 10).fini.epoch); + } + + #[test] + fn test_telescoping_skips_untouched_epochs() { + // Cell 20 is touched in epoch 0, skipped in epoch 1, touched again in 2. + let initial_memory = HashMap::new(); + let epochs = vec![ + vec![(20, 9, 4)], // epoch 0 writes 20 + vec![(10, 1, 5)], // epoch 1 does not touch 20 + vec![(20, 9, 20)], // epoch 2 touches 20 again + ]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + + // Epoch 2's init for cell 20 links straight back to epoch 0 (label 1). + let fini0 = find(&boundaries[0], 20).fini; + let init2 = find(&boundaries[2], 20).init; + assert_eq!(init2.originating_epoch, 1); + assert_eq!(init2.value, fini0.value); + assert_eq!(init2.timestamp, fini0.timestamp); + } + + fn sample_boundary(address: u64) -> CellBoundary { + CellBoundary { + address, + init: InitClaim { + value: 0x1_0000_0005, + originating_epoch: GENESIS_EPOCH, + timestamp: 0, + }, + fini: FiniClaim { + value: 0x2_0000_0007, + epoch: 1, + timestamp: 0x3_0000_0009, + }, + } + } + + /// Reconstruct a 32-bit value from its two halfword columns, as the bus does. + fn word_value( + trace: &TraceTable, + lo: usize, + hi: usize, + ) -> FE { + *trace.main_table.get(0, lo) + FE::from(1u64 << 16) * *trace.main_table.get(0, hi) + } + + #[test] + fn test_num_columns() { + assert_eq!(cols::NUM_COLUMNS, 9); + assert_eq!(cols::RANGE_CHECKED_HALFWORDS.len(), 2); + } + + #[test] + fn test_columns_hold_the_split_values() { + let b = sample_boundary(0x4_0000_0001); + let trace = generate_local_to_global_trace(&[b]); + + assert_eq!(trace.num_rows(), 1); + + let lo32 = |v: u64| FE::from(v & 0xFFFF_FFFF); + let hi32 = |v: u64| FE::from(v >> 32); + let byte = |v: u64| FE::from(v & 0xFF); + let at = |c: usize| *trace.main_table.get(0, c); + + // Address and fini timestamp are plain 32-bit columns (MEMW-checked). + assert_eq!(at(cols::ADDRESS_LO), lo32(b.address)); + assert_eq!(at(cols::ADDRESS_HI), hi32(b.address)); + assert_eq!(at(cols::FINI_TIMESTAMP_LO), lo32(b.fini.timestamp)); + assert_eq!(at(cols::FINI_TIMESTAMP_HI), hi32(b.fini.timestamp)); + // Values are stored as single bytes. + assert_eq!(at(cols::INIT_VALUE), byte(b.init.value)); + assert_eq!(at(cols::FINI_VALUE), byte(b.fini.value)); + // The cross-epoch-only quantity reconstructs from its halfwords. + // Genesis init epoch reconstructs to 0 (== GENESIS_EPOCH). + assert_eq!( + word_value(&trace, cols::INIT_EPOCH_0, cols::INIT_EPOCH_1), + FE::from(GENESIS_EPOCH) + ); + // Real row carries MU = 1. + assert_eq!(at(cols::MU), FE::one()); + } + + #[test] + fn test_padding_rows_are_zero_including_mu() { + // 3 boundaries pad up to 4 rows; the padding row is all zero, MU = 0. + let boundaries: Vec = (0..3).map(sample_boundary).collect(); + let trace = generate_local_to_global_trace(&boundaries); + assert_eq!(trace.num_rows(), 4); + for col in 0..cols::NUM_COLUMNS { + assert_eq!(*trace.main_table.get(3, col), FE::zero()); + } + // And real rows have MU = 1. + for row in 0..3 { + assert_eq!(*trace.main_table.get(row, cols::MU), FE::one()); + } + } + + #[test] + fn test_empty_trace_is_padded_to_one_row() { + let trace = generate_local_to_global_trace(&[]); + assert_eq!(trace.num_rows(), 1); + for col in 0..cols::NUM_COLUMNS { + assert_eq!(*trace.main_table.get(0, col), FE::zero()); + } + } + + #[test] + fn test_bus_interactions() { + let interactions = bus_interactions(1); + assert_eq!(interactions.len(), 2); // init (receive) + fini (send) + + let global_memory = u64::from(BusId::GlobalMemory); + let init = &interactions[0]; + let fini = &interactions[1]; + + // init consumes the originating epoch's token; fini produces this epoch's. + assert!(!init.is_sender); + assert!(fini.is_sender); + assert_eq!(init.bus_id, global_memory); + assert_eq!(fini.bus_id, global_memory); + + // Both tokens have the same 4-element shape so they can match across + // epochs: address(lo,hi), value(byte), epoch. No timestamp — the chain is + // ordered by epoch, and timestamps are epoch-local. + assert_eq!(init.values.len(), 4); + assert_eq!(fini.values.len(), 4); + } + + #[test] + fn test_range_check_interactions_cover_every_column() { + let interactions = range_check_interactions(1); + // 1 AreBytes + one IsHalfword per cross-epoch halfword + 1 IsB20 ordering. + assert_eq!(interactions.len(), 2 + cols::RANGE_CHECKED_HALFWORDS.len()); + let are_bytes = u64::from(BusId::AreBytes); + let is_halfword = u64::from(BusId::IsHalfword); + let is_b20 = u64::from(BusId::IsB20); + assert_eq!(interactions[0].bus_id, are_bytes); + assert_eq!(interactions[0].values.len(), 2); + for interaction in &interactions[1..1 + cols::RANGE_CHECKED_HALFWORDS.len()] { + assert!(interaction.is_sender); + assert_eq!(interaction.bus_id, is_halfword); + assert_eq!(interaction.values.len(), 1); + } + let ordering = interactions.last().unwrap(); + assert!(ordering.is_sender); + assert_eq!(ordering.bus_id, is_b20); + } + + #[test] + fn test_collect_bitwise_matches_sender_count() { + // Per row: 1 AreBytes + one IsHalfword per cross-epoch halfword + 1 IsB20. + // No padding ops (padding has MU = 0 and fires nothing). + let boundaries: Vec = (0..3).map(sample_boundary).collect(); + let ops = collect_bitwise_from_l2g(&boundaries); + let per_row = 2 + cols::RANGE_CHECKED_HALFWORDS.len(); + assert_eq!(ops.len(), boundaries.len() * per_row); + + let count = |t: BitwiseOperationType| ops.iter().filter(|o| o.lookup_type == t).count(); + assert_eq!(count(BitwiseOperationType::AreBytes), boundaries.len()); + assert_eq!( + count(BitwiseOperationType::IsHalf), + boundaries.len() * cols::RANGE_CHECKED_HALFWORDS.len() + ); + assert_eq!(count(BitwiseOperationType::IsB20), boundaries.len()); + } + + #[test] + fn test_collect_bitwise_values_match_the_committed_halfword_columns() { + // Each IsHalfword op the collector emits must carry the same value as the + // corresponding halfword column the range-check sender reads. Use a + // boundary with distinct values, and a real (>=1) originating epoch. + let b = CellBoundary { + address: 0x1234_5678_9abc_def0, + init: InitClaim { + value: 0xAB, + originating_epoch: 3, + timestamp: 0x4455_6677_8899_aabb, + }, + fini: FiniClaim { + value: 0xCD, + epoch: 9, + timestamp: 0xccdd_eeff_0011_2233, + }, + }; + let trace = generate_local_to_global_trace(&[b]); + let ops = collect_bitwise_from_l2g(&[b]); + + // The single AreBytes op carries the two value bytes. + assert_eq!(ops[0].lookup_type, BitwiseOperationType::AreBytes); + assert_eq!(ops[0].x as u64, b.init.value & 0xFF); + assert_eq!(ops[0].y as u64, b.fini.value & 0xFF); + + // The IsHalfword ops follow, in RANGE_CHECKED_HALFWORDS order, each + // matching the value committed in that column. + for (i, &col) in cols::RANGE_CHECKED_HALFWORDS.iter().enumerate() { + let op = &ops[1 + i]; + assert_eq!(op.lookup_type, BitwiseOperationType::IsHalf); + let op_value = op.x as u64 + ((op.y as u64) << 8); + assert_eq!( + FE::from(op_value), + *trace.main_table.get(0, col), + "IsHalfword op {i} value disagrees with column {col}" + ); + } + + // The last op is the ordering IsB20 of `fini_epoch - 1 - init_epoch`. + let ordering = ops.last().unwrap(); + assert_eq!(ordering.lookup_type, BitwiseOperationType::IsB20); + let value = ordering.x as u64 + ((ordering.y as u64) << 8) + ((ordering.z as u64) << 16); + assert_eq!(value, b.fini.epoch - 1 - b.init.originating_epoch); + } + + #[test] + fn test_ordering_rejects_future_reference() { + // The ordering sender computes the field value `fini_epoch - 1 - init_epoch`. + // For an honest row (init_epoch < fini_epoch) it's a small valid IsB20 value; + // for a forged FUTURE reference (init_epoch >= fini_epoch) it underflows in + // the field to a value far outside [0, 2^20), so no IsB20 row matches and the + // bus cannot balance. + let order_value = |fini_label: u64, init_epoch: u64| -> FE { + FE::from(fini_label - 1) - FE::from(init_epoch) + }; + + // Honest: epoch 5 consuming epoch 2's fini → 5 - 1 - 2 = 2, in range. + let honest = order_value(5, 2); + assert!(*honest.value() < (1 << 20)); + + // Forged future reference: epoch 5's init claims originating epoch 9. + let forged = order_value(5, 9); + assert!( + *forged.value() >= (1 << 20), + "a future-epoch reference must fall outside the IsB20 range" + ); + + // Forged self reference: epoch 5's init claims originating epoch 5. + // 5 - 1 - 5 = -1 in the field → also out of range. + let self_ref = order_value(5, 5); + assert!(*self_ref.value() >= (1 << 20)); + } +} diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index 50bc399af..78d5f9a6a 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -33,11 +33,13 @@ pub mod ec_scalar; pub mod ecdas; pub mod ecsm; pub mod eq; +pub mod global_memory; pub mod halt; pub mod keccak; pub mod keccak_rc; pub mod keccak_rnd; pub mod load; +pub mod local_to_global; pub mod lt; pub mod memw; pub mod memw_aligned; diff --git a/prover/src/tables/register.rs b/prover/src/tables/register.rs index 5a09fb2fa..09485595a 100644 --- a/prover/src/tables/register.rs +++ b/prover/src/tables/register.rs @@ -28,6 +28,9 @@ use stark::proof::options::ProofOptions; use stark::prover::evaluate_polynomial_on_lde_domain; use stark::trace::{TraceTable, columns2rows}; +#[cfg(test)] +use executor::vm::registers::Registers; + use super::page::STACK_TOP; use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField, VmTable}; @@ -48,11 +51,21 @@ pub const WORDS_PER_REGISTER: usize = 2; /// -1 because x254 is single-word (1 address instead of 2). pub const NUM_REGISTER_ADDRESSES: usize = NUM_REGISTERS * WORDS_PER_REGISTER - 1; -/// Number of preprocessed columns (OFFSET, INIT). +/// Number of preprocessed columns (OFFSET, INIT) for the monolithic prover. /// OFFSET encodes the Word address, INIT holds the initial value. /// Program-dependent: x255 init = ELF entry point. pub const NUM_PREPROCESSED_COLS: usize = 2; +/// Number of preprocessed columns (OFFSET, INIT, FINI) for continuation epochs. +/// A continuation epoch additionally preprocesses FINI so the epoch's final +/// register file becomes a verifier-known public value (`R_{i+1}`): the verifier +/// recomputes the commitment from it, the REG-C2 Memory-bus token forces it to +/// equal the true final registers, and the next epoch reuses the same `R_{i+1}` +/// as its preprocessed INIT — binding `init(epoch i+1) == fini(epoch i)` with no +/// extra bus. The monolithic prover keeps FINI as a main-trace column (it has no +/// verifier-known final state), using `NUM_PREPROCESSED_COLS` instead. +pub const NUM_PREPROCESSED_COLS_WITH_FINI: usize = 3; + // ========================================================================= // Column indices for REGISTER table // ========================================================================= @@ -114,8 +127,22 @@ fn register_word_address_list() -> [u64; NUM_REGISTER_ADDRESSES] { addrs } +// Positions of the non-general-purpose registers within a register-init vector +// (indexed in `register_word_address_list` order). x0-x31 occupy positions 0..63 +// (position `i` is word address `i`), so register `r`'s two words are at `2r`, `2r+1`. +/// Position of x254 (synthetic commit index, word address 508). +pub(crate) const X254_INDEX: usize = 64; +/// Position of x255 (PC) low word (word address 510). +pub(crate) const PC_LO_INDEX: usize = 65; +/// Position of x255 (PC) high word (word address 511). +pub(crate) const PC_HI_INDEX: usize = 66; + /// Compute the initial value for a register Word address. /// +/// This is the **program-start** register image, so it only applies to the first +/// continuation epoch (or a whole-program run). Later epochs start mid-execution +/// and supply their own boundary register snapshot instead. +/// /// - SP (x2) words at offset 4,5 hold STACK_TOP /// - x254 at offset 508 is the synthetic commit index, initialized to 0 /// - PC (x255) words at offset 510,511 hold entry_point @@ -130,6 +157,48 @@ fn init_value_for_address(word_addr: u64, entry_point: u64) -> u32 { } } +/// Build the register init vector (one initial value per row, in +/// `register_word_address_list` order) for a program starting at `entry_point` +/// (the program-start register image). A continuation epoch would instead supply +/// its boundary register snapshot. +pub(crate) fn register_init_from_entry_point(entry_point: u64) -> Vec { + register_word_address_list() + .iter() + .map(|&addr| init_value_for_address(addr, entry_point)) + .collect() +} + +/// Build the register init map from an epoch's boundary register snapshot: the +/// executor `Registers` (x1-x31, including SP) plus the program counter (x255). +/// x0 and the synthetic commit index (x254) are zero in the naive version. +/// +/// Used by tests that build a single epoch from a boundary snapshot. The +/// continuation prover no longer uses this for chaining: epoch i+1's register +/// init comes from epoch i's *bound* fini (`fini_from_trace`, carried as the next +/// epoch's preprocessed INIT), not a trusted executor snapshot. +#[cfg(test)] +pub(crate) fn register_init_from_snapshot(registers: &Registers, pc: u64) -> Vec { + let mut init = vec![0u32; NUM_REGISTER_ADDRESSES]; + for reg in 0u8..32 { + let value = if reg == 0 { + 0 + } else { + registers.read(reg as u32).unwrap_or(0) + }; + let base = (reg as usize) * 2; + init[base] = (value & 0xFFFF_FFFF) as u32; + init[base + 1] = (value >> 32) as u32; + } + // x254 synthetic commit index, hardcoded to 0 in this test-only helper, so it + // is only correct for an epoch with no preceding COMMIT. The production path + // carries x254 across epochs via the previous epoch's bound FINI vector, not + // this snapshot helper. + init[X254_INDEX] = 0; + init[PC_LO_INDEX] = (pc & 0xFFFF_FFFF) as u32; + init[PC_HI_INDEX] = (pc >> 32) as u32; + init +} + /// Generates the REGISTER trace table. /// /// Creates a table with NUM_REGISTER_ADDRESSES rows. @@ -139,14 +208,15 @@ fn init_value_for_address(word_addr: u64, entry_point: u64) -> u32 { /// ## Arguments /// /// * `final_state` - Map from register Word address to final (timestamp, value) -/// * `entry_point` - ELF entry point (initial PC value for x255) +/// * `init` - Initial value per row, in `register_word_address_list` order +/// (program-start image, or an epoch's boundary register snapshot) /// /// ## Returns /// /// The trace table for registers. pub fn generate_register_trace( final_state: &FinalRegisterStateMap, - entry_point: u64, + init: &[u32], ) -> TraceTable { let num_rows = NUM_REGISTER_ADDRESSES.next_power_of_two(); let mut trace = TraceTable::new_main( @@ -161,7 +231,7 @@ pub fn generate_register_trace( // Offset = actual Word address in register space table.set_u64(row, cols::OFFSET, word_addr); - let init_value = init_value_for_address(word_addr, entry_point); + let init_value = init.get(row).copied().unwrap_or(0); table.set_word(row, cols::INIT, init_value); // Final state: if accessed use final, otherwise use initial (timestamp 1) @@ -186,6 +256,18 @@ pub fn generate_register_trace( trace } +/// Extract the per-register final values (`R_{i+1}`) from a committed REGISTER +/// trace: reads `FINI` on the real rows (the first `NUM_REGISTER_ADDRESSES`) into +/// a vector in `register_word_address_list` order — entry `i` is the final value +/// of the register at `register_word_address_list()[i]`. This is the epoch's final +/// register file; the continuation builds this epoch's preprocessed FINI +/// commitment from it and reuses it as the next epoch's preprocessed INIT. +pub fn fini_from_trace(trace: &TraceTable) -> Vec { + (0..NUM_REGISTER_ADDRESSES) + .map(|row| trace.main_table.get(row, cols::FINI).to_raw() as u32) + .collect() +} + // ========================================================================= // Preprocessed commitment // ========================================================================= @@ -195,21 +277,55 @@ pub fn generate_register_trace( /// Program-dependent: x255 (PC) init = entry_point. /// OFFSET encodes the Word address (0..63 for x0-x31, 508 for x254, 510-511 for x255). /// INIT holds the initial value (SP=STACK_TOP, PC=entry_point, rest=0). -pub fn compute_precomputed_commitment(options: &ProofOptions, entry_point: u64) -> Commitment { +pub fn compute_precomputed_commitment(options: &ProofOptions, init: &[u32]) -> Commitment { + let num_rows = NUM_REGISTER_ADDRESSES.next_power_of_two(); + let addr_list = register_word_address_list(); + + let mut offset_col = vec![FE::zero(); num_rows]; + let mut init_col = vec![FE::zero(); num_rows]; + + for i in 0..NUM_REGISTER_ADDRESSES { + offset_col[i] = FE::from(addr_list[i]); + init_col[i] = FE::from(init.get(i).copied().unwrap_or(0) as u64); + } + + commit_register_columns(options, vec![offset_col, init_col]) +} + +/// Continuation variant: commits OFFSET + INIT + FINI, so the verifier recomputes +/// the commitment from the public `init` (`R_i`) and `fini` (`R_{i+1}`) and the +/// proof's FINI column is locked to `R_{i+1}`. `fini` is the vector produced by +/// `fini_from_trace` (entry `i` = the register at `register_word_address_list()[i]`). +/// Used by continuation epochs with `NUM_PREPROCESSED_COLS_WITH_FINI`; must match +/// the column order of the REGISTER trace (OFFSET, INIT, FINI), and FINI on padding +/// rows is 0 (as the trace builds it). +pub fn compute_precomputed_commitment_with_fini( + options: &ProofOptions, + init: &[u32], + fini: &[u32], +) -> Commitment { + debug_assert_eq!(fini.len(), NUM_REGISTER_ADDRESSES); let num_rows = NUM_REGISTER_ADDRESSES.next_power_of_two(); let addr_list = register_word_address_list(); let mut offset_col = vec![FE::zero(); num_rows]; let mut init_col = vec![FE::zero(); num_rows]; + let mut fini_col = vec![FE::zero(); num_rows]; for i in 0..NUM_REGISTER_ADDRESSES { - let word_addr = addr_list[i]; - offset_col[i] = FE::from(word_addr); - init_col[i] = FE::from(init_value_for_address(word_addr, entry_point) as u64); + offset_col[i] = FE::from(addr_list[i]); + init_col[i] = FE::from(init.get(i).copied().unwrap_or(0) as u64); + fini_col[i] = FE::from(fini[i] as u64); } - let columns = [offset_col, init_col]; + commit_register_columns(options, vec![offset_col, init_col, fini_col]) +} +/// LDE + bit-reverse + Merkle-commit the given preprocessed columns (in column +/// order). Shared by the monolithic (OFFSET, INIT) and continuation +/// (OFFSET, INIT, FINI) preprocessed commitments. +fn commit_register_columns(options: &ProofOptions, columns: Vec>) -> Commitment { + let num_rows = NUM_REGISTER_ADDRESSES.next_power_of_two(); let polys: Vec> = columns .iter() .map(|col| { @@ -241,8 +357,8 @@ pub fn compute_precomputed_commitment(options: &ProofOptions, entry_point: u64) /// Returns the preprocessed commitment for the REGISTER table. /// /// Program-dependent (entry_point varies per ELF), so not globally cached. -pub fn preprocessed_commitment(options: &ProofOptions, entry_point: u64) -> Commitment { - compute_precomputed_commitment(options, entry_point) +pub fn preprocessed_commitment(options: &ProofOptions, init: &[u32]) -> Commitment { + compute_precomputed_commitment(options, init) } // ========================================================================= diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 99a0ded51..f3ca090d7 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -56,6 +56,7 @@ use super::keccak::{self, KeccakOperation}; use super::keccak_rc; use super::keccak_rnd::{self, KeccakRoundOperation}; use super::load::{self, LoadOperation}; +use super::local_to_global; use super::lt::{self, LtOperation}; use super::memw::{self, MemwOperation}; use super::memw_aligned; @@ -67,6 +68,7 @@ use super::shift::{self, ShiftOperation}; use super::store; use super::types::{GoldilocksExtension, GoldilocksField}; use crate::Error; +use crate::paged_mem::{ImageSource, PagedMem}; // ============================================================================= // Memory and Register State Tracking @@ -80,35 +82,30 @@ type RegisterCell = (u64, u64); /// Memory state tracker for generating MEMW/LOAD traces. struct MemoryState { - /// Map from byte address to (value, timestamp) - cells: HashMap, + /// Per byte-address `(value, timestamp)`, as a dense per-page store. This is + /// the hot structure — `read_byte`/`write_byte` hit it on every memory access + /// during the replay, and it's rebuilt each epoch — so a per-page array (small + /// page-map lookup + dense indexing, no per-cell hashing or rehash-on-grow) + /// is both lighter and faster than a per-cell `HashMap`. + cells: PagedMem, } impl MemoryState { fn new() -> Self { Self { - cells: HashMap::new(), + cells: PagedMem::new((0, 0)), } } - /// Initialize memory state from ELF segments. + /// Initialize memory state from a pre-built initial-memory image. /// - /// Pre-populates all ELF bytes with timestamp=0 so that when MEMW first + /// Pre-populates all starting bytes with timestamp=0 so that when MEMW first /// accesses an address, it gets the correct initial value for `old_value`. /// This is required for the Memory bus to balance (MEMW-M1 must match PAGE-C3). - fn from_elf(elf: &Elf) -> Self { - let mut cells = HashMap::new(); - for segment in &elf.data { - for (i, &word) in segment.values.iter().enumerate() { - let word_addr = segment.base_addr.wrapping_add(i as u64 * 4); - // Split 32-bit word into 4 bytes (little-endian) - for byte_offset in 0..4u64 { - let byte_addr = word_addr.wrapping_add(byte_offset); - let byte_value = ((word >> (byte_offset * 8)) & 0xFF) as u8; - // Initial state: value from ELF, timestamp=0 - cells.insert(byte_addr, (byte_value, 0)); - } - } + fn from_image(image: &I) -> Self { + let mut cells = PagedMem::new((0, 0)); + for (addr, value) in image.image_iter() { + cells.set(addr, (value, 0)); } Self { cells } } @@ -121,30 +118,18 @@ impl MemoryState { "page_size must be a power of two for the bitmask to work" ); let mask = !(page_size - 1); - let pages: HashSet = self.cells.keys().map(|&a| a & mask).collect(); + let pages: HashSet = self.cells.iter().map(|(a, _)| a & mask).collect(); pages.len() as u64 } - /// Pre-populate the private input memory region at `PRIVATE_INPUT_START_INDEX`. - fn add_private_input(&mut self, private_input: &[u8]) { - if private_input.is_empty() { - return; - } - use executor::vm::memory::PRIVATE_INPUT_START_INDEX; - let start = PRIVATE_INPUT_START_INDEX; - for (i, &b) in private_input_bytes(private_input).iter().enumerate() { - self.cells.insert(start + i as u64, (b, 0)); - } - } - /// Read a byte from memory. Returns (value, timestamp) or (0, 0) if never written. fn read_byte(&self, address: u64) -> MemoryCell { - self.cells.get(&address).copied().unwrap_or((0, 0)) + self.cells.get(address) } /// Write a byte to memory with the given timestamp. fn write_byte(&mut self, address: u64, value: u8, timestamp: u64) { - self.cells.insert(address, (value, timestamp)); + self.cells.set(address, (value, timestamp)); } /// Read multiple bytes. Returns arrays of values and timestamps. @@ -193,6 +178,28 @@ impl RegisterState { } } + /// Seed register state from a register init vector (one value per row, in + /// `register_word_address_list` order), so the first access in a continuation + /// epoch reads the epoch's boundary register values as `old_value`. All initial + /// timestamps are 1, matching the REGISTER table's init token. Mirrors + /// `MemoryState::from_image`. + fn from_init(init: &[u32]) -> Self { + let word = |pos: usize| init.get(pos).copied().unwrap_or(0) as u64; + let mut regs = [(0u64, 1u64); 32]; + for (reg, slot) in regs.iter_mut().enumerate() { + let base = reg * 2; + *slot = (word(base) | (word(base + 1) << 32), 1); + } + Self { + regs, + index_register: (init.get(register::X254_INDEX).copied().unwrap_or(0), 1), + pc_register: ( + word(register::PC_LO_INDEX) | (word(register::PC_HI_INDEX) << 32), + 1, + ), + } + } + /// Read a register. Returns (value, last_write_timestamp). fn read(&self, reg: u8) -> RegisterCell { self.regs[reg as usize] @@ -386,7 +393,12 @@ fn collect_ops_from_cpu( let mut ecsm_ops = Vec::new(); let mut ec_scalar_ops = Vec::new(); let mut ecdas_ops = Vec::new(); - let mut current_commit_index = 0u32; + // Seed from the carried x254 (0 for a monolithic run or the first epoch) so a + // continuation epoch indexes its commits globally, matching the x254 the + // register binding transports across epochs. Resetting to 0 here would drift + // from x254 and break the COMMIT chip's Memw token (see the drift assert below). + let start_commit_index = register_state.read_index().0; + let mut current_commit_index = start_commit_index; let mut commit_ecall_count = 0u32; for op in cpu_ops { @@ -510,10 +522,11 @@ fn collect_ops_from_cpu( bitwise_ops.extend(op.collect_bitwise_ops()); } - // Each ecall generates count+1 operations (count real rows + 1 end row) + // Each ecall generates count+1 operations (count real rows + 1 end row). + // Count only this epoch's rows, so subtract the carried start index. debug_assert_eq!( commit_ops.len(), - current_commit_index as usize + commit_ecall_count as usize, + (current_commit_index - start_commit_index) as usize + commit_ecall_count as usize, "commit_ops count should match accumulated commit index plus end rows" ); @@ -1058,8 +1071,15 @@ fn collect_commit_memw_ops( let old_value = [old_index as u64, 0, 0, 0, 0, 0, 0, 0]; let new_value = [new_index as u64, 0, 0, 0, 0, 0, 0, 0]; let old_timestamps = [old_ts, 0, 0, 0, 0, 0, 0, 0]; - let memw_op = MemwOperation::new(true, 508, new_value, ts, 1, true) - .with_old(old_value, old_timestamps); + let memw_op = MemwOperation::new( + true, + register::register_base_address(254), + new_value, + ts, + 1, + true, + ) + .with_old(old_value, old_timestamps); memw_ops.push(memw_op); register_state.write_index(new_index, ts); } @@ -1830,67 +1850,130 @@ fn private_input_bytes(private_input: &[u8]) -> Vec { .collect() } -fn build_init_page_data(elf: &Elf, private_input: &[u8]) -> HashMap> { - use executor::vm::memory::PRIVATE_INPUT_START_INDEX; - let page_size = page::DEFAULT_PAGE_SIZE; - let mut init_page_data: HashMap> = HashMap::new(); +/// Build the initial-memory image (byte address -> value) from the ELF segments +/// and the private-input region. Single source of "what memory starts as", read +/// by both `MemoryState` seeding and PAGE/bitwise init. +pub(crate) fn build_initial_image(elf: &Elf, private_input: &[u8]) -> HashMap { + let mut image: HashMap = HashMap::new(); for segment in &elf.data { for (i, &word) in segment.values.iter().enumerate() { - let word_addr = segment.base_addr + (i as u64 * 4); + let word_addr = segment.base_addr.wrapping_add(i as u64 * 4); for byte_offset in 0..4u64 { - let byte_addr = word_addr + byte_offset; + let byte_addr = word_addr.wrapping_add(byte_offset); let byte_value = ((word >> (byte_offset * 8)) & 0xFF) as u8; - let page_base = page::page_base_for_address(byte_addr); - let offset = page::offset_in_page(byte_addr); - let page_data = init_page_data - .entry(page_base) - .or_insert_with(|| vec![0u8; page_size]); - page_data[offset] = byte_value; + image.insert(byte_addr, byte_value); } } } if !private_input.is_empty() { + use executor::vm::memory::PRIVATE_INPUT_START_INDEX; for (i, &b) in private_input_bytes(private_input).iter().enumerate() { - let addr = PRIVATE_INPUT_START_INDEX + i as u64; - let page_base = page::page_base_for_address(addr); - let offset = page::offset_in_page(addr); - let page_data = init_page_data - .entry(page_base) - .or_insert_with(|| vec![0u8; page_size]); - page_data[offset] = b; + image.insert(PRIVATE_INPUT_START_INDEX + i as u64, b); } } - init_page_data + image } -fn collect_bitwise_from_page( +/// Build the initial-memory image as a dense per-page store instead of a +/// per-cell `HashMap`. Used by the streaming continuation, which carries the +/// image across all epochs (so its size matters); the byte values are identical +/// to [`build_initial_image`]. Unset cells read back as 0. +pub(crate) fn build_initial_image_paged(elf: &Elf, private_input: &[u8]) -> PagedMem { + let mut image = PagedMem::new(0u8); + for segment in &elf.data { + for (i, &word) in segment.values.iter().enumerate() { + let word_addr = segment.base_addr.wrapping_add(i as u64 * 4); + for byte_offset in 0..4u64 { + let byte_addr = word_addr.wrapping_add(byte_offset); + let byte_value = ((word >> (byte_offset * 8)) & 0xFF) as u8; + image.set(byte_addr, byte_value); + } + } + } + if !private_input.is_empty() { + use executor::vm::memory::PRIVATE_INPUT_START_INDEX; + for (i, &b) in private_input_bytes(private_input).iter().enumerate() { + image.set(PRIVATE_INPUT_START_INDEX + i as u64, b); + } + } + image +} + +/// Test helper for computing one epoch's local-to-global touched cells without +/// building every trace table. +#[cfg(test)] +pub(crate) fn epoch_touched_cells( elf: &Elf, + initial_image: &I, + register_init: &[u32], + logs: &[Log], +) -> Result, Error> { + let instructions = decode::instructions_from_elf(elf) + .map_err(|e| Error::Execution(format!("Failed to parse instructions: {e}")))?; + let cpu_ops = collect_cpu_ops(logs, &instructions)?; + + let mut memory_state = MemoryState::from_image(initial_image); + let mut register_state = RegisterState::from_init(register_init); + let _ = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); + + Ok(touched_cells_from_memory_state(&memory_state)) +} + +fn touched_cells_from_memory_state(memory_state: &MemoryState) -> local_to_global::EpochTouches { + let mut touched: Vec<(u64, u64, u64)> = memory_state + .cells + .iter() + .filter(|(_, cell)| cell.1 > 0) + .map(|(addr, cell)| (addr, cell.0 as u64, cell.1)) + .collect(); + touched.sort_by_key(|&(addr, _, _)| addr); + touched +} + +/// Bucket an initial-memory image into per-page byte arrays for PAGE init columns. +pub(crate) fn build_init_page_data(image: &I) -> HashMap> { + let page_size = page::DEFAULT_PAGE_SIZE; + let mut init_page_data: HashMap> = HashMap::new(); + for (addr, value) in image.image_iter() { + let page_base = page::page_base_for_address(addr); + let offset = page::offset_in_page(addr); + let page_data = init_page_data + .entry(page_base) + .or_insert_with(|| vec![0u8; page_size]); + page_data[offset] = value; + } + init_page_data +} + +fn collect_bitwise_from_page( + image: &I, memory_state: &MemoryState, - private_input: &[u8], + exclude_touched: bool, ) -> Vec { use std::collections::BTreeSet; let page_size = page::DEFAULT_PAGE_SIZE; let mut bitwise_ops = Vec::new(); - let elf_page_data = build_init_page_data(elf, private_input); + let init_page_data = build_init_page_data(image); // Derive ALL page bases from memory_state (includes ELF + runtime pages) - let mut page_bases: BTreeSet = BTreeSet::new(); - for &addr in memory_state.cells.keys() { - page_bases.insert(page::page_base_for_address(addr)); - } + let page_bases: BTreeSet = memory_state.cells.page_bases().collect(); - // Build final state map from memory_state + // Build final state map from memory_state, matching `generate_page_tables`: + // when `exclude_touched`, touched cells (timestamp > 0) are dropped so PAGE + // emits `fini == init` for them, and the ARE_BYTES multiplicities here must + // agree (otherwise the AreBytes bus would not balance). let final_state: FinalStateMap = memory_state .cells .iter() - .map(|(&addr, &(value, timestamp))| (addr, FinalByteState { timestamp, value })) + .filter(|(_, cell)| !exclude_touched || cell.1 == 0) + .map(|(addr, (value, timestamp))| (addr, FinalByteState { timestamp, value })) .collect(); // For each page and each byte, add ARE_BYTES lookups for init and fini for &page_base in &page_bases { - let init_data = elf_page_data.get(&page_base); + let init_data = init_page_data.get(&page_base); for offset in 0..page_size { let addr = page_base + offset as u64; @@ -2008,13 +2091,10 @@ fn collect_bitwise_from_commit(commit_ops: &[CommitOperation]) -> Vec BitwiseOperation { BitwiseOperation::halfword( BitwiseOperationType::IsHalf, @@ -2345,30 +2425,32 @@ pub(crate) fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec /// every address accessed during execution (ELF init + runtime stores/loads). /// ELF pages get their init data from the binary; all others are zero-init. -fn generate_page_tables( - elf: &Elf, +fn generate_page_tables( + image: &I, memory_state: &MemoryState, private_input: &[u8], + exclude_touched: bool, ) -> ( Vec>, Vec, ) { use std::collections::BTreeSet; - // Collect init data from ELF segments + private input region - let init_page_data = build_init_page_data(elf, private_input); + // Per-page init bytes from the initial-memory image. + let init_page_data = build_init_page_data(image); // Derive ALL page bases from memory_state (includes ELF + runtime pages) - let mut page_bases: BTreeSet = BTreeSet::new(); - for &addr in memory_state.cells.keys() { - page_bases.insert(page::page_base_for_address(addr)); - } + let page_bases: BTreeSet = memory_state.cells.page_bases().collect(); - // Build final state map from memory_state + // Build final state map from memory_state. When `exclude_touched` (continuation + // epoch with L2G bookend), drop touched cells (timestamp > 0) so PAGE self- + // cancels them (init == fini, ts == 0) and the local-to-global table owns their + // Memory-bus init/fini instead. let final_state: FinalStateMap = memory_state .cells .iter() - .map(|(&addr, &(value, timestamp))| (addr, FinalByteState { timestamp, value })) + .filter(|(_, cell)| !exclude_touched || cell.1 == 0) + .map(|(addr, (value, timestamp))| (addr, FinalByteState { timestamp, value })) .collect(); // Generate PAGE tables and configs @@ -2481,6 +2563,14 @@ pub struct Traces { /// MEMW_R register-only fast-path traces (split into chunks of max_rows::MEMW_R) pub memw_registers: Vec>, + /// Local-to-global boundary table for continuation epochs. Empty unless the + /// continuation driver fills it with the boundary derived from + /// `touched_memory_cells`. + pub local_to_global: TraceTable, + /// Touched cells observed while replaying this epoch's logs, each as + /// `(address, end_value, end_timestamp)`. Populated only for continuation + /// epochs that use the L2G memory bookend. + pub touched_memory_cells: local_to_global::EpochTouches, // Auxiliary ALU / memory / CPU32 dispatch chips (split into chunks of their max_rows) pub eqs: Vec>, pub bytewises: Vec>, @@ -2569,11 +2659,16 @@ fn collect_all_ops( ec_scalar_ops: Vec, ecdas_ops: Vec, register_state: &mut RegisterState, + is_final: bool, ) -> CollectedOps { // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. // Must come before Phase 3 (LT from MEMW) so HALT ops get timestamp checks. - let halt_memw_ops = collect_halt_ops(register_state); - memw_ops.extend(halt_memw_ops); + // Only the final epoch terminates; intermediate epochs keep their boundary + // register state (no zeroizing) so it can seed the next epoch. + if is_final { + let halt_memw_ops = collect_halt_ops(register_state); + memw_ops.extend(halt_memw_ops); + } // Route MEMW_R (register fast-path) first, then MEMW_A (aligned), rest → MEMW. // Order matters: register ops would also pass is_aligned_op, so check first. @@ -2707,20 +2802,23 @@ fn collect_all_ops( /// Phases 3-5: From routed ops, produce all traces and assemble `Traces`. /// -/// `elf` controls PAGE table generation: `Some(elf)` generates real PAGE tables -/// and PAGE bitwise lookups; `None` produces empty page tables. +/// `initial_image` controls PAGE table generation: `Some(image)` generates real +/// PAGE tables and PAGE bitwise lookups seeded from the initial-memory image; +/// `None` produces empty page tables. #[allow(clippy::too_many_arguments)] -fn build_traces( +fn build_traces( ops: CollectedOps, - elf: Option<&Elf>, + initial_image: Option<&I>, memory_state: &MemoryState, - entry_point: u64, + register_init: &[u32], decode_trace: TraceTable, decode_pc_to_row: decode::PcToRow, mut register_state: RegisterState, max_rows: &super::MaxRowsConfig, #[cfg(feature = "disk-spill")] storage_mode: StorageMode, private_input: &[u8], + is_final: bool, + l2g_memory_bookend: bool, ) -> Result { let CollectedOps { cpu_ops, @@ -2775,9 +2873,17 @@ fn build_traces( bitwise_ops.extend(collect_bitwise_from_memw_aligned(&memw_aligned_ops)); // MEMW_R sends IS_HALFWORD[timestamp_0 - old_timestamp_lo - 1] bitwise_ops.extend(collect_bitwise_from_memw_register(&memw_register_ops)); - // PAGE tables do a batched ARE_BYTES[init, fini] lookup per row (C1+C2) - if let Some(elf) = elf { - bitwise_ops.extend(collect_bitwise_from_page(elf, memory_state, private_input)); + // PAGE tables do a batched ARE_BYTES[init, fini] lookup per row (C1+C2). + // Continuation epochs (l2g_memory_bookend) skip PAGE entirely (see the + // generate_page_tables call below), so they skip its AreBytes lookups too. + if let Some(image) = initial_image + && !l2g_memory_bookend + { + bitwise_ops.extend(collect_bitwise_from_page( + image, + memory_state, + l2g_memory_bookend, + )); } let public_output_bytes: Vec = commit_ops @@ -2804,22 +2910,28 @@ fn build_traces( // PHASE 5: Generate final traces (parallelized) // ===================================================================== - // Extract halt timestamp from the last ECALL instruction - let halt_op = cpu_ops - .iter() - .rev() - .find(|op| op.decode.fields.ecall) - .ok_or(Error::MissingHaltEcall)?; - let halt_timestamp = halt_op.timestamp; - let halt_next_pc = halt_op.next_pc; - - // Finalize the PC (x255) on the REGISTER table. The CPU padding rows carry - // pc=1 and chain the inline-PC `memory` tokens with a +4 timestamp cadence - // starting from the HALT chip's emit_pc at `halt_timestamp + 1`; the last - // padding write therefore lands at `halt_timestamp + 4*num_padding_rows + 1` - // (= `halt_timestamp + 1` when there is no padding). The REGISTER final token - // must match that last write to balance the memory argument. - register_state.write_pc(1, halt_timestamp + 4 * num_padding_rows as u64 + 1); + // A monolithic run or the final continuation epoch terminates on the program's + // halt ECALL. Intermediate continuation epochs do not halt, so fall back to the + // last cycle's timestamp and skip HALT-based PC finalization — the PC is carried + // to the next epoch via the register snapshot, and HALT is excluded from the + // proof (`include_halt = false`) so `halt_trace` is unused there. + let (halt_timestamp, halt_next_pc) = if is_final { + let halt_op = cpu_ops + .iter() + .rev() + .find(|op| op.decode.fields.ecall) + .ok_or(Error::MissingHaltEcall)?; + // Finalize the PC (x255) on the REGISTER table. The CPU padding rows carry + // pc=1 and chain the inline-PC `memory` tokens with a +4 timestamp cadence + // starting from the HALT chip's emit_pc at `halt_timestamp + 1`; the last + // padding write therefore lands at `halt_timestamp + 4*num_padding_rows + 1` + // (= `halt_timestamp + 1` when there is no padding). The REGISTER final token + // must match that last write to balance the memory argument. + register_state.write_pc(1, halt_op.timestamp + 4 * num_padding_rows as u64 + 1); + (halt_op.timestamp, halt_op.next_pc) + } else { + (cpu_ops.last().map(|op| op.timestamp).unwrap_or(0), 0) + }; let register_final_state = register_state.to_final_state_map(); @@ -2989,11 +3101,16 @@ fn build_traces( keccak_rc::update_multiplicities(&mut keccak_rc_trace, keccak_ops.len()); keccak_rc_trace }; - let gen_pages = || match elf { - Some(elf) => generate_page_tables(elf, memory_state, private_input), - None => (Vec::new(), Vec::new()), + let gen_pages = || match initial_image { + // Continuation epochs (l2g_memory_bookend) skip PAGE: the L2G table owns + // every touched cell's Memory init/fini, and every untouched PAGE row + // self-cancels (init==fini, ts=0), so PAGE contributes nothing here. + Some(image) if !l2g_memory_bookend => { + generate_page_tables(image, memory_state, private_input, l2g_memory_bookend) + } + _ => (Vec::new(), Vec::new()), }; - let gen_register = || register::generate_register_trace(®ister_final_state, entry_point); + let gen_register = || register::generate_register_trace(®ister_final_state, register_init); let gen_halt = || halt::generate_halt_trace(halt_timestamp, halt_next_pc); // ECSM accelerator traces (empty/all-padding for programs that do not use ECSM). let gen_ecsm = || ecsm::generate_ecsm_trace(&ecsm_ops); @@ -3148,6 +3265,16 @@ fn build_traces( } } + // Continuation callers derive the real cross-epoch boundary from this set and + // install its L2G trace after provenance is applied. Avoid building a + // throwaway genesis-only L2G trace here. + let touched_memory_cells = if l2g_memory_bookend { + touched_cells_from_memory_state(memory_state) + } else { + Vec::new() + }; + let local_to_global = local_to_global::generate_local_to_global_trace(&[]); + Ok(Traces { cpus, bitwise, @@ -3173,6 +3300,8 @@ fn build_traces( ec_scalar: ec_scalar_trace, ecdas: ecdas_trace, memw_registers, + local_to_global, + touched_memory_cells, eqs, bytewises, stores, @@ -3236,8 +3365,7 @@ pub fn count_table_lengths( let decode_rows = (instructions.len() as u64 + 1).next_power_of_two().max(2); // Memory + register state for partition predicates that need timestamps. - let mut memory_state = MemoryState::from_elf(elf); - memory_state.add_private_input(private_input); + let mut memory_state = MemoryState::from_image(&build_initial_image(elf, private_input)); let mut register_state = RegisterState::new(elf.entry_point); // Raw counts (pre-chunking + pre-padding). @@ -3478,6 +3606,8 @@ impl Traces { cpu32s, page_configs: _, public_output_bytes: _, + local_to_global: _, + touched_memory_cells: _, } = self; let mut total: u64 = 0; @@ -3609,6 +3739,8 @@ impl Traces { cpu32s, page_configs: _, public_output_bytes: _, + local_to_global: _, + touched_memory_cells: _, } = self; let mut total: u64 = 0; @@ -3699,7 +3831,7 @@ impl Traces { pub fn page_configs_from_elf(elf: &Elf) -> Vec { use std::collections::BTreeSet; - let init_page_data = build_init_page_data(elf, &[]); + let init_page_data = build_init_page_data(&build_initial_image(elf, &[])); let page_bases: BTreeSet = init_page_data.keys().copied().collect(); @@ -3812,6 +3944,54 @@ impl Traces { private_input: &[u8], #[cfg(feature = "disk-spill")] storage_mode: StorageMode, ) -> Result { + let initial_image = build_initial_image(elf, private_input); + let register_init = register::register_init_from_entry_point(elf.entry_point); + Self::from_image_and_logs( + elf, + &initial_image, + ®ister_init, + logs, + max_rows, + private_input, + true, + false, + #[cfg(feature = "disk-spill")] + storage_mode, + ) + } + + /// Build traces for one execution epoch starting from an explicit + /// initial-memory image (the epoch's starting memory) rather than the ELF + /// image. `elf` is still used for the program code (DECODE) and entry point. + /// + /// `register_init` is the epoch's starting register image (word address -> + /// value): the program-start image for the first epoch, or an epoch's boundary + /// register snapshot for later epochs. It seeds both `RegisterState` (for + /// first-access old values) and the REGISTER table's init column. + /// + /// `is_final` marks the last epoch: it applies HALT finalization (zeroize + /// registers, require the terminating ECALL). Intermediate epochs (`false`) + /// skip HALT and keep their boundary register/memory state. + #[allow(clippy::too_many_arguments)] + pub fn from_image_and_logs( + elf: &Elf, + initial_image: &I, + register_init: &[u32], + logs: &[Log], + max_rows: &super::MaxRowsConfig, + private_input: &[u8], + is_final: bool, + l2g_memory_bookend: bool, + #[cfg(feature = "disk-spill")] storage_mode: StorageMode, + ) -> Result { + // A non-final epoch must not contain the program-terminating instruction + // (next_pc == 0). Otherwise the CPU sends an ECALL bus token with no HALT + // table to receive it (HALT is excluded when !is_final), producing an + // unverifiable proof. Fail explicitly instead. + if !is_final && logs.iter().any(|log| log.next_pc == 0) { + return Err(Error::HaltInNonFinalEpoch); + } + // Phase 0: ELF → DECODE + instructions // IMPORTANT: Use generate_decode_trace (same as compute_precomputed_commitment) // so the DECODE trace row ordering matches the AIR's hardcoded commitment. @@ -3823,9 +4003,8 @@ impl Traces { let cpu_ops = collect_cpu_ops(logs, &instructions)?; // Phase 2: Collect + route all ops - let mut memory_state = MemoryState::from_elf(elf); - memory_state.add_private_input(private_input); - let mut register_state = RegisterState::new(elf.entry_point); + let mut memory_state = MemoryState::from_image(initial_image); + let mut register_state = RegisterState::from_init(register_init); let ( memw_ops, load_ops, @@ -3854,14 +4033,15 @@ impl Traces { ec_scalar_ops, ecdas_ops, &mut register_state, + is_final, ); // Phases 3-5 build_traces( ops, - Some(elf), + Some(initial_image), &memory_state, - elf.entry_point, + register_init, decode_trace, decode_pc_to_row, register_state, @@ -3869,6 +4049,8 @@ impl Traces { #[cfg(feature = "disk-spill")] storage_mode, private_input, + is_final, + l2g_memory_bookend, ) } @@ -3889,6 +4071,7 @@ impl Traces { // Phase 2: Collect + route all ops let mut memory_state = MemoryState::new(); let entry_point = cpu_ops.first().map_or(0, |op| op.decode.pc); + let register_init = register::register_init_from_entry_point(entry_point); let mut register_state = RegisterState::new(entry_point); let ( memw_ops, @@ -3918,6 +4101,7 @@ impl Traces { ec_scalar_ops, ecdas_ops, &mut register_state, + true, ); // DECODE (from_elf_and_logs does this in Phase 0; same result either way) @@ -3926,9 +4110,9 @@ impl Traces { // Phases 3-5 (elf=None → empty PAGE tables) build_traces( ops, - None, + None::<&HashMap>, &memory_state, - entry_point, + ®ister_init, decode_trace, decode_pc_to_row, register_state, @@ -3936,6 +4120,8 @@ impl Traces { #[cfg(feature = "disk-spill")] StorageMode::Ram, &[], + true, + false, ) } } diff --git a/prover/src/tables/types.rs b/prover/src/tables/types.rs index d6091d0fd..71c83284a 100644 --- a/prover/src/tables/types.rs +++ b/prover/src/tables/types.rs @@ -307,6 +307,13 @@ pub enum BusId { /// Scalar-bit bus: EC_SCALAR sends one per set bit (timestamp, bit_index); /// ECDAS receives one per add, ECSM receives the MSB. Bit = 30, + + // ========================================================================= + // Continuations + // ========================================================================= + /// Cross-epoch memory bus: the local-to-global table's per-cell init/fini + /// boundary claims, matched across epochs by the final aggregation LogUp. + GlobalMemory = 31, } impl BusId { @@ -336,6 +343,7 @@ impl BusId { BusId::Ecdas => "Ecdas", BusId::ServeK => "ServeK", BusId::Bit => "Bit", + BusId::GlobalMemory => "GlobalMemory", } } } @@ -368,6 +376,7 @@ impl TryFrom for BusId { 28 => Ok(BusId::Ecdas), 29 => Ok(BusId::ServeK), 30 => Ok(BusId::Bit), + 31 => Ok(BusId::GlobalMemory), other => Err(other), } } diff --git a/prover/src/tests/compute_commit_bus_offset_tests.rs b/prover/src/tests/compute_commit_bus_offset_tests.rs index 79c092ae2..ca6aab272 100644 --- a/prover/src/tests/compute_commit_bus_offset_tests.rs +++ b/prover/src/tests/compute_commit_bus_offset_tests.rs @@ -16,6 +16,7 @@ type E = GoldilocksExtension; /// future refactor of the batched routine must remain equivalent to this. fn naive_offset( public_output: &[u8], + start_index: u64, z: &FieldElement, alpha: &FieldElement, ) -> Option> { @@ -24,7 +25,7 @@ fn naive_offset( let mut total = FieldElement::::zero(); for (i, &value) in public_output.iter().enumerate() { let lc = bus_id - + (FieldElement::::from(i as u64) * alpha) + + (FieldElement::::from(start_index + i as u64) * alpha) + (FieldElement::::from(value as u64) * alpha_sq); let fingerprint = z - lc; total += fingerprint.inv().ok()?; @@ -37,7 +38,7 @@ fn test_empty_public_output_returns_zero() { let z = FieldElement::::from(7u64); let alpha = FieldElement::::from(11u64); assert_eq!( - compute_commit_bus_offset(&[], &z, &alpha), + compute_commit_bus_offset(&[], 0, &z, &alpha), Some(FieldElement::::zero()), ); } @@ -48,8 +49,8 @@ fn test_non_empty_matches_naive_per_element_inverse() { let alpha = FieldElement::::from(31_415_926u64); let public_output: [u8; 5] = [0x01, 0x02, 0xff, 0x10, 0x80]; - let batched = compute_commit_bus_offset(&public_output, &z, &alpha); - let naive = naive_offset(&public_output, &z, &alpha); + let batched = compute_commit_bus_offset(&public_output, 0, &z, &alpha); + let naive = naive_offset(&public_output, 0, &z, &alpha); assert_eq!(batched, naive); assert!(batched.is_some(), "no fingerprint should collide here"); @@ -61,16 +62,36 @@ fn test_longer_input_matches_naive() { let alpha = FieldElement::::from(0xcafe_babeu64); let public_output: Vec = (0..=255u16).map(|x| x as u8).collect(); - let batched = compute_commit_bus_offset(&public_output, &z, &alpha); - let naive = naive_offset(&public_output, &z, &alpha); + let batched = compute_commit_bus_offset(&public_output, 0, &z, &alpha); + let naive = naive_offset(&public_output, 0, &z, &alpha); assert_eq!(batched, naive); assert!(batched.is_some()); } +#[test] +fn test_nonzero_start_index_matches_naive() { + // A continuation epoch whose commits continue a prior epoch: the offset must + // index from the carried x254, not 0. + let z = FieldElement::::from(0x1234_5678u64); + let alpha = FieldElement::::from(0x9abc_def0u64); + let public_output: [u8; 3] = [0xCC, 0xDD, 0xEE]; + let start_index = 7u64; + + let batched = compute_commit_bus_offset(&public_output, start_index, &z, &alpha); + let naive = naive_offset(&public_output, start_index, &z, &alpha); + + assert_eq!(batched, naive); + assert!(batched.is_some()); + + // A different start index yields a different offset (the index is bound in). + let shifted = compute_commit_bus_offset(&public_output, start_index + 1, &z, &alpha); + assert_ne!(batched, shifted); +} + #[test] fn test_zero_fingerprint_returns_none() { - // Craft fingerprint_0 = 0: i = 0, value = 0, then + // Craft fingerprint_0 = 0: start_index = 0, value = 0, then // fingerprint_0 = z - (BusId::Commit + 0·α + 0·α²) = z - BusId::Commit. // Setting z = BusId::Commit forces the collision regardless of alpha. let z = FieldElement::::from(BusId::Commit as u64); @@ -78,7 +99,7 @@ fn test_zero_fingerprint_returns_none() { let public_output: [u8; 1] = [0]; assert_eq!( - compute_commit_bus_offset(&public_output, &z, &alpha), + compute_commit_bus_offset(&public_output, 0, &z, &alpha), None, "zero fingerprint must propagate as None", ); @@ -96,5 +117,8 @@ fn test_zero_fingerprint_in_middle_returns_none() { + (FieldElement::::from(3u64) * alpha_sq); let public_output: [u8; 4] = [1, 2, 3, 4]; - assert_eq!(compute_commit_bus_offset(&public_output, &z, &alpha), None,); + assert_eq!( + compute_commit_bus_offset(&public_output, 0, &z, &alpha), + None, + ); } diff --git a/prover/src/tests/local_to_global_bus_tests.rs b/prover/src/tests/local_to_global_bus_tests.rs new file mode 100644 index 000000000..263e3d938 --- /dev/null +++ b/prover/src/tests/local_to_global_bus_tests.rs @@ -0,0 +1,1082 @@ +//! Cross-epoch GlobalMemory bus tests for the local-to-global table. +//! +//! Proves+verifies that the `GlobalMemory` bus balances over the combined L2G +//! table plus two anchors: a genesis sender (program-start initial memory) and a +//! program-end receiver (final value of each cell). The bus balances iff every +//! epoch's `fini` matches the next epoch's `init` (the cross-epoch telescoping). + +use std::collections::HashMap; + +use crypto::fiat_shamir::default_transcript::DefaultTranscript; +use math::field::element::FieldElement; + +use stark::config::Commitment; +use stark::constraints::transition::TransitionConstraintEvaluator; +use stark::lookup::{ + AirWithBuses, AuxiliaryTraceBuildData, BusInteraction, BusValue, Multiplicity, + NullBoundaryConstraintBuilder, Packing, +}; +use stark::proof::options::ProofOptions; +use stark::proof::stark::MultiProof; +use stark::trace::TraceTable; +use stark::traits::AIR; +use stark::verifier::{IsStarkVerifier, Verifier}; + +use crate::tables::bitwise::{BitwiseOperation, BitwiseOperationType}; +use crate::tables::local_to_global::{ + self, CellBoundary, GENESIS_EPOCH, epoch_boundaries, generate_local_to_global_trace, +}; +use crate::tables::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; +use crate::test_utils::multi_prove_ram; + +type F = GoldilocksField; +type E = GoldilocksExtension; + +/// Columns of an anchor trace: one GlobalMemory token `(address, value, epoch)` +/// per row, packed in the same order as the L2G init/fini tokens (no timestamp — +/// the cross-epoch chain is ordered by epoch). +mod anchor_cols { + pub const ADDR_LO: usize = 0; + pub const ADDR_HI: usize = 1; + pub const VAL: usize = 2; + pub const EPOCH: usize = 3; + pub const NUM_COLUMNS: usize = 4; +} + +type Token = (u64, u64, u64); + +fn l2g_air( + proof_options: &ProofOptions, + epoch_label: u64, +) -> AirWithBuses { + let transition_constraints: Vec>> = vec![]; + AirWithBuses::new( + local_to_global::cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: local_to_global::bus_interactions(epoch_label), + }, + proof_options, + 1, + transition_constraints, + ) +} + +fn anchor_air( + proof_options: &ProofOptions, + is_sender: bool, +) -> AirWithBuses { + let transition_constraints: Vec>> = vec![]; + let values = vec![ + BusValue::Packed { + start_column: anchor_cols::ADDR_LO, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: anchor_cols::ADDR_HI, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: anchor_cols::VAL, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: anchor_cols::EPOCH, + packing: Packing::Direct, + }, + ]; + let interaction = if is_sender { + BusInteraction::sender(BusId::GlobalMemory, Multiplicity::One, values) + } else { + BusInteraction::receiver(BusId::GlobalMemory, Multiplicity::One, values) + }; + AirWithBuses::new( + anchor_cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: vec![interaction], + }, + proof_options, + 1, + transition_constraints, + ) +} + +fn anchor_trace(tokens: &[Token]) -> TraceTable { + let num_rows = tokens.len().next_power_of_two().max(4); + let mut data = vec![FE::zero(); num_rows * anchor_cols::NUM_COLUMNS]; + for (i, &(addr, value, epoch)) in tokens.iter().enumerate() { + let base = i * anchor_cols::NUM_COLUMNS; + data[base + anchor_cols::ADDR_LO] = FE::from(addr & 0xFFFF_FFFF); + data[base + anchor_cols::ADDR_HI] = FE::from(addr >> 32); + data[base + anchor_cols::VAL] = FE::from(value & 0xFF); + data[base + anchor_cols::EPOCH] = FE::from(epoch); + } + TraceTable::new_main(data, anchor_cols::NUM_COLUMNS, 1) +} + +/// L2G air on the epoch-LOCAL `Memory` bus (uses `memory_bus_interactions`). +fn l2g_memory_air( + proof_options: &ProofOptions, +) -> AirWithBuses { + let transition_constraints: Vec>> = vec![]; + AirWithBuses::new( + local_to_global::cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: local_to_global::memory_bus_interactions(), + }, + proof_options, + 1, + transition_constraints, + ) +} + +/// Columns of a MEMW-substitute trace: per touched byte, the `Memory` tokens the +/// real access chain would emit — opposite polarity to L2G's bookend. +mod memw_sub_cols { + pub const ADDR_LO: usize = 0; + pub const ADDR_HI: usize = 1; + pub const INIT_VAL: usize = 2; + pub const FINI_TS_LO: usize = 3; + pub const FINI_TS_HI: usize = 4; + pub const FINI_VAL: usize = 5; + pub const NUM_COLUMNS: usize = 6; +} + +/// Minimal BITWISE-receiver substitute for the L2G range-check buses. It receives +/// the same AreBytes, IsHalfword, and IsB20 tokens that the real BITWISE table +/// would receive, but only for rows supplied by the test. +mod range_recv_cols { + pub const X: usize = 0; + pub const Y: usize = 1; + pub const Z: usize = 2; + pub const MU_ARE_BYTES: usize = 3; + pub const MU_IS_HALF: usize = 4; + pub const MU_IS_B20: usize = 5; + pub const NUM_COLUMNS: usize = 6; +} + +/// MEMW-substitute air: counterpart to `memory_bus_interactions`. Sends each +/// cell's init token at ts=0 (cancelling L2G's init-receive) and receives each +/// cell's fini token at the last timestamp (cancelling L2G's fini-send). +fn memw_sub_air( + proof_options: &ProofOptions, +) -> AirWithBuses { + let transition_constraints: Vec>> = vec![]; + let init_send = BusInteraction::sender( + BusId::Memory, + Multiplicity::One, + vec![ + BusValue::constant(0), + BusValue::Packed { + start_column: memw_sub_cols::ADDR_LO, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: memw_sub_cols::ADDR_HI, + packing: Packing::Direct, + }, + BusValue::constant(0), + BusValue::constant(0), + BusValue::Packed { + start_column: memw_sub_cols::INIT_VAL, + packing: Packing::Direct, + }, + ], + ); + let fini_recv = BusInteraction::receiver( + BusId::Memory, + Multiplicity::One, + vec![ + BusValue::constant(0), + BusValue::Packed { + start_column: memw_sub_cols::ADDR_LO, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: memw_sub_cols::ADDR_HI, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: memw_sub_cols::FINI_TS_LO, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: memw_sub_cols::FINI_TS_HI, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: memw_sub_cols::FINI_VAL, + packing: Packing::Direct, + }, + ], + ); + AirWithBuses::new( + memw_sub_cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: vec![init_send, fini_recv], + }, + proof_options, + 1, + transition_constraints, + ) +} + +fn l2g_range_air( + proof_options: &ProofOptions, + epoch_label: u64, +) -> AirWithBuses { + let transition_constraints: Vec>> = vec![]; + AirWithBuses::new( + local_to_global::cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: local_to_global::range_check_interactions(epoch_label), + }, + proof_options, + 1, + transition_constraints, + ) +} + +fn range_receiver_air( + proof_options: &ProofOptions, +) -> AirWithBuses { + let transition_constraints: Vec>> = vec![]; + let interactions = vec![ + BusInteraction::receiver( + BusId::AreBytes, + Multiplicity::Column(range_recv_cols::MU_ARE_BYTES), + vec![ + BusValue::Packed { + start_column: range_recv_cols::X, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: range_recv_cols::Y, + packing: Packing::Direct, + }, + ], + ), + BusInteraction::receiver( + BusId::IsHalfword, + Multiplicity::Column(range_recv_cols::MU_IS_HALF), + vec![BusValue::linear(vec![ + stark::lookup::LinearTerm::Column { + coefficient: 1, + column: range_recv_cols::X, + }, + stark::lookup::LinearTerm::Column { + coefficient: 256, + column: range_recv_cols::Y, + }, + ])], + ), + BusInteraction::receiver( + BusId::IsB20, + Multiplicity::Column(range_recv_cols::MU_IS_B20), + vec![BusValue::linear(vec![ + stark::lookup::LinearTerm::Column { + coefficient: 1, + column: range_recv_cols::X, + }, + stark::lookup::LinearTerm::Column { + coefficient: 256, + column: range_recv_cols::Y, + }, + stark::lookup::LinearTerm::Column { + coefficient: 65536, + column: range_recv_cols::Z, + }, + ])], + ), + ]; + AirWithBuses::new( + range_recv_cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { interactions }, + proof_options, + 1, + transition_constraints, + ) +} + +fn range_receiver_trace(ops: &[BitwiseOperation]) -> TraceTable { + let num_rows = ops.len().next_power_of_two().max(4); + let mut data = vec![FE::zero(); num_rows * range_recv_cols::NUM_COLUMNS]; + for (i, op) in ops.iter().enumerate() { + let base = i * range_recv_cols::NUM_COLUMNS; + data[base + range_recv_cols::X] = FE::from(op.x as u64); + data[base + range_recv_cols::Y] = FE::from(op.y as u64); + data[base + range_recv_cols::Z] = FE::from(op.z as u64); + let mu_col = match op.lookup_type { + BitwiseOperationType::AreBytes => range_recv_cols::MU_ARE_BYTES, + BitwiseOperationType::IsHalf => range_recv_cols::MU_IS_HALF, + BitwiseOperationType::IsB20 => range_recv_cols::MU_IS_B20, + _ => panic!("unexpected L2G range-check lookup"), + }; + data[base + mu_col] = FE::one(); + } + TraceTable::new_main(data, range_recv_cols::NUM_COLUMNS, 1) +} + +fn memw_sub_trace(boundary: &[CellBoundary]) -> TraceTable { + let num_rows = boundary.len().next_power_of_two().max(4); + let mut data = vec![FE::zero(); num_rows * memw_sub_cols::NUM_COLUMNS]; + for (i, b) in boundary.iter().enumerate() { + let base = i * memw_sub_cols::NUM_COLUMNS; + data[base + memw_sub_cols::ADDR_LO] = FE::from(b.address & 0xFFFF_FFFF); + data[base + memw_sub_cols::ADDR_HI] = FE::from(b.address >> 32); + data[base + memw_sub_cols::INIT_VAL] = FE::from(b.init.value & 0xFF); + data[base + memw_sub_cols::FINI_TS_LO] = FE::from(b.fini.timestamp & 0xFFFF_FFFF); + data[base + memw_sub_cols::FINI_TS_HI] = FE::from(b.fini.timestamp >> 32); + data[base + memw_sub_cols::FINI_VAL] = FE::from(b.fini.value & 0xFF); + } + TraceTable::new_main(data, memw_sub_cols::NUM_COLUMNS, 1) +} + +/// Prove + verify the epoch-local `Memory` bus over L2G's bookend (built from +/// `l2g_boundary`) plus the MEMW-substitute chain (built from `memw_boundary`). +/// Equal boundaries balance; a mismatch leaves the bus unbalanced. +fn prove_verify_memory(l2g_boundary: &[CellBoundary], memw_boundary: &[CellBoundary]) -> bool { + let proof_options = ProofOptions::default_test_options(); + let l2g = l2g_memory_air(&proof_options); + let memw = memw_sub_air(&proof_options); + let mut l2g_trace = generate_local_to_global_trace(l2g_boundary); + let mut memw_trace = memw_sub_trace(memw_boundary); + let pairs: Vec<( + &dyn AIR, + _, + _, + )> = vec![(&l2g, &mut l2g_trace, &()), (&memw, &mut memw_trace, &())]; + let proof = multi_prove_ram(pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + let airs: Vec<&dyn AIR> = vec![&l2g, &memw]; + Verifier::multi_verify( + &airs, + &proof, + &mut DefaultTranscript::::new(&[]), + &FieldElement::zero(), + ) +} + +fn prove_verify_l2g_range_with_trace( + l2g_trace: &mut TraceTable, + range_ops: &[BitwiseOperation], + epoch_label: u64, +) -> bool { + let proof_options = ProofOptions::default_test_options(); + let l2g = l2g_range_air(&proof_options, epoch_label); + let receiver = range_receiver_air(&proof_options); + let mut receiver_trace = range_receiver_trace(range_ops); + let pairs: Vec<( + &dyn AIR, + _, + _, + )> = vec![ + (&l2g, l2g_trace, &()), + (&receiver, &mut receiver_trace, &()), + ]; + let proof = multi_prove_ram(pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + let airs: Vec<&dyn AIR> = + vec![&l2g, &receiver]; + Verifier::multi_verify( + &airs, + &proof, + &mut DefaultTranscript::::new(&[]), + &FieldElement::zero(), + ) +} + +/// Inert L2G AIR: commits the trace columns with no bus and no constraints — +/// the deterministic commitment an epoch proof publishes for its L2G table. The +/// main-trace Merkle root is over the main columns only, so it matches the L2G +/// sub-table root committed in the bus proof. +fn inert_l2g_air( + proof_options: &ProofOptions, +) -> AirWithBuses { + let transition_constraints: Vec>> = vec![]; + AirWithBuses::new( + local_to_global::cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: vec![], + }, + proof_options, + 1, + transition_constraints, + ) +} + +/// Commit one epoch's L2G trace in a minimal proof and return its Merkle root — +/// the `R_i` an epoch proof publishes for that epoch. +fn l2g_root(boundary: &[CellBoundary]) -> Commitment { + let proof_options = ProofOptions::default_test_options(); + let air = inert_l2g_air(&proof_options); + let mut trace = generate_local_to_global_trace(boundary); + let pairs: Vec<( + &dyn AIR, + _, + _, + )> = vec![(&air, &mut trace, &())]; + let proof = multi_prove_ram(pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + proof.proofs[0].lde_trace_main_merkle_root +} + +/// Prove the cross-epoch GlobalMemory bus over one L2G sub-table per epoch plus +/// the genesis/program-end anchors. The first N sub-tables (epoch order) are the +/// per-epoch L2G tables. +pub(crate) fn prove_global(boundaries: &[Vec]) -> MultiProof { + let all: Vec = boundaries.iter().flatten().copied().collect(); + + // Genesis anchor: a SEND token for each cell first touched from program memory. + let genesis: Vec = all + .iter() + .filter(|b| b.init.originating_epoch == GENESIS_EPOCH) + .map(|b| (b.address, b.init.value, b.init.originating_epoch)) + .collect(); + + // Program-end anchor: a RECEIVE token for each cell's final fini (epochs are + // in order, so the last write wins). + let mut final_fini: HashMap = HashMap::new(); + for epoch in boundaries { + for b in epoch { + final_fini.insert(b.address, (b.address, b.fini.value, b.fini.epoch)); + } + } + let program_end: Vec = final_fini.into_values().collect(); + + let mut l2g_traces: Vec> = boundaries + .iter() + .map(|epoch| generate_local_to_global_trace(epoch)) + .collect(); + let mut genesis_trace = anchor_trace(&genesis); + let mut program_end_trace = anchor_trace(&program_end); + + let proof_options = ProofOptions::default_test_options(); + // One L2G air per epoch, each carrying its 1-based `fini_epoch` constant. + let l2g_airs: Vec<_> = (0..boundaries.len()) + .map(|i| l2g_air(&proof_options, local_to_global::epoch_label(i as u64))) + .collect(); + let genesis_anchor = anchor_air(&proof_options, true); + let program_end_anchor = anchor_air(&proof_options, false); + + // Per-epoch L2G sub-tables (each with its own air), then the anchors. + let mut air_trace_pairs: Vec<( + &dyn AIR, + _, + _, + )> = l2g_airs + .iter() + .zip(l2g_traces.iter_mut()) + .map(|(air, trace)| { + ( + air as &dyn AIR, + trace, + &(), + ) + }) + .collect(); + air_trace_pairs.push((&genesis_anchor, &mut genesis_trace, &())); + air_trace_pairs.push((&program_end_anchor, &mut program_end_trace, &())); + + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap() +} + +pub(crate) fn prove_and_verify(boundaries: &[Vec]) -> bool { + let proof = prove_global(boundaries); + + let proof_options = ProofOptions::default_test_options(); + let l2g_airs: Vec<_> = (0..boundaries.len()) + .map(|i| l2g_air(&proof_options, local_to_global::epoch_label(i as u64))) + .collect(); + let genesis_anchor = anchor_air(&proof_options, true); + let program_end_anchor = anchor_air(&proof_options, false); + + // air_refs must match the air_trace_pairs order: one L2G air per epoch, then anchors. + let mut airs: Vec<&dyn AIR> = l2g_airs + .iter() + .map(|a| a as &dyn AIR) + .collect(); + airs.push(&genesis_anchor); + airs.push(&program_end_anchor); + + Verifier::multi_verify( + &airs, + &proof, + &mut DefaultTranscript::::new(&[]), + &FieldElement::zero(), + ) +} + +#[test] +fn test_global_memory_bus_balances() { + // Cell 10 touched in epochs 0,1,2; cell 20 in epoch 0 then again epoch 2 + // (skipping 1); cell 30 once. + let initial_memory = HashMap::from([(10u64, 5u64)]); + let epochs = vec![ + vec![(10, 7, 3), (20, 9, 4)], + vec![(10, 8, 10)], + vec![(20, 9, 20), (30, 1, 21)], + ]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + assert!(prove_and_verify(&boundaries)); +} + +#[test] +fn test_global_memory_bus_rejects_tampered_boundary() { + let initial_memory = HashMap::from([(10u64, 5u64)]); + let epochs = vec![vec![(10, 7, 3)], vec![(10, 8, 10)]]; + let mut boundaries = epoch_boundaries(&initial_memory, &epochs); + assert!(prove_and_verify(&boundaries)); + + // Break the chain: epoch 0 now claims a different fini than epoch 1's init. + boundaries[0][0].fini.value = 999; + assert!(!prove_and_verify(&boundaries)); +} + +#[test] +fn test_l2g_binding_holds() { + // Per-epoch L2G roots committed by the epoch proofs match the per-epoch L2G + // sub-table roots in the final cross-epoch proof. + let initial_memory = HashMap::from([(10u64, 5u64)]); + let epochs = vec![ + vec![(10, 7, 3), (20, 9, 4)], + vec![(10, 8, 10)], + vec![(20, 9, 20), (30, 1, 21)], + ]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + + let final_proof = prove_global(&boundaries); + let roots: Vec = boundaries.iter().map(|b| l2g_root(b)).collect(); + + assert!(crate::verify_l2g_commitment_binding(&roots, &final_proof)); +} + +#[test] +fn test_l2g_binding_rejects_mismatch() { + // The final proof uses a DIFFERENT epoch-0 L2G table than the epoch proofs + // committed, so the binding must reject it. + let initial_memory = HashMap::from([(10u64, 5u64)]); + let epochs = vec![ + vec![(10, 7, 3), (20, 9, 4)], + vec![(10, 8, 10)], + vec![(20, 9, 20), (30, 1, 21)], + ]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + + // Honest per-epoch roots. + let roots: Vec = boundaries.iter().map(|b| l2g_root(b)).collect(); + + // Final proof built over a tampered epoch-0 L2G table. + let mut tampered = boundaries.clone(); + tampered[0][0].fini.value = 999; + let final_proof = prove_global(&tampered); + + assert!(!crate::verify_l2g_commitment_binding(&roots, &final_proof)); +} + +// ========================================================================= +// Helpers for soundness regression tests +// ========================================================================= + +/// Like `prove_verify_memory` but accepts a pre-built (possibly mutated) +/// l2g trace instead of deriving it from a boundary slice. +/// +/// Used by tests that forge individual columns (MU, epoch halfwords) after +/// trace generation — the mutation must survive into the proof so the +/// verifier sees the forged commitment. +fn prove_verify_memory_with_trace( + l2g_trace: &mut TraceTable, + memw_boundary: &[CellBoundary], +) -> bool { + let proof_options = ProofOptions::default_test_options(); + let l2g = l2g_memory_air(&proof_options); + let memw = memw_sub_air(&proof_options); + let mut memw_trace = memw_sub_trace(memw_boundary); + let pairs: Vec<( + &dyn AIR, + _, + _, + )> = vec![(&l2g, l2g_trace, &()), (&memw, &mut memw_trace, &())]; + let proof = multi_prove_ram(pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + let airs: Vec<&dyn AIR> = vec![&l2g, &memw]; + Verifier::multi_verify( + &airs, + &proof, + &mut DefaultTranscript::::new(&[]), + &FieldElement::zero(), + ) +} + +/// Like `prove_global` (and `prove_and_verify`) but accepts pre-built l2g +/// traces (one per epoch) so that column mutations applied before this call +/// survive into the proof. +/// +/// Returns `true` iff the multi-table verifier accepts the proof. +fn prove_and_verify_global_with_traces( + boundaries: &[Vec], + l2g_traces: &mut [TraceTable], +) -> bool { + let all: Vec = boundaries.iter().flatten().copied().collect(); + + let genesis: Vec = all + .iter() + .filter(|b| b.init.originating_epoch == GENESIS_EPOCH) + .map(|b| (b.address, b.init.value, b.init.originating_epoch)) + .collect(); + + let mut final_fini: HashMap = HashMap::new(); + for epoch in boundaries { + for b in epoch { + final_fini.insert(b.address, (b.address, b.fini.value, b.fini.epoch)); + } + } + let program_end: Vec = final_fini.into_values().collect(); + + let mut genesis_trace = anchor_trace(&genesis); + let mut program_end_trace = anchor_trace(&program_end); + + let proof_options = ProofOptions::default_test_options(); + let l2g_airs: Vec<_> = (0..boundaries.len()) + .map(|i| l2g_air(&proof_options, local_to_global::epoch_label(i as u64))) + .collect(); + let genesis_anchor = anchor_air(&proof_options, true); + let program_end_anchor = anchor_air(&proof_options, false); + + let mut air_trace_pairs: Vec<( + &dyn AIR, + _, + _, + )> = l2g_airs + .iter() + .zip(l2g_traces.iter_mut()) + .map(|(air, trace)| { + ( + air as &dyn AIR, + trace, + &(), + ) + }) + .collect(); + air_trace_pairs.push((&genesis_anchor, &mut genesis_trace, &())); + air_trace_pairs.push((&program_end_anchor, &mut program_end_trace, &())); + + let proof = multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + + let mut airs: Vec<&dyn AIR> = l2g_airs + .iter() + .map(|a| a as &dyn AIR) + .collect(); + airs.push(&genesis_anchor); + airs.push(&program_end_anchor); + + Verifier::multi_verify( + &airs, + &proof, + &mut DefaultTranscript::::new(&[]), + &FieldElement::zero(), + ) +} + +// ========================================================================= +// Soundness regression tests: MU selector (Design X / Statement S) +// ========================================================================= + +/// (1a) MU=0 on a real row silences its Memory-bus tokens → the bus dangles. +/// +/// Property guarded: the `MU` selector gates EVERY L2G interaction on the +/// epoch-local Memory bus. Clearing MU on a genuinely-touched cell means its +/// init-receive and fini-send never fire; the MEMW-substitute chain still +/// sends/receives for that cell, leaving both tokens unmatched → bus +/// imbalance → proof must fail. +/// +/// Modelled on `test_local_memory_bus_rejects_tamper` (same Memory-bus path) +/// extended to mutate MU rather than a value column, using the new +/// `prove_verify_memory_with_trace` helper. +#[test] +fn test_l2g_mu_zero_on_real_row_rejects() { + // Two touched cells; row 0 is real (MU=1). We forge row 0's MU to 0. + let initial_memory = HashMap::from([(10u64, 5u64)]); + let epochs = vec![vec![(10, 7, 3), (20, 9, 4)]]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + + // Honest round-trip must pass. + assert!( + prove_verify_memory(&boundaries[0], &boundaries[0]), + "baseline must verify before forgery" + ); + + // Forge: clear MU on the first real row. + let mut forged_trace = generate_local_to_global_trace(&boundaries[0]); + forged_trace + .main_table + .set(0, local_to_global::cols::MU, FE::zero()); + + // The Memory bus is now unbalanced: L2G's init-receive and fini-send for + // cell 10 are silenced (MU=0), but the MEMW-substitute sends cell 10's + // init and expects its fini — neither token finds its counterpart. + assert!( + !prove_verify_memory_with_trace(&mut forged_trace, &boundaries[0]), + "MU=0 on a real row must cause the Memory bus to reject" + ); +} + +/// (1b) MU=1 on a padding row injects phantom tokens → the GlobalMemory bus +/// cannot balance. +/// +/// Property guarded: same Design-X property, opposite direction. A padding row +/// with MU=1 fires a spurious init-receive and fini-send on the GlobalMemory +/// bus. The two phantom tokens carry different values — the init token carries +/// originating_epoch=0 (zero-filled padding) while the fini token carries +/// `fini_epoch=epoch_label` (the per-table constant, always ≥ 1). Because the +/// epoch field differs, the phantom receive and send do NOT self-cancel; neither +/// the genesis anchor nor the program-end anchor has a matching row for address 0 +/// → both tokens dangle → bus imbalance → proof fails. +/// +/// Note: the epoch-local Memory bus would NOT catch this because the phantom +/// row's init and fini tokens are identical (all columns zero) and self-cancel +/// in the LogUp. The GlobalMemory bus carries the epoch constant in the fini +/// token but not the init token, breaking the self-cancellation. +/// +/// Three real boundaries pad to four rows; row 3 is the padding row (all-zero). +/// Uses `prove_and_verify_global_with_traces` (same path as test 1c and test 3). +#[test] +fn test_l2g_mu_one_on_padding_row_rejects_global_bus() { + // 3 real rows → 4-row trace (padding row at index 3). + let initial_memory = HashMap::new(); + let epochs = vec![vec![(10, 7, 3), (20, 9, 4), (30, 1, 5)]]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + assert_eq!(boundaries[0].len(), 3, "expect 3 real rows"); + + // Honest baseline on the GlobalMemory bus. + assert!( + prove_and_verify(&boundaries), + "baseline must verify before forgery" + ); + + // Forge: set MU=1 on the padding row (row 3, all-zero columns). + let mut forged_trace = generate_local_to_global_trace(&boundaries[0]); + let num_rows = forged_trace.num_rows(); + assert_eq!(num_rows, 4, "trace must be padded to 4 rows"); + forged_trace + .main_table + .set(3, local_to_global::cols::MU, FE::one()); + let mut l2g_traces = vec![forged_trace]; + + // The phantom row fires on the GlobalMemory bus: + // - init-receive: epoch=0 (zero-filled), addr=0 — no genesis anchor row sends this. + // - fini-send: epoch=epoch_label=1, addr=0 — no program-end anchor receives this. + // The two tokens differ in the epoch field, so they do not self-cancel. + assert!( + !prove_and_verify_global_with_traces(&boundaries, &mut l2g_traces), + "MU=1 on a padding row must cause the GlobalMemory bus to reject" + ); +} + +/// (1c) MU=2 (non-boolean) on a real row unbalances the GlobalMemory bus. +/// +/// Property guarded: MU is the LogUp multiplicity for ALL bus interactions. +/// With MU=2 the fini-sender fires twice but the program-end anchor receives +/// only once, and the init-receiver fires twice but the genesis anchor sends +/// only once → both sides of the GlobalMemory bus are off by 1 → proof fails. +/// +/// Uses `prove_and_verify_global_with_traces` (forked from `prove_global`) +/// to inject the pre-mutated trace. Modelled on +/// `test_prove_elfs_ecsm_forged_ecdas_mu_rejected` (prove_elfs_tests.rs:1230) +/// for the "set MU to 2, assert reject" pattern. +#[test] +fn test_l2g_mu_nonboolean_rejects_global_bus() { + let initial_memory = HashMap::from([(10u64, 5u64)]); + let epochs = vec![vec![(10, 7, 3)]]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + + // Honest baseline on the GlobalMemory bus. + assert!( + prove_and_verify(&boundaries), + "baseline must verify before forgery" + ); + + // Forge: set MU=2 on row 0 of epoch 0's L2G trace. + let mut l2g_trace = generate_local_to_global_trace(&boundaries[0]); + l2g_trace + .main_table + .set(0, local_to_global::cols::MU, FE::from(2u64)); + let mut l2g_traces = vec![l2g_trace]; + + // Multiplicity 2 on both the init-receiver and fini-sender; genesis and + // program-end anchors only send/receive multiplicity 1 → bus imbalance. + assert!( + !prove_and_verify_global_with_traces(&boundaries, &mut l2g_traces), + "MU=2 (non-boolean) must cause the GlobalMemory bus to reject" + ); +} + +// ========================================================================= +// Soundness regression tests: init_epoch ordering (IsB20) +// ========================================================================= + +/// (2) Forged init_epoch violating the ordering constraint is rejected. +/// +/// Property guarded: `init_epoch < fini_epoch` is enforced via an IsB20 +/// lookup on `fini_epoch − 1 − init_epoch`. A forged row that claims +/// `init_epoch >= fini_epoch` causes the difference to underflow in the +/// field to a value far outside [0, 2^20); no matching IsB20 row exists in +/// the BITWISE table, so the range-check bus cannot balance and the proof +/// must fail. +/// +/// The ordering check lives on `range_check_interactions`, which is wired to +/// the BITWISE table inside the epoch proof. The epoch-local `l2g_memory_air` +/// in this test file does NOT include `range_check_interactions` — it only +/// covers the Memory bus. The full range-check path (with a live BITWISE +/// table) is exercised inside the epoch prover in `continuation.rs` +/// (`l2g_memory_air` there concatenates both, see line 155-159). Wiring the +/// complete BITWISE sub-proof here would require replicating `prove_epoch`'s +/// full table set, which is out of scope for a unit bus test. +/// +/// This test asserts the arithmetic property that makes the attack fail. +/// `test_ordering_rejects_future_reference` in +/// `local_to_global.rs::tests` (line 831) already verifies that the field +/// value `fini_epoch − 1 − init_epoch` wraps to a value ≥ 2^20 for both +/// self-references and future-references, so no IsB20 row matches. The +/// proof-level bus path is covered by +/// `test_l2g_init_epoch_ordering_live_is_b20_rejects` below. +/// +/// Variants that ARE expressible without the full bitwise table: +/// - Self-reference (init_epoch == fini_epoch) and future-reference +/// (init_epoch > fini_epoch) are both covered by the arithmetic check. +/// - The GlobalMemory bus itself does NOT enforce the ordering; it only +/// checks that tokens match across epochs. The IsB20 sender is wired +/// exclusively on the epoch-local table (which carries the BITWISE provider). +/// +/// The paired live-bus test wires an L2G range-check AIR to a minimal BITWISE +/// receiver table and proves that a self-reference rejects through IsB20. +#[test] +fn test_l2g_init_epoch_ordering_field_arithmetic() { + // Verify the arithmetic property that underlies the IsB20 soundness argument + // without running a full proof. The ordering sender computes: + // fini_epoch − 1 − init_epoch (in the Goldilocks field) + // For an honest row this is a small non-negative integer in [0, 2^20). + // For a forged row it wraps to a huge field value outside [0, 2^20). + + let order_field_value = |fini_label: u64, init_epoch: u64| -> u64 { + // Replicate the field arithmetic: FE::from(fini_label - 1) - FE::from(init_epoch). + // The Goldilocks prime is 2^64 - 2^32 + 1. + let result = FE::from(fini_label - 1) - FE::from(init_epoch); + *result.value() + }; + + // Honest: epoch 2 consuming genesis (epoch 0) fini → 2 - 1 - 0 = 1. + assert!(order_field_value(2, GENESIS_EPOCH) < (1 << 20)); + + // Honest: epoch 5 consuming epoch 2's fini → 5 - 1 - 2 = 2. + assert!(order_field_value(5, 2) < (1 << 20)); + + // Forged self-reference: init_epoch == fini_epoch → 5 - 1 - 5 = -1 in field. + let self_ref = order_field_value(5, 5); + assert!( + self_ref >= (1 << 20), + "self-reference must produce a value outside the IsB20 range (got {self_ref})" + ); + + // Forged future-reference: init_epoch > fini_epoch → 5 - 1 - 9 < 0 in field. + let future_ref = order_field_value(5, 9); + assert!( + future_ref >= (1 << 20), + "future-reference must produce a value outside the IsB20 range (got {future_ref})" + ); +} + +#[test] +fn test_l2g_init_epoch_ordering_live_is_b20_rejects() { + // Epoch 1 consumes epoch 0's fini for cell 10. Honest ordering has + // init_epoch=1, fini_epoch=2, so 2 - 1 - 1 = 0 is a valid IsB20 lookup. + let initial_memory = HashMap::new(); + let epochs = vec![vec![(10, 7, 3)], vec![(10, 8, 10)]]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + let boundary = &boundaries[1]; + let epoch_label = boundary[0].fini.epoch; + assert_eq!(epoch_label, 2); + + let mut honest_trace = generate_local_to_global_trace(boundary); + let honest_ops = local_to_global::collect_bitwise_from_l2g(boundary); + assert!( + prove_verify_l2g_range_with_trace(&mut honest_trace, &honest_ops, epoch_label), + "honest L2G range checks must balance against BITWISE receivers" + ); + + // Forge a self-reference: init_epoch == fini_epoch. The halfword lookups are + // still satisfiable, so the receiver table below includes them. The missing + // piece is exactly IsB20[2 - 1 - 2], which underflows in the field and has no + // valid 20-bit receiver row. + let mut forged_trace = generate_local_to_global_trace(boundary); + forged_trace.main_table.set( + 0, + local_to_global::cols::INIT_EPOCH_0, + FE::from(epoch_label), + ); + forged_trace + .main_table + .set(0, local_to_global::cols::INIT_EPOCH_1, FE::zero()); + + let cell = boundary[0]; + let forged_ops = vec![ + BitwiseOperation::byte_op( + BitwiseOperationType::AreBytes, + (cell.init.value & 0xFF) as u8, + (cell.fini.value & 0xFF) as u8, + ), + BitwiseOperation::halfword( + BitwiseOperationType::IsHalf, + (epoch_label & 0xFF) as u8, + ((epoch_label >> 8) & 0xFF) as u8, + ), + BitwiseOperation::halfword(BitwiseOperationType::IsHalf, 0, 0), + ]; + assert!( + !prove_verify_l2g_range_with_trace(&mut forged_trace, &forged_ops, epoch_label), + "self-referential init_epoch must fail through the live IsB20 bus" + ); +} + +// ========================================================================= +// Soundness regression tests: Design-Y orphan attack +// ========================================================================= + +/// (3) Design-Y orphan attack: MU=0 on a later epoch's L2G row truncates the +/// cross-epoch chain → the GlobalMemory bus rejects. +/// +/// Property guarded: setting MU=0 on an L2G row for epoch i+1 silences that +/// epoch's fini-send on the GlobalMemory bus. If the global finalisation +/// (program-end anchor) still expects the last fini to come from epoch i+1, +/// the fini token is never sent → program-end anchor receives a token that +/// nobody sent → bus imbalance. +/// +/// Concretely: cell 10 is touched in both epoch 0 (label 1) and epoch 1 +/// (label 2). The forged trace sets MU=0 on epoch 1's L2G row for cell 10. +/// Epoch 1's fini-send is silenced; the program-end anchor still tries to +/// receive `(10, 8, 2, 10)` (the last honest fini) — but it was never sent. +/// Separately, epoch 1's init-receive is also silenced, leaving epoch 0's +/// fini token (which epoch 1 was supposed to consume) dangling. Both +/// produce bus imbalances. +/// +/// Modelled on `test_global_memory_bus_rejects_tampered_boundary` (which +/// tampers a boundary value) and uses the new +/// `prove_and_verify_global_with_traces` helper to inject the forged epoch-1 +/// trace. `prove_and_verify` (which generates its own traces) is used for the +/// baseline check; the forged proof is built via the helper. +#[test] +fn test_l2g_design_y_orphan_mu_zero_rejects() { + // Cell 10 touched in epoch 0 (label 1, fini value=7, ts=3) and epoch 1 + // (label 2, fini value=8, ts=10). Cell 20 touched in epoch 0 only. + let initial_memory = HashMap::from([(10u64, 5u64)]); + let epochs = vec![vec![(10, 7, 3), (20, 9, 4)], vec![(10, 8, 10)]]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + + // Honest baseline on the GlobalMemory bus. + assert!( + prove_and_verify(&boundaries), + "baseline must verify before forgery" + ); + + // Build honest traces for both epochs. + let epoch0_trace = generate_local_to_global_trace(&boundaries[0]); + let mut epoch1_trace = generate_local_to_global_trace(&boundaries[1]); + + // Epoch 1 has exactly one real row (cell 10). Forge MU=0 on that row. + // This orphans cell 10's cross-epoch chain at epoch 1: the init-receive + // (consuming epoch 0's fini token for cell 10) and the fini-send (which + // the program-end anchor expects to receive) both fire with multiplicity 0. + assert_eq!( + boundaries[1].len(), + 1, + "epoch 1 must have exactly one real row" + ); + epoch1_trace + .main_table + .set(0, local_to_global::cols::MU, FE::zero()); + + let mut l2g_traces = vec![epoch0_trace, epoch1_trace]; + + // The GlobalMemory bus cannot balance: + // - Epoch 0's fini token for cell 10 was sent (epoch 0's MU=1) but not + // consumed by epoch 1 (epoch 1's init-receive is silenced → MU=0). + // - The program-end anchor tries to receive epoch 1's fini for cell 10 + // (the last honest value), but that fini-send is also silenced. + assert!( + !prove_and_verify_global_with_traces(&boundaries, &mut l2g_traces), + "MU=0 on a later epoch's L2G row (Design-Y orphan) must cause the GlobalMemory bus to reject" + ); +} + +// ========================================================================= +// Soundness regression tests: private-input continuation +// ========================================================================= + +/// (4) Private-input continuation: `test_private_input_xpage` spans multiple +/// epochs and verifies with non-empty private inputs. +/// +/// Property guarded: the continuation prover correctly handles private-input +/// pages (which are touched in the first epoch and potentially persist across +/// epoch boundaries) and the resulting multi-epoch L2G chain verifies end-to-end. +/// +/// The fixture reads 16 bytes of private input from 0xFF000000, then commits +/// bytes 4..12 (8 bytes after the 4-byte length prefix). With `epoch_size_log2=2` +/// (4 cycles) the 11-cycle program spans three epochs: epoch 0 reads the private-input +/// page (touching 0xFF000000..), epoch 1 performs the commit syscall, epoch 2 +/// halts. The private-input page's L2G entry (epoch 0 fini → epoch 1+ init) +/// is the cross-epoch link under test. +/// +/// Modelled on `continuation::tests::test_prove_and_verify_continuation` +/// (continuation.rs:896) and `prove_elfs_tests::test_prove_private_input_xpage` +/// (prove_elfs_tests.rs:2649). +#[test] +fn test_continuation_private_input_spans_epochs() { + let elf_bytes = crate::test_utils::asm_elf_bytes("test_private_input_xpage"); + + // 16-byte private input: 4-byte length prefix (=16) + 8 bytes of payload + // that will be committed + 4 padding bytes (the fixture commits bytes 4..12). + let mut input: Vec = Vec::with_capacity(16); + // Length prefix: 16 as little-endian u32. + input.extend_from_slice(&16u32.to_le_bytes()); + // 8-byte payload that will be committed. + input.extend_from_slice(&[0x11u8, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88]); + // 4 trailing padding bytes (not committed). + input.extend_from_slice(&[0x00u8, 0x00, 0x00, 0x00]); + assert_eq!(input.len(), 16); + + let result = crate::continuation::prove_and_verify_continuation( + &elf_bytes, + &input, + 2, + &ProofOptions::default_test_options(), + ); + + // The continuation must prove and verify without error. + let output = result.expect("prove_and_verify_continuation must not error"); + + // The fixture commits bytes 4..12 of private input (the 8-byte payload). + assert_eq!( + output.as_deref(), + Some(&input[4..12]), + "committed output must equal private input bytes 4..12" + ); +} + +#[test] +fn test_local_memory_bus_balances() { + // For each touched byte, L2G's init-receive (ts=0) + fini-send cancel the + // MEMW chain's init-send + fini-receive: the epoch-local Memory bus balances. + let initial_memory = HashMap::from([(10u64, 5u64)]); + let epochs = vec![vec![(10, 7, 3), (20, 9, 4)]]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + assert!(prove_verify_memory(&boundaries[0], &boundaries[0])); +} + +#[test] +fn test_local_memory_bus_rejects_tamper() { + // L2G claims the real fini value but the access chain ends on a different + // one — the Memory bus no longer balances. + let initial_memory = HashMap::from([(10u64, 5u64)]); + let epochs = vec![vec![(10, 7, 3)]]; + let boundaries = epoch_boundaries(&initial_memory, &epochs); + assert!(prove_verify_memory(&boundaries[0], &boundaries[0])); + + let mut tampered = boundaries[0].clone(); + tampered[0].fini.value = 999; + assert!(!prove_verify_memory(&boundaries[0], &tampered)); +} diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs index 4d0ac4477..9b32e3b8c 100644 --- a/prover/src/tests/mod.rs +++ b/prover/src/tests/mod.rs @@ -43,6 +43,8 @@ pub mod keccak_rnd_tests; #[cfg(test)] pub mod load_tests; #[cfg(test)] +pub mod local_to_global_bus_tests; +#[cfg(test)] pub mod lt_bus_tests; #[cfg(test)] pub mod lt_tests; diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index a52383341..10013b5ed 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -59,6 +59,9 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool { &traces.page_configs, &table_counts, None, + true, + None, + None, None, ); @@ -77,6 +80,7 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool { &airs.air_refs(), &multi_proof, &traces.public_output_bytes, + 0, &mut replay_transcript, ) .expect("fingerprint collision in test"); @@ -110,6 +114,9 @@ fn prove_vm_minimal(elf_bytes: &[u8], private_inputs: &[u8], max_rows: &MaxRowsC &traces.page_configs, &table_counts, None, + true, + None, + None, None, ); let runtime_page_ranges = traces.runtime_page_ranges(); @@ -150,6 +157,9 @@ fn verify_vm_minimal(vm_proof: &VmProof, elf_bytes: &[u8]) -> bool { &page_configs, &vm_proof.table_counts, None, + true, + None, + None, None, ); let air_refs = airs.air_refs(); @@ -158,6 +168,7 @@ fn verify_vm_minimal(vm_proof: &VmProof, elf_bytes: &[u8]) -> bool { &air_refs, &vm_proof.proof, &vm_proof.public_output, + 0, &mut replay_transcript, ) .expect("fingerprint collision in test"); @@ -1337,6 +1348,9 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() { &traces.page_configs, &table_counts, None, + true, + None, + None, None, ); let proof = multi_prove_ram( @@ -1354,6 +1368,9 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() { &wrong_configs, &table_counts, None, + true, + None, + None, None, ); let verifier_air_refs = verifier_airs.air_refs(); @@ -1362,6 +1379,7 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() { &verifier_air_refs, &proof, &traces.public_output_bytes, + 0, &mut replay_transcript, ) .expect("fingerprint collision in test"); @@ -2086,6 +2104,9 @@ fn test_deep_stack_runtime_pages_roundtrip() { &traces.page_configs, &table_counts, None, + true, + None, + None, None, ); let proof = multi_prove_ram( @@ -2102,6 +2123,9 @@ fn test_deep_stack_runtime_pages_roundtrip() { &verifier_configs, &table_counts, None, + true, + None, + None, None, ); let verifier_air_refs = verifier_airs.air_refs(); @@ -2110,6 +2134,7 @@ fn test_deep_stack_runtime_pages_roundtrip() { &verifier_air_refs, &proof, &traces.public_output_bytes, + 0, &mut replay_transcript, ) .expect("fingerprint collision in test"); @@ -2152,6 +2177,9 @@ fn test_deep_stack_missing_pages_rejected() { &traces.page_configs, &table_counts, None, + true, + None, + None, None, ); let proof = multi_prove_ram( @@ -2168,6 +2196,9 @@ fn test_deep_stack_missing_pages_rejected() { &wrong_configs, &table_counts, None, + true, + None, + None, None, ); let verifier_air_refs = verifier_airs.air_refs(); @@ -2176,6 +2207,7 @@ fn test_deep_stack_missing_pages_rejected() { &verifier_air_refs, &proof, &traces.public_output_bytes, + 0, &mut replay_transcript, ) .expect("fingerprint collision in test"); @@ -2253,6 +2285,9 @@ fn test_heap_alloc_runtime_pages_roundtrip() { &traces.page_configs, &table_counts, None, + true, + None, + None, None, ); let proof = multi_prove_ram( @@ -2269,6 +2304,9 @@ fn test_heap_alloc_runtime_pages_roundtrip() { &verifier_configs, &table_counts, None, + true, + None, + None, None, ); let verifier_air_refs = verifier_airs.air_refs(); @@ -2277,6 +2315,7 @@ fn test_heap_alloc_runtime_pages_roundtrip() { &verifier_air_refs, &proof, &traces.public_output_bytes, + 0, &mut replay_transcript, ) .expect("fingerprint collision in test"); @@ -2427,7 +2466,18 @@ fn test_crafted_zero_count_proof_must_not_verify() { store: 0, cpu32: 0, }; - let airs = VmAirs::new(&elf, &proof_options, true, &[], &zero_counts, None, None); + let airs = VmAirs::new( + &elf, + &proof_options, + true, + &[], + &zero_counts, + None, + true, + None, + None, + None, + ); let verifier_air_refs = airs.air_refs(); assert_eq!(verifier_air_refs.len(), crate::FIXED_TABLE_COUNT); @@ -2855,3 +2905,560 @@ fn test_count_elements_nonzero() { "total_auxiliary_field_elements should be nonzero (got {aux})" ); } + +/// Prove and verify the FIRST continuation epoch in isolation. Epoch 0 starts +/// from the program's initial memory/registers (so its init is correct) and does +/// not terminate, so it is proven with the HALT table excluded (`include_halt = false`). +#[test] +fn test_prove_first_epoch_without_halt() { + use crate::compute_expected_commit_bus_balance; + use crate::tables::trace_builder::build_initial_image; + use crate::test_utils::asm_elf_bytes; + + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("arith_8"); + let elf = Elf::load(&elf_bytes).unwrap(); + + // arith_8 is ~10 cycles; a power-of-two epoch_size of 4 makes epoch 0 an + // intermediate epoch (4 cycles → no CPU padding rows) with the program + // continuing past it. + let epoch_size = 4; + let epochs = Executor::new(&elf, vec![]) + .unwrap() + .run_epochs(epoch_size) + .unwrap(); + assert!(epochs.len() >= 2); + + // Epoch 0's starting memory/registers are the program-start image; it does + // not halt (is_final=false). + let image = build_initial_image(&elf, &[]); + let register_init = crate::tables::register::register_init_from_entry_point(elf.entry_point); + let mut traces = Traces::from_image_and_logs( + &elf, + &image, + ®ister_init, + &epochs[0].logs, + &MaxRowsConfig::default(), + &[], + false, + false, + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .unwrap(); + + let proof_options = ProofOptions::default_test_options(); + let table_counts = traces.table_counts(); + let airs = VmAirs::new( + &elf, + &proof_options, + true, + &traces.page_configs, + &table_counts, + None, + false, + None, + None, + None, + ); + + let multi_proof = multi_prove_ram( + airs.air_trace_pairs(&mut traces), + &mut DefaultTranscript::::new(&[]), + ) + .expect("first epoch failed to prove"); + + let mut replay = DefaultTranscript::::new(&[]); + let expected_bus_balance = compute_expected_commit_bus_balance( + &airs.air_refs(), + &multi_proof, + &traces.public_output_bytes, + 0, + &mut replay, + ) + .expect("fingerprint collision in test"); + + assert!( + Verifier::multi_verify( + &airs.air_refs(), + &multi_proof, + &mut DefaultTranscript::::new(&[]), + &expected_bus_balance, + ), + "first epoch (HALT excluded) failed to verify" + ); +} + +/// Prove and verify a NON-first continuation epoch (epoch 1) in isolation. Its +/// starting memory and registers come from epoch 0's boundary snapshot, and it +/// does not terminate (HALT excluded). +#[test] +fn test_prove_second_epoch_from_snapshot() { + use crate::compute_expected_commit_bus_balance; + use crate::tables::register; + use crate::test_utils::asm_elf_bytes; + + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("arith_8"); + let elf = Elf::load(&elf_bytes).unwrap(); + + // arith_8 is ~10 cycles; epoch_size 4 (power of two) yields epochs 4/4/2, so + // epoch 1 is intermediate (4 cycles → no CPU padding rows). + let epoch_size = 4; + let epochs = Executor::new(&elf, vec![]) + .unwrap() + .run_epochs(epoch_size) + .unwrap(); + assert!(epochs.len() >= 3, "need an intermediate epoch 1"); + + // Epoch 1 starts from epoch 0's ending memory + register snapshot. + let image: std::collections::HashMap = epochs[0].end_memory.iter_bytes().collect(); + let register_init = + register::register_init_from_snapshot(&epochs[0].end_registers, epochs[0].end_pc); + + let mut traces = Traces::from_image_and_logs( + &elf, + &image, + ®ister_init, + &epochs[1].logs, + &MaxRowsConfig::default(), + &[], + false, + false, + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .unwrap(); + + let proof_options = ProofOptions::default_test_options(); + let table_counts = traces.table_counts(); + // The REGISTER commitment is built from this epoch's boundary register init. + let airs = VmAirs::new( + &elf, + &proof_options, + true, + &traces.page_configs, + &table_counts, + None, + false, + Some(®ister_init), + None, + None, + ); + + let multi_proof = multi_prove_ram( + airs.air_trace_pairs(&mut traces), + &mut DefaultTranscript::::new(&[]), + ) + .expect("second epoch failed to prove"); + + let mut replay = DefaultTranscript::::new(&[]); + let expected_bus_balance = compute_expected_commit_bus_balance( + &airs.air_refs(), + &multi_proof, + &traces.public_output_bytes, + 0, + &mut replay, + ) + .expect("fingerprint collision in test"); + + assert!( + Verifier::multi_verify( + &airs.air_refs(), + &multi_proof, + &mut DefaultTranscript::::new(&[]), + &expected_bus_balance, + ), + "second epoch (register init from snapshot) failed to verify" + ); +} + +/// An epoch proof can COMMIT the local-to-global table inertly — committed +/// columns, but no GlobalMemory bus and no constraints in the epoch proof — and +/// still verify, exposing the L2G commitment root that the final proof (Step 4) +/// will bind to. The cross-epoch GlobalMemory matching is proven separately. +#[test] +fn test_epoch_proof_commits_l2g() { + use crate::compute_expected_commit_bus_balance; + use crate::tables::local_to_global; + use crate::tables::register; + use crate::tables::trace_builder::{build_initial_image, epoch_touched_cells}; + use crate::test_utils::asm_elf_bytes; + use std::collections::HashMap; + + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let elf = Elf::load(&elf_bytes).unwrap(); + + // Power-of-two epoch size: all_loadstore_32 is ~34 cycles, so epoch_size 8 + // makes epoch 0 an intermediate epoch with no CPU padding rows. + let epoch_size = 8; + let epochs = Executor::new(&elf, vec![]) + .unwrap() + .run_epochs(epoch_size) + .unwrap(); + assert!(epochs.len() >= 2); + + let image = build_initial_image(&elf, &[]); + let register_init = register::register_init_from_entry_point(elf.entry_point); + let mut traces = Traces::from_image_and_logs( + &elf, + &image, + ®ister_init, + &epochs[0].logs, + &MaxRowsConfig::default(), + &[], + false, + false, + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .unwrap(); + + // Epoch 0's local-to-global trace, committed inertly below. + let register_init0 = register::register_init_from_entry_point(elf.entry_point); + let touched = epoch_touched_cells(&elf, &image, ®ister_init0, &epochs[0].logs).unwrap(); + let initial_memory: HashMap = image.iter().map(|(&a, &v)| (a, v as u64)).collect(); + let boundaries = local_to_global::epoch_boundaries(&initial_memory, &[touched]); + let mut l2g_trace = local_to_global::generate_local_to_global_trace(&boundaries[0]); + + let proof_options = ProofOptions::default_test_options(); + let table_counts = traces.table_counts(); + let airs = VmAirs::new( + &elf, + &proof_options, + true, + &traces.page_configs, + &table_counts, + None, + false, + None, + None, + None, + ); + + // Inert L2G AIR: commits the trace columns, but no bus and no constraints. + let transition_constraints: Vec>> = vec![]; + let inert_l2g_air: AirWithBuses = + AirWithBuses::new( + local_to_global::cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: vec![], + }, + &proof_options, + 1, + transition_constraints, + ); + + let mut pairs = airs.air_trace_pairs(&mut traces); + pairs.push((&inert_l2g_air, &mut l2g_trace, &())); + + let multi_proof = multi_prove_ram(pairs, &mut DefaultTranscript::::new(&[])) + .expect("epoch proof with inert L2G failed to prove"); + + let mut refs = airs.air_refs(); + refs.push(&inert_l2g_air); + + let mut replay = DefaultTranscript::::new(&[]); + let expected_bus_balance = compute_expected_commit_bus_balance( + &refs, + &multi_proof, + &traces.public_output_bytes, + 0, + &mut replay, + ) + .expect("fingerprint collision in test"); + + assert!( + Verifier::multi_verify( + &refs, + &multi_proof, + &mut DefaultTranscript::::new(&[]), + &expected_bus_balance, + ), + "epoch proof with inert L2G failed to verify" + ); + + // The L2G table (pushed last) is committed: its Merkle root is exposed and + // non-zero — this is the `R_i` the final proof will be bound to in Step 4. + let l2g_root = multi_proof + .proofs + .last() + .unwrap() + .lde_trace_main_merkle_root; + assert_ne!( + l2g_root, [0u8; 32], + "L2G commitment root should be non-zero" + ); +} + +/// End-to-end continuation pipeline over a real ELF: split execution into epochs, +/// prove+verify each epoch (each committing its local-to-global table inertly and +/// exposing a root R_i), prove the cross-epoch GlobalMemory bus balances over the +/// real per-epoch boundaries, and finally bind the cross-epoch proof to the REAL +/// per-epoch roots. The R_i collected from the independent epoch proofs equal the +/// per-epoch L2G sub-table roots in the cross-epoch proof — that root equality is +/// the shared-commitment linkage between the epoch proofs and the global memory +/// argument. +#[test] +fn test_continuation_pipeline_end_to_end() { + use crate::compute_expected_commit_bus_balance; + use crate::tables::local_to_global; + use crate::tables::register; + use crate::tables::trace_builder::{build_initial_image, epoch_touched_cells}; + use crate::test_utils::asm_elf_bytes; + use std::collections::HashMap; + + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let elf = Elf::load(&elf_bytes).unwrap(); + + // Split execution into power-of-two epochs (all_loadstore_32 is ~34 cycles, so + // epoch_size 8 gives intermediate epochs with no CPU padding rows). + let epoch_size = 8; + let epochs = Executor::new(&elf, vec![]) + .unwrap() + .run_epochs(epoch_size) + .unwrap(); + assert!(epochs.len() >= 2); + + let image0 = build_initial_image(&elf, &[]); + let initial_memory: HashMap = image0.iter().map(|(&a, &v)| (a, v as u64)).collect(); + + // Pass 1: each epoch's starting state + the cells it touches. Epoch 0 starts + // from the program image; epoch i>0 from epoch i-1's boundary snapshot. + let mut images: Vec> = Vec::with_capacity(epochs.len()); + let mut register_inits: Vec> = Vec::with_capacity(epochs.len()); + let mut all_touched: Vec> = Vec::with_capacity(epochs.len()); + for (i, epoch) in epochs.iter().enumerate() { + let (image_i, register_init_i) = if i == 0 { + ( + image0.clone(), + register::register_init_from_entry_point(elf.entry_point), + ) + } else { + let image_i: HashMap = epochs[i - 1].end_memory.iter_bytes().collect(); + let register_init_i = register::register_init_from_snapshot( + &epochs[i - 1].end_registers, + epochs[i - 1].end_pc, + ); + (image_i, register_init_i) + }; + let touched_i = epoch_touched_cells(&elf, &image_i, ®ister_init_i, &epoch.logs).unwrap(); + images.push(image_i); + register_inits.push(register_init_i); + all_touched.push(touched_i); + } + let boundaries = local_to_global::epoch_boundaries(&initial_memory, &all_touched); + + let proof_options = ProofOptions::default_test_options(); + + // Pass 2: prove+verify each epoch, committing boundaries[i] inertly, and + // collect the L2G commitment root each epoch proof exposes. + let mut epoch_roots = Vec::with_capacity(epochs.len()); + for (i, epoch) in epochs.iter().enumerate() { + let is_final = i == epochs.len() - 1; + let mut traces = Traces::from_image_and_logs( + &elf, + &images[i], + ®ister_inits[i], + &epoch.logs, + &MaxRowsConfig::default(), + &[], + is_final, + false, + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .unwrap(); + + let table_counts = traces.table_counts(); + let register_init_arg = if i == 0 { + None + } else { + Some(register_inits[i].as_slice()) + }; + let airs = VmAirs::new( + &elf, + &proof_options, + true, + &traces.page_configs, + &table_counts, + None, + is_final, + register_init_arg, + None, + None, + ); + + let mut l2g_trace = local_to_global::generate_local_to_global_trace(&boundaries[i]); + let transition_constraints: Vec>> = vec![]; + let inert_l2g_air: AirWithBuses = + AirWithBuses::new( + local_to_global::cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: vec![], + }, + &proof_options, + 1, + transition_constraints, + ); + + let mut pairs = airs.air_trace_pairs(&mut traces); + pairs.push((&inert_l2g_air, &mut l2g_trace, &())); + let multi_proof = multi_prove_ram(pairs, &mut DefaultTranscript::::new(&[])) + .expect("epoch proof failed to prove"); + + let mut refs = airs.air_refs(); + refs.push(&inert_l2g_air); + let mut replay = DefaultTranscript::::new(&[]); + let expected_bus_balance = compute_expected_commit_bus_balance( + &refs, + &multi_proof, + &traces.public_output_bytes, + 0, + &mut replay, + ) + .expect("fingerprint collision in test"); + assert!( + Verifier::multi_verify( + &refs, + &multi_proof, + &mut DefaultTranscript::::new(&[]), + &expected_bus_balance, + ), + "epoch {i} failed to verify" + ); + + epoch_roots.push( + multi_proof + .proofs + .last() + .unwrap() + .lde_trace_main_merkle_root, + ); + } + + // The cross-epoch GlobalMemory bus balances over the real per-epoch boundaries. + assert!( + crate::tests::local_to_global_bus_tests::prove_and_verify(&boundaries), + "final GlobalMemory bus must balance over real epoch data" + ); + + // The cross-epoch proof is bound to the REAL per-epoch roots: the L2G root each + // epoch proof exposed equals the per-epoch L2G sub-table root in the final proof. + let final_proof = crate::tests::local_to_global_bus_tests::prove_global(&boundaries); + assert!( + crate::verify_l2g_commitment_binding(&epoch_roots, &final_proof), + "final proof must be bound to the real per-epoch L2G roots" + ); +} + +/// A continuation epoch built with `l2g_memory_bookend = true` proves and verifies: +/// PAGE no longer bookends the touched RAM bytes (they self-cancel), and the +/// local-to-global table provides their `Memory`-bus init/fini instead. The epoch +/// `Memory` bus still nets to zero — L2G has replaced PAGE as the bookend. +#[test] +fn test_epoch_memory_bus_with_l2g_bookend() { + use crate::compute_expected_commit_bus_balance; + use crate::tables::local_to_global; + use crate::tables::register; + use crate::tables::trace_builder::build_initial_image; + use crate::test_utils::asm_elf_bytes; + use std::collections::HashMap; + + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let elf = Elf::load(&elf_bytes).unwrap(); + + // Power-of-two epoch size: all_loadstore_32 is ~34 cycles, so epoch_size 8 + // makes epoch 0 an intermediate epoch with no CPU padding rows. + let epoch_size = 8; + let epochs = Executor::new(&elf, vec![]) + .unwrap() + .run_epochs(epoch_size) + .unwrap(); + assert!(epochs.len() >= 2); + + // Epoch 0 starts from the program image; build it with the L2G memory bookend. + let image = build_initial_image(&elf, &[]); + let register_init = register::register_init_from_entry_point(elf.entry_point); + let mut traces = Traces::from_image_and_logs( + &elf, + &image, + ®ister_init, + &epochs[0].logs, + &MaxRowsConfig::default(), + &[], + false, + true, + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .unwrap(); + let initial_memory: HashMap = image.iter().map(|(&a, &v)| (a, v as u64)).collect(); + let boundaries = + local_to_global::epoch_boundaries(&initial_memory, &[traces.touched_memory_cells.clone()]); + traces.local_to_global = local_to_global::generate_local_to_global_trace(&boundaries[0]); + + let proof_options = ProofOptions::default_test_options(); + let table_counts = traces.table_counts(); + let airs = VmAirs::new( + &elf, + &proof_options, + true, + &traces.page_configs, + &table_counts, + None, + false, + None, + None, + None, + ); + + // L2G air on the epoch-local Memory bus (the bookend that replaces PAGE). + let transition_constraints: Vec>> = vec![]; + let l2g_air: AirWithBuses = + AirWithBuses::new( + local_to_global::cols::NUM_COLUMNS, + AuxiliaryTraceBuildData { + interactions: local_to_global::memory_bus_interactions(), + }, + &proof_options, + 1, + transition_constraints, + ); + + // Take the L2G trace out of `traces` so `air_trace_pairs` can borrow the rest. + let mut l2g_trace = std::mem::replace( + &mut traces.local_to_global, + local_to_global::generate_local_to_global_trace(&[]), + ); + + let mut pairs = airs.air_trace_pairs(&mut traces); + pairs.push((&l2g_air, &mut l2g_trace, &())); + let multi_proof = multi_prove_ram(pairs, &mut DefaultTranscript::::new(&[])) + .expect("epoch with L2G memory bookend failed to prove"); + + let mut refs = airs.air_refs(); + refs.push(&l2g_air); + let mut replay = DefaultTranscript::::new(&[]); + let expected_bus_balance = compute_expected_commit_bus_balance( + &refs, + &multi_proof, + &traces.public_output_bytes, + 0, + &mut replay, + ) + .expect("fingerprint collision in test"); + + assert!( + Verifier::multi_verify( + &refs, + &multi_proof, + &mut DefaultTranscript::::new(&[]), + &expected_bus_balance, + ), + "epoch Memory bus must balance with L2G bookend + PAGE excluding touched cells" + ); +} diff --git a/prover/src/tests/register_tests.rs b/prover/src/tests/register_tests.rs index 1baa55eda..433968ab5 100644 --- a/prover/src/tests/register_tests.rs +++ b/prover/src/tests/register_tests.rs @@ -1,5 +1,7 @@ //! Tests for the REGISTER table. +use stark::proof::options::ProofOptions; + use crate::tables::register::*; use crate::tables::types::*; @@ -17,7 +19,7 @@ fn test_register_base_address() { fn test_generate_register_trace_empty() { let entry_point = 0x1000u64; let final_state = FinalRegisterStateMap::new(); - let trace = generate_register_trace(&final_state, entry_point); + let trace = generate_register_trace(&final_state, ®ister_init_from_entry_point(entry_point)); // Should have power-of-2 rows >= 67 (x0-x31, x254, x255) assert!(trace.num_rows() >= NUM_REGISTER_ADDRESSES); @@ -66,7 +68,7 @@ fn test_generate_register_trace_with_access() { }, ); - let trace = generate_register_trace(&final_state, entry_point); + let trace = generate_register_trace(&final_state, ®ister_init_from_entry_point(entry_point)); // Row 10 (address 10) should have the final state assert_eq!(*trace.main_table.get(10, cols::OFFSET), FE::from(10u64)); @@ -83,3 +85,51 @@ fn test_bus_interactions() { let interactions = bus_interactions(); assert_eq!(interactions.len(), 2); // C1, C2 } + +#[test] +fn test_fini_from_trace_reads_every_register() { + let mut final_state = FinalRegisterStateMap::new(); + final_state.insert( + register_base_address(5), // addr 10 + FinalRegisterWordState { + timestamp: 100, + value: 0x42, + }, + ); + let trace = generate_register_trace(&final_state, ®ister_init_from_entry_point(0x1000)); + + let fini = fini_from_trace(&trace); + // One entry per real register Word address, in register_word_address_list order + // (positions 0..63 are addresses 0..63, so addr a is at index a for a < 64). + assert_eq!(fini.len(), NUM_REGISTER_ADDRESSES); + // Accessed register reflects its written value; PC (x255, addr 510 = index 65) + // reflects entry point; x0 (addr 0 = index 0) ends at 0. + assert_eq!(fini[10], 0x42); + assert_eq!(fini[65], 0x1000); + assert_eq!(fini[0], 0); +} + +#[test] +fn test_precomputed_commitment_with_fini_binds_fini() { + let opts = ProofOptions::default_test_options(); + let init = register_init_from_entry_point(0x1000); + // Fini vectors in register_word_address_list order (index = address for a < 64). + let mut fini_a = vec![0u32; NUM_REGISTER_ADDRESSES]; + fini_a[10] = 0x42; + fini_a[12] = 7; + let mut fini_b = fini_a.clone(); + fini_b[10] = 0x43; // a different final value at one address + + let root_a = compute_precomputed_commitment_with_fini(&opts, &init, &fini_a); + let root_a2 = compute_precomputed_commitment_with_fini(&opts, &init, &fini_a); + let root_b = compute_precomputed_commitment_with_fini(&opts, &init, &fini_b); + + // Deterministic, and sensitive to every fini value: a tampered R_{i+1} yields a + // different preprocessed root, so a trace whose FINI != the committed R_{i+1} + // fails the verifier's preprocessed-root check. + assert_eq!(root_a, root_a2); + assert_ne!(root_a, root_b); + + // The 3-column (with-fini) commitment differs from the 2-column monolithic one. + assert_ne!(root_a, compute_precomputed_commitment(&opts, &init)); +} diff --git a/prover/src/tests/statement_tests.rs b/prover/src/tests/statement_tests.rs index 55ac5a15b..73944e262 100644 --- a/prover/src/tests/statement_tests.rs +++ b/prover/src/tests/statement_tests.rs @@ -3,7 +3,7 @@ use crypto::fiat_shamir::default_transcript::DefaultTranscript; use crypto::fiat_shamir::is_transcript::IsTranscript; -use crate::statement::absorb_statement; +use crate::statement::{StatementKind, absorb_continuation_global_statement, absorb_statement}; use crate::test_utils::E; use crate::{RuntimePageRange, TableCounts}; @@ -47,7 +47,15 @@ fn state_after_absorb( ranges: &[RuntimePageRange], ) -> [u8; 32] { let mut t = DefaultTranscript::::new(&[]); - absorb_statement(&mut t, elf, out, counts, priv_pages, ranges); + absorb_statement( + &mut t, + StatementKind::Monolithic, + elf, + out, + counts, + priv_pages, + ranges, + ); t.state() } @@ -120,3 +128,51 @@ fn public_output_length_prefix_prevents_collision() { state_after_absorb(b"elf", b"\x41", &counts_b, 0, &[]), ); } + +fn epoch_state(elf: &[u8], label: u64) -> [u8; 32] { + let mut t = DefaultTranscript::::new(&[]); + absorb_statement( + &mut t, + StatementKind::ContinuationEpoch { epoch_label: label }, + elf, + b"out", + &sample_counts(), + 1, + &sample_ranges(), + ); + t.state() +} + +#[test] +fn continuation_epoch_state_binds_label_and_program() { + let baseline = epoch_state(b"elf", 1); + // Deterministic. + assert_eq!(baseline, epoch_state(b"elf", 1)); + // Pinned to the epoch's position: a different label diverges (replay across + // positions is rejected). + assert_ne!(baseline, epoch_state(b"elf", 2), "must bind epoch_label"); + // Pinned to the program. + assert_ne!(baseline, epoch_state(b"other-elf", 1), "must bind the ELF"); +} + +#[test] +fn continuation_epoch_differs_from_monolithic_statement() { + // A monolithic proof and a continuation epoch proof must never share a + // transcript seed, even with the same base statement. + let monolithic = state_after_absorb(b"elf", b"out", &sample_counts(), 1, &sample_ranges()); + assert_ne!(monolithic, epoch_state(b"elf", 1)); +} + +fn global_state(elf: &[u8], num_epochs: usize) -> [u8; 32] { + let mut t = DefaultTranscript::::new(&[]); + absorb_continuation_global_statement(&mut t, elf, num_epochs); + t.state() +} + +#[test] +fn continuation_global_state_binds_program_and_epoch_count() { + let baseline = global_state(b"elf", 3); + assert_eq!(baseline, global_state(b"elf", 3)); // deterministic + assert_ne!(baseline, global_state(b"elf", 4), "must bind epoch count"); + assert_ne!(baseline, global_state(b"other-elf", 3), "must bind the ELF"); +} diff --git a/prover/src/tests/trace_builder_tests.rs b/prover/src/tests/trace_builder_tests.rs index b3c1e1514..b23da43bf 100644 --- a/prover/src/tests/trace_builder_tests.rs +++ b/prover/src/tests/trace_builder_tests.rs @@ -823,3 +823,278 @@ mod routing_tests { ); } } + +/// `from_image_and_logs` is a faithful generalization of `from_elf_and_logs`: +/// fed the ELF-derived image, it must produce identical traces. +#[test] +fn test_from_image_and_logs_matches_from_elf_and_logs() { + use crate::tables::MaxRowsConfig; + use crate::tables::trace_builder::build_initial_image; + use crate::test_utils::asm_elf_bytes; + use executor::elf::Elf; + use executor::vm::execution::Executor; + + let elf_bytes = asm_elf_bytes("basic_program"); + let program = Elf::load(&elf_bytes).unwrap(); + let logs = Executor::new(&program, vec![]).unwrap().run().unwrap().logs; + let max_rows = MaxRowsConfig::default(); + + let from_elf = Traces::from_elf_and_logs( + &program, + &logs, + &max_rows, + &[], + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .unwrap(); + + let image = build_initial_image(&program, &[]); + let register_init = + crate::tables::register::register_init_from_entry_point(program.entry_point); + let from_image = Traces::from_image_and_logs( + &program, + &image, + ®ister_init, + &logs, + &max_rows, + &[], + true, + false, + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .unwrap(); + + assert_eq!( + from_elf.total_field_elements(), + from_image.total_field_elements() + ); + assert_eq!( + format!("{:?}", from_elf.table_counts()), + format!("{:?}", from_image.table_counts()) + ); +} + +/// A memory snapshot at an epoch boundary converts into a non-empty initial +/// image (the input `from_image_and_logs` consumes for the next epoch). +#[test] +fn test_epoch_end_memory_converts_to_image() { + use crate::test_utils::asm_elf_bytes; + use executor::elf::Elf; + use executor::vm::execution::Executor; + use std::collections::HashMap; + + let elf_bytes = asm_elf_bytes("basic_program"); + let program = Elf::load(&elf_bytes).unwrap(); + + let total = Executor::new(&program, vec![]) + .unwrap() + .run() + .unwrap() + .logs + .len(); + let epoch_size = (total / 3).max(1); + let epochs = Executor::new(&program, vec![]) + .unwrap() + .run_epochs(epoch_size) + .unwrap(); + assert!(epochs.len() >= 2); + + let image: HashMap = epochs[0].end_memory.iter_bytes().collect(); + assert!(!image.is_empty()); +} + +/// Every epoch builds traces: intermediate epochs (`is_final = false`) skip HALT +/// and start from the previous epoch's memory; the last epoch terminates. +#[test] +fn test_build_traces_for_all_epochs() { + use crate::tables::MaxRowsConfig; + use crate::tables::trace_builder::build_initial_image; + use crate::test_utils::asm_elf_bytes; + use executor::elf::Elf; + use executor::vm::execution::Executor; + use std::collections::HashMap; + + let elf_bytes = asm_elf_bytes("basic_program"); + let program = Elf::load(&elf_bytes).unwrap(); + + let total = Executor::new(&program, vec![]) + .unwrap() + .run() + .unwrap() + .logs + .len(); + let epoch_size = (total / 3).max(1); + let epochs = Executor::new(&program, vec![]) + .unwrap() + .run_epochs(epoch_size) + .unwrap(); + assert!(epochs.len() >= 2); + + let max_rows = MaxRowsConfig::default(); + let last = epochs.len() - 1; + + for (i, epoch) in epochs.iter().enumerate() { + // Epoch 0 starts from the program-start image; later epochs from the + // previous epoch's ending memory + register snapshot. + let (image, register_init): (HashMap, Vec) = if i == 0 { + ( + build_initial_image(&program, &[]), + crate::tables::register::register_init_from_entry_point(program.entry_point), + ) + } else { + ( + epochs[i - 1].end_memory.iter_bytes().collect(), + crate::tables::register::register_init_from_snapshot( + &epochs[i - 1].end_registers, + epochs[i - 1].end_pc, + ), + ) + }; + + let traces = Traces::from_image_and_logs( + &program, + &image, + ®ister_init, + &epoch.logs, + &max_rows, + &[], + i == last, + false, + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .unwrap_or_else(|e| panic!("epoch {i} (is_final={}) failed to build: {e:?}", i == last)); + + assert!( + traces.table_counts().cpu > 0, + "epoch {i} produced an empty CPU trace" + ); + } +} + +/// A non-final epoch carrying the program-terminating instruction is rejected +/// (rather than silently producing an unverifiable proof). +#[test] +fn test_terminating_epoch_rejected_when_not_final() { + use crate::tables::MaxRowsConfig; + use crate::tables::register::register_init_from_snapshot; + use crate::test_utils::asm_elf_bytes; + use executor::elf::Elf; + use executor::vm::execution::Executor; + use std::collections::HashMap; + + let elf_bytes = asm_elf_bytes("basic_program"); + let program = Elf::load(&elf_bytes).unwrap(); + + let total = Executor::new(&program, vec![]) + .unwrap() + .run() + .unwrap() + .logs + .len(); + let epoch_size = (total / 3).max(1); + let epochs = Executor::new(&program, vec![]) + .unwrap() + .run_epochs(epoch_size) + .unwrap(); + assert!(epochs.len() >= 2); + + // The last epoch holds the terminating instruction; building it as a + // non-final epoch (is_final = false) must error. + let last = epochs.len() - 1; + let image: HashMap = epochs[last - 1].end_memory.iter_bytes().collect(); + let register_init = + register_init_from_snapshot(&epochs[last - 1].end_registers, epochs[last - 1].end_pc); + + let result = Traces::from_image_and_logs( + &program, + &image, + ®ister_init, + &epochs[last].logs, + &MaxRowsConfig::default(), + &[], + false, + false, + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ); + + assert!( + matches!(result, Err(crate::Error::HaltInNonFinalEpoch)), + "expected HaltInNonFinalEpoch error for a non-final terminating epoch" + ); +} + +/// End to end: extract real per-epoch touched cells from execution, feed them +/// through the local-to-global boundary logic, and render each epoch's trace. +#[test] +fn test_local_to_global_traces_from_real_execution() { + use crate::tables::local_to_global::{epoch_boundaries, generate_local_to_global_trace}; + use crate::tables::trace_builder::{build_initial_image, epoch_touched_cells}; + use crate::test_utils::asm_elf_bytes; + use executor::elf::Elf; + use executor::vm::execution::Executor; + use std::collections::HashMap; + + // A program that exercises memory (loads/stores), so some cells are touched. + let elf_bytes = asm_elf_bytes("all_loadstore_32"); + let program = Elf::load(&elf_bytes).unwrap(); + + let total = Executor::new(&program, vec![]) + .unwrap() + .run() + .unwrap() + .logs + .len(); + let epoch_size = (total / 3).max(1); + let epochs = Executor::new(&program, vec![]) + .unwrap() + .run_epochs(epoch_size) + .unwrap(); + assert!(epochs.len() >= 2); + + let elf_image = build_initial_image(&program, &[]); + let total_memory = elf_image.len(); + + // Per-epoch touched cells from real execution (epoch 0 from the ELF image, + // later epochs from the previous epoch's ending memory). + let mut per_epoch_touches: Vec> = Vec::new(); + for (i, epoch) in epochs.iter().enumerate() { + let image: HashMap = if i == 0 { + elf_image.clone() + } else { + epochs[i - 1].end_memory.iter_bytes().collect() + }; + let register_init = if i == 0 { + crate::tables::register::register_init_from_entry_point(program.entry_point) + } else { + crate::tables::register::register_init_from_snapshot( + &epochs[i - 1].end_registers, + epochs[i - 1].end_pc, + ) + }; + per_epoch_touches + .push(epoch_touched_cells(&program, &image, ®ister_init, &epoch.logs).unwrap()); + } + + // The program touches memory somewhere, and every per-epoch touched set is + // sparse (far smaller than the whole memory image). + let total_touched: usize = per_epoch_touches.iter().map(Vec::len).sum(); + assert!(total_touched > 0); + for touched in &per_epoch_touches { + assert!(touched.len() < total_memory); + } + + // Boundary claims + rendered L2G trace per epoch. + let initial_memory: HashMap = + elf_image.iter().map(|(&a, &v)| (a, v as u64)).collect(); + let boundaries = epoch_boundaries(&initial_memory, &per_epoch_touches); + + for (i, boundary_set) in boundaries.iter().enumerate() { + let trace = generate_local_to_global_trace(boundary_set); + let expected_rows = per_epoch_touches[i].len().next_power_of_two().max(1); + assert_eq!(trace.num_rows(), expected_rows); + } +}