diff --git a/.github/workflows/pr_main.yaml b/.github/workflows/pr_main.yaml
index bc0560acb..ae675d770 100644
--- a/.github/workflows/pr_main.yaml
+++ b/.github/workflows/pr_main.yaml
@@ -102,21 +102,43 @@ jobs:
           cargo test --release -p executor test_ethrex -- --ignored
           cargo test --release -p executor test_ckzg -- --ignored
 
+  test-cli:
+    name: CLI tests
+    runs-on: ubuntu-latest
+    if: github.event_name != 'push' || github.actor != 'github-merge-queue[bot]'
+    steps:
+      - name: Checkout sources
+        uses: actions/checkout@v4
+
+      - name: Setup Rust Environment
+        uses: ./.github/actions/setup-rust
+
+      - name: Cache cargo build artifacts
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: "lambda-vm-cli-test"
+          cache-all-crates: "true"
+
+      - name: Run CLI tests
+        run: cargo test -p cli
+
   # "Test" is a required check — keep this name to avoid branch protection changes.
-  # This gate job passes only when executor tests AND all prover shards succeed.
+  # This gate job passes only when CLI, executor, disk-spill, and prover tests succeed.
   test:
     name: Test
     if: always()
-    needs: [test-executor, test-prover, test-disk-spill]
+    needs: [test-executor, test-cli, test-prover, test-disk-spill]
     runs-on: ubuntu-latest
     steps:
       - name: Check results
         run: |
           executor="${{ needs.test-executor.result }}"
+          cli="${{ needs.test-cli.result }}"
           prover="${{ needs.test-prover.result }}"
           disk_spill="${{ needs.test-disk-spill.result }}"
 
           echo "test-executor: $executor"
+          echo "test-cli: $cli"
           echo "test-prover: $prover"
           echo "test-disk-spill: $disk_spill"
 
@@ -124,6 +146,9 @@ jobs:
           if [[ "$executor" != "success" && "$executor" != "skipped" ]]; then
             exit 1
           fi
+          if [[ "$cli" != "success" && "$cli" != "skipped" ]]; then
+            exit 1
+          fi
           if [[ "$prover" != "success" && "$prover" != "skipped" ]]; then
             exit 1
           fi
diff --git a/bin/cli/README.md b/bin/cli/README.md
index c784ff6c7..bc5eb9d53 100644
--- a/bin/cli/README.md
+++ b/bin/cli/README.md
@@ -57,8 +57,10 @@ cargo run -p cli --release -- prove <PROGRAM.elf> -o proof.bin [flags]
 | `--private-input <FILE>` | Pass private input bytes to the guest. |
 | `--blowup <N>` | FRI blowup factor (power of 2). Higher = fewer queries, smaller proof, slower proving. [default: 2] |
 | `--time` | Print total proving time. |
-| `--cycles` | Run one extra pre-pass outside the timer and print the dynamic instruction count. |
-| `--elements` | Build traces and print main-trace and aux-trace field element counts. |
+| `--cycles` | Run one extra execution outside the timer and print the dynamic instruction count. Also supported with `--continuations`. |
+| `--elements` | Build traces and print main-trace and aux-trace field element counts. Monolithic proving only; conflicts with `--continuations`. |
+| `--continuations` | Prove as a continuation bundle split into fixed-size epochs. |
+| `--epoch-size-log2 <N>` | Continuation epoch size as `2^N` cycles. Requires `--continuations`. Defaults to `20`; values below `18` are rejected. |
 
 ### Verify
 
@@ -72,8 +74,10 @@ cargo run -p cli --release -- verify <PROOF> <PROGRAM.elf> [flags]
 |---|---|
 | `--blowup <N>` | FRI blowup factor used during proving. Must match. [default: 2] |
 | `--time` | Print verification time. |
+| `--continuations` | Verify a continuation proof bundle produced by `prove --continuations`. |
 
-Returns exit code `0` on successful verification, `1` on failure.
+Returns exit code `0` on successful verification, `1` on failure. `--blowup` must
+match the value used during proving.
 
 ### Count Elements
 
@@ -96,10 +100,29 @@ cargo run -p cli --release -- execute executor/program_artifacts/asm/add.elf
 cargo run -p cli --release -- prove executor/program_artifacts/asm/add.elf -o /tmp/proof.bin
 cargo run -p cli --release -- verify /tmp/proof.bin executor/program_artifacts/asm/add.elf
 
+# Generate and verify a continuation proof
+cargo run -p cli --release -- prove program.elf -o /tmp/cont.bin --continuations --epoch-size-log2 20
+cargo run -p cli --release -- verify /tmp/cont.bin program.elf --continuations
+
+# Generate a continuation proof and print total dynamic instruction count
+cargo run -p cli --release -- prove program.elf -o /tmp/cont.bin --continuations --cycles
+
 # Prove with private input and print metrics
 cargo run -p cli --release -- prove program.elf -o /tmp/proof.bin --private-input input.bin --time --cycles
 ```
 
+For continuation proofs, `--epoch-size-log2` is the power in `2^N` cycles. Larger
+values reduce epoch count and fixed per-epoch overhead, but increase peak memory.
+As rough ethrex 10-transfer distinct-account reference points from a local sweep:
+`19` used about 6.9 GB peak heap, `20` about 9.5 GB, `21` about 15.8 GB, and `22`
+about 26.8 GB. For a new workload, use the highest value the machine can run
+without swapping.
+
+Continuation proof bundles are self-contained for standalone verification. When
+`--private-input` is used, the serialized continuation proof includes the raw
+private input bytes so the verifier can rebuild the genesis memory commitment.
+Do not treat continuation proof files as confidential-input hiding artifacts.
+
 ## Guest Program Flamegraphs
 
 Generate flamegraphs showing where the guest RISC-V program spends its execution time (by instruction count).
diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs
index 5c9719650..2b053755c 100644
--- a/bin/cli/src/main.rs
+++ b/bin/cli/src/main.rs
@@ -18,6 +18,9 @@ use executor::{
 use prover::VmProof;
 use stark::proof::options::GoldilocksCubicProofOptions;
 
+const DEFAULT_CONTINUATION_EPOCH_SIZE_LOG2: u32 = 20;
+const MIN_CONTINUATION_EPOCH_SIZE_LOG2: u32 = 18;
+
 /// Polls jemalloc `stats.allocated` every 10ms from a background thread,
 /// tracking the high-water mark. Near-zero overhead because jemalloc uses
 /// thread-local caches — `epoch::advance()` just merges cached counters.
@@ -130,20 +133,34 @@ enum Commands {
 
         /// Blowup factor (power of 2). Higher = fewer queries, smaller proof, slower proving.
         #[arg(long, default_value = "2")]
-        blowup: Option<u8>,
+        blowup: u8,
 
         /// Print proving time
         #[arg(long)]
         time: bool,
 
-        /// Execute one pre-pass outside the timer and print dynamic instruction count
+        /// Execute once outside the timer and print dynamic instruction count
         #[arg(long)]
         cycles: bool,
 
         /// Build traces and print total main-trace field elements (rows × columns summed across
         /// all tables) and aux-trace field elements (committed EF columns × rows)
-        #[arg(long)]
+        #[arg(long, conflicts_with = "continuations")]
         elements: bool,
+
+        /// Prove with continuations (split execution into epochs; flat peak memory)
+        #[arg(long)]
+        continuations: bool,
+
+        /// Continuation epoch size as log2(cycles); e.g. 20 means 1,048,576 cycles.
+        #[arg(
+            long,
+            value_name = "N",
+            requires = "continuations",
+            value_parser = parse_epoch_size_log2,
+            long_help = "Continuation epoch size as log2(cycles); e.g. 20 means 1,048,576 cycles.\n\nDefault when omitted: 20. Values below 18 are rejected for the CLI because tiny epochs are dominated by fixed overhead. Indicative ethrex 10-transfer distinct-account peak heap from a local sweep: 19 ~= 6.9 GB, 20 ~= 9.5 GB, 21 ~= 15.8 GB, 22 ~= 26.8 GB. Higher values reduce epoch count, continuation bundle size, and fixed per-epoch overhead, but increase peak memory. For a new workload, try the highest value your machine can run without swapping."
+        )]
+        epoch_size_log2: Option<u32>,
     },
 
     /// Verify a proof bundle
@@ -158,11 +175,15 @@ enum Commands {
 
         /// Blowup factor used during proving (must match)
         #[arg(long, default_value = "2")]
-        blowup: Option<u8>,
+        blowup: u8,
 
         /// Print verification time
         #[arg(long)]
         time: bool,
+
+        /// Verify a continuation proof bundle (produced by `prove --continuations`)
+        #[arg(long)]
+        continuations: bool,
     },
 
     /// Count main-trace and aux-trace field elements without proving
@@ -196,13 +217,36 @@ fn main() -> ExitCode {
             time,
             cycles,
             elements,
-        } => cmd_prove(elf, output, private_input, blowup, time, cycles, elements),
+            continuations,
+            epoch_size_log2,
+        } => {
+            if continuations {
+                cmd_prove_continuation(
+                    elf,
+                    output,
+                    private_input,
+                    epoch_size_log2,
+                    blowup,
+                    time,
+                    cycles,
+                )
+            } else {
+                cmd_prove(elf, output, private_input, blowup, time, cycles, elements)
+            }
+        }
         Commands::Verify {
             proof,
             elf,
             blowup,
             time,
-        } => cmd_verify(proof, elf, blowup, time),
+            continuations,
+        } => {
+            if continuations {
+                cmd_verify_continuation(proof, elf, blowup, time)
+            } else {
+                cmd_verify(proof, elf, blowup, time)
+            }
+        }
         Commands::CountElements { elf, private_input } => cmd_count_elements(elf, private_input),
     }
 }
@@ -217,6 +261,17 @@ fn read_private_input(path: Option<&PathBuf>) -> Result<Vec<u8>, String> {
     }
 }
 
+fn count_cycles(elf_data: &[u8], private_inputs: &[u8]) -> Result<u64, String> {
+    let program =
+        Elf::load(elf_data).map_err(|e| format!("Failed to load ELF for cycle count: {e:?}"))?;
+    let executor = Executor::new(&program, private_inputs.to_vec())
+        .map_err(|e| format!("Failed to create executor for cycle count: {e:?}"))?;
+    executor
+        .run()
+        .map(|result| result.logs.len() as u64)
+        .map_err(|e| format!("Execution failed during cycle count: {e:?}"))
+}
+
 fn cmd_execute(
     elf_path: PathBuf,
     private_input_path: Option<PathBuf>,
@@ -324,7 +379,7 @@ fn cmd_prove(
     elf_path: PathBuf,
     output_path: PathBuf,
     private_input_path: Option<PathBuf>,
-    blowup: Option<u8>,
+    blowup: u8,
     time: bool,
     cycles: bool,
     elements: bool,
@@ -350,24 +405,10 @@ fn cmd_prove(
     // Mirrors SP1's cycle-count pass so both provers report the same kind of
     // number without inflating the measured proving time.
     let cycle_count = if cycles {
-        let program = match Elf::load(&elf_data) {
-            Ok(p) => p,
+        match count_cycles(&elf_data, &private_inputs) {
+            Ok(count) => Some(count),
             Err(e) => {
-                eprintln!("Failed to load ELF for cycle count: {:?}", e);
-                return ExitCode::FAILURE;
-            }
-        };
-        let executor = match Executor::new(&program, private_inputs.clone()) {
-            Ok(e) => e,
-            Err(e) => {
-                eprintln!("Failed to create executor for cycle count: {:?}", e);
-                return ExitCode::FAILURE;
-            }
-        };
-        match executor.run() {
-            Ok(result) => Some(result.logs.len() as u64),
-            Err(e) => {
-                eprintln!("Execution failed during cycle count: {:?}", e);
+                eprintln!("{e}");
                 return ExitCode::FAILURE;
             }
         }
@@ -398,31 +439,23 @@ fn cmd_prove(
     });
 
     let start = Instant::now();
-    let proof = match blowup {
-        Some(b) => {
-            let opts = match GoldilocksCubicProofOptions::with_blowup(b) {
-                Ok(opts) => opts,
-                Err(e) => {
-                    eprintln!("Invalid proof options: {e}");
-                    return ExitCode::FAILURE;
-                }
-            };
-            eprintln!(
-                "Generating proof (blowup={b}, queries={})...",
-                opts.fri_number_of_queries
-            );
-            prover::prove_with_options_and_inputs(
-                &elf_data,
-                &private_inputs,
-                &opts,
-                &Default::default(),
-            )
-        }
-        None => {
-            eprintln!("Generating proof...");
-            prover::prove_with_inputs(&elf_data, &private_inputs)
+    let opts = match GoldilocksCubicProofOptions::with_blowup(blowup) {
+        Ok(opts) => opts,
+        Err(e) => {
+            eprintln!("Invalid proof options: {e}");
+            return ExitCode::FAILURE;
         }
     };
+    eprintln!(
+        "Generating proof (blowup={blowup}, queries={})...",
+        opts.fri_number_of_queries
+    );
+    let proof = prover::prove_with_options_and_inputs(
+        &elf_data,
+        &private_inputs,
+        &opts,
+        &Default::default(),
+    );
     let prove_elapsed = start.elapsed();
     let proof = match proof {
         Ok(proof) => proof,
@@ -474,7 +507,7 @@ fn cmd_prove(
     ExitCode::SUCCESS
 }
 
-fn cmd_verify(proof_path: PathBuf, elf_path: PathBuf, blowup: Option<u8>, time: bool) -> ExitCode {
+fn cmd_verify(proof_path: PathBuf, elf_path: PathBuf, blowup: u8, time: bool) -> ExitCode {
     eprintln!("Reading ELF file...");
     let elf_data = match std::fs::read(&elf_path) {
         Ok(data) => data,
@@ -503,19 +536,14 @@ fn cmd_verify(proof_path: PathBuf, elf_path: PathBuf, blowup: Option<u8>, time:
 
     eprintln!("Verifying proof...");
     let start = Instant::now();
-    let result = match blowup {
-        Some(b) => {
-            let opts = match GoldilocksCubicProofOptions::with_blowup(b) {
-                Ok(opts) => opts,
-                Err(e) => {
-                    eprintln!("Invalid proof options: {e}");
-                    return ExitCode::FAILURE;
-                }
-            };
-            prover::verify_with_options(&proof, &elf_data, &opts, None, None)
+    let opts = match GoldilocksCubicProofOptions::with_blowup(blowup) {
+        Ok(opts) => opts,
+        Err(e) => {
+            eprintln!("Invalid proof options: {e}");
+            return ExitCode::FAILURE;
         }
-        None => prover::verify(&proof, &elf_data),
     };
+    let result = prover::verify_with_options(&proof, &elf_data, &opts, None, None);
     let verify_elapsed = start.elapsed();
     let result = match result {
         Ok(valid) => valid,
@@ -532,11 +560,181 @@ fn cmd_verify(proof_path: PathBuf, elf_path: PathBuf, blowup: Option<u8>, time:
         }
         ExitCode::SUCCESS
     } else {
-        eprintln!("Verification failed!");
+        eprintln!("Verification failed! Ensure --blowup matches the value used for proving.");
         ExitCode::FAILURE
     }
 }
 
+fn cmd_prove_continuation(
+    elf_path: PathBuf,
+    output_path: PathBuf,
+    private_input_path: Option<PathBuf>,
+    epoch_size_log2: Option<u32>,
+    blowup: u8,
+    time: bool,
+    cycles: bool,
+) -> ExitCode {
+    eprintln!("Reading ELF file...");
+    let elf_data = match std::fs::read(&elf_path) {
+        Ok(data) => data,
+        Err(e) => {
+            eprintln!("Failed to read ELF file: {}", e);
+            return ExitCode::FAILURE;
+        }
+    };
+
+    let private_inputs = match read_private_input(private_input_path.as_ref()) {
+        Ok(inputs) => inputs,
+        Err(e) => {
+            eprintln!("{e}");
+            return ExitCode::FAILURE;
+        }
+    };
+
+    let cycle_count = if cycles {
+        match count_cycles(&elf_data, &private_inputs) {
+            Ok(count) => Some(count),
+            Err(e) => {
+                eprintln!("{e}");
+                return ExitCode::FAILURE;
+            }
+        }
+    } else {
+        None
+    };
+
+    let epoch_size_log2 = epoch_size_log2.unwrap_or(DEFAULT_CONTINUATION_EPOCH_SIZE_LOG2);
+    let epoch_size = match continuation_epoch_size(epoch_size_log2) {
+        Ok(size) => size,
+        Err(e) => {
+            eprintln!("{e}");
+            return ExitCode::FAILURE;
+        }
+    };
+
+    let opts = match GoldilocksCubicProofOptions::with_blowup(blowup) {
+        Ok(opts) => opts,
+        Err(e) => {
+            eprintln!("Invalid proof options: {e}");
+            return ExitCode::FAILURE;
+        }
+    };
+
+    eprintln!(
+        "Generating continuation proof (blowup={blowup}, epoch_size_log2={epoch_size_log2}, epoch_size={epoch_size})...",
+    );
+    let start = Instant::now();
+    let bundle = match prover::continuation::prove_continuation(
+        &elf_data,
+        &private_inputs,
+        epoch_size_log2,
+        &opts,
+    ) {
+        Ok(b) => b,
+        Err(e) => {
+            eprintln!("Continuation proof generation failed: {}", e);
+            return ExitCode::FAILURE;
+        }
+    };
+    let prove_elapsed = start.elapsed();
+
+    eprintln!("Writing proof...");
+    let file = match File::create(&output_path) {
+        Ok(f) => f,
+        Err(e) => {
+            eprintln!("Failed to create output file: {}", e);
+            return ExitCode::FAILURE;
+        }
+    };
+    let mut writer = BufWriter::new(file);
+    let bytes = match bincode::serialize(&bundle) {
+        Ok(b) => b,
+        Err(e) => {
+            eprintln!("Failed to serialize proof: {}", e);
+            return ExitCode::FAILURE;
+        }
+    };
+    if let Err(e) = writer.write_all(&bytes) {
+        eprintln!("Failed to write proof: {}", e);
+        return ExitCode::FAILURE;
+    }
+
+    eprintln!("Proof written to {:?}", output_path);
+    if let Some(c) = cycle_count {
+        println!("Cycles: {}", c);
+    }
+    println!("Epochs: {}", bundle.num_epochs());
+    if time {
+        println!("Proving time: {:.3}s", prove_elapsed.as_secs_f64());
+    }
+    ExitCode::SUCCESS
+}
+
+fn cmd_verify_continuation(
+    proof_path: PathBuf,
+    elf_path: PathBuf,
+    blowup: u8,
+    time: bool,
+) -> ExitCode {
+    eprintln!("Reading ELF file...");
+    let elf_data = match std::fs::read(&elf_path) {
+        Ok(data) => data,
+        Err(e) => {
+            eprintln!("Failed to read ELF file: {}", e);
+            return ExitCode::FAILURE;
+        }
+    };
+
+    eprintln!("Reading proof...");
+    let proof_bytes = match std::fs::read(&proof_path) {
+        Ok(b) => b,
+        Err(e) => {
+            eprintln!("Failed to read proof file: {}", e);
+            return ExitCode::FAILURE;
+        }
+    };
+    let bundle: prover::continuation::ContinuationProof = match bincode::deserialize(&proof_bytes) {
+        Ok(p) => p,
+        Err(e) => {
+            eprintln!("Failed to deserialize proof: {}", e);
+            return ExitCode::FAILURE;
+        }
+    };
+
+    let opts = match GoldilocksCubicProofOptions::with_blowup(blowup) {
+        Ok(opts) => opts,
+        Err(e) => {
+            eprintln!("Invalid proof options: {e}");
+            return ExitCode::FAILURE;
+        }
+    };
+
+    eprintln!("Verifying continuation proof...");
+    let start = Instant::now();
+    let result = prover::continuation::verify_continuation(&elf_data, &bundle, &opts);
+    let verify_elapsed = start.elapsed();
+
+    match result {
+        Ok(Some(output)) => {
+            eprintln!("Verification succeeded!");
+            let hex: String = output.iter().map(|b| format!("{:02x}", b)).collect();
+            println!("Output: {}", hex);
+            if time {
+                println!("Verification time: {:.3}s", verify_elapsed.as_secs_f64());
+            }
+            ExitCode::SUCCESS
+        }
+        Ok(None) => {
+            eprintln!("Verification failed! Ensure --blowup matches the value used for proving.");
+            ExitCode::FAILURE
+        }
+        Err(e) => {
+            eprintln!("Verification error: {}", e);
+            ExitCode::FAILURE
+        }
+    }
+}
+
 fn cmd_count_elements(elf_path: PathBuf, private_input_path: Option<PathBuf>) -> ExitCode {
     let elf_data = match std::fs::read(&elf_path) {
         Ok(data) => data,
@@ -566,3 +764,160 @@ fn cmd_count_elements(elf_path: PathBuf, private_input_path: Option<PathBuf>) ->
         }
     }
 }
+
+fn continuation_epoch_size(epoch_size_log2: u32) -> Result<usize, String> {
+    if epoch_size_log2 < MIN_CONTINUATION_EPOCH_SIZE_LOG2 {
+        return Err(format!(
+            "--epoch-size-log2 must be at least {MIN_CONTINUATION_EPOCH_SIZE_LOG2} for CLI proving"
+        ));
+    }
+    1usize.checked_shl(epoch_size_log2).ok_or_else(|| {
+        format!("--epoch-size-log2 {epoch_size_log2} is too large for this platform")
+    })
+}
+
+fn parse_epoch_size_log2(value: &str) -> Result<u32, String> {
+    let epoch_size_log2 = value
+        .parse::<u32>()
+        .map_err(|_| format!("--epoch-size-log2 must be an integer, got `{value}`"))?;
+    continuation_epoch_size(epoch_size_log2)?;
+    Ok(epoch_size_log2)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use clap::CommandFactory;
+
+    // The arg graph is well-formed (e.g. `requires`/`conflicts_with` reference real args).
+    #[test]
+    fn cli_command_is_valid() {
+        Cli::command().debug_assert();
+    }
+
+    // The continuation epoch flag requires --continuations.
+    #[test]
+    fn epoch_size_log2_requires_continuations() {
+        let r = Cli::command().try_get_matches_from([
+            "cli",
+            "prove",
+            "prog.elf",
+            "-o",
+            "out",
+            "--epoch-size-log2",
+            "20",
+        ]);
+        assert!(r.is_err());
+    }
+
+    #[test]
+    fn epoch_size_log2_accepts_continuations() {
+        let r = Cli::command().try_get_matches_from([
+            "cli",
+            "prove",
+            "prog.elf",
+            "-o",
+            "out",
+            "--continuations",
+            "--epoch-size-log2",
+            "20",
+        ]);
+        assert!(r.is_ok());
+    }
+
+    #[test]
+    fn cycles_accepts_continuations() {
+        let r = Cli::command().try_get_matches_from([
+            "cli",
+            "prove",
+            "prog.elf",
+            "-o",
+            "out",
+            "--continuations",
+            "--cycles",
+        ]);
+        assert!(r.is_ok());
+    }
+
+    #[test]
+    fn elements_conflicts_with_continuations() {
+        let r = Cli::command().try_get_matches_from([
+            "cli",
+            "prove",
+            "prog.elf",
+            "-o",
+            "out",
+            "--continuations",
+            "--elements",
+        ]);
+        assert!(r.is_err());
+    }
+
+    #[test]
+    fn epoch_size_log2_rejects_tiny_cli_values() {
+        let r = Cli::command().try_get_matches_from([
+            "cli",
+            "prove",
+            "prog.elf",
+            "-o",
+            "out",
+            "--continuations",
+            "--epoch-size-log2",
+            "17",
+        ]);
+        assert!(r.is_err());
+    }
+
+    #[test]
+    fn old_epoch_size_flag_is_rejected() {
+        let r = Cli::command().try_get_matches_from([
+            "cli",
+            "prove",
+            "prog.elf",
+            "-o",
+            "out",
+            "--continuations",
+            "--epoch-size",
+            "1048576",
+        ]);
+        assert!(r.is_err());
+    }
+
+    #[test]
+    fn old_num_epochs_flag_is_rejected() {
+        let r = Cli::command().try_get_matches_from([
+            "cli",
+            "prove",
+            "prog.elf",
+            "-o",
+            "out",
+            "--continuations",
+            "--num-epochs",
+            "4",
+        ]);
+        assert!(r.is_err());
+    }
+
+    #[test]
+    fn prove_help_omits_removed_epoch_flags() {
+        let mut cmd = Cli::command();
+        let prove = cmd.find_subcommand_mut("prove").unwrap();
+        let mut help = Vec::new();
+        prove.write_long_help(&mut help).unwrap();
+        let help = String::from_utf8(help).unwrap();
+
+        assert!(help.contains("--epoch-size-log2 <N>"));
+        assert!(!help.contains("--num-epochs"));
+        assert!(!help.contains("--epoch-size <"));
+    }
+
+    #[test]
+    fn continuation_epoch_size_rejects_tiny_cli_values() {
+        assert!(continuation_epoch_size(17).is_err());
+    }
+
+    #[test]
+    fn continuation_epoch_size_uses_exact_power_of_two() {
+        assert_eq!(continuation_epoch_size(20).unwrap(), 1 << 20);
+    }
+}
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index e8f27b631..8ba066462 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -17,6 +17,7 @@
     - [Provable security and conjectured security](./cryptography/security.md)
     - [Lookup argument](./cryptography/lookup.md)
 - [Virtual machine](./virtual_machine/introduction.md)
+- [Continuations design](./continuations_design.md)
 
 ## Getting started
 
diff --git a/docs/continuations_design.md b/docs/continuations_design.md
new file mode 100644
index 000000000..9c3f54747
--- /dev/null
+++ b/docs/continuations_design.md
@@ -0,0 +1,564 @@
+# Continuations (Approach 2) — design
+
+This is the single design document for the "continuations" prover (Approach 2,
+"prove-epoch" from the streaming spec). It covers the things a continuation must
+carry across epoch boundaries — **memory** (the bulk of the doc: §1–§5, including
+the cross-epoch local-to-global table and the Design X vs Design Y decision),
+**registers** including the commit index x254 (§6), and the **Fiat-Shamir statement
+binding** (§7) that ties each epoch proof to its program and position — plus the
+soundness mechanisms that make each safe. §8 describes the **standalone (split)
+prover/verifier** that checks a proof bundle with only the ELF.
+
+It is written to be read by a human picking this up cold.
+
+---
+
+## 1. Why continuations
+
+A monolithic proof builds the trace for the **whole** execution in memory at
+once; for large programs that exhausts RAM. Continuations split the execution
+into fixed-size **epochs** and prove each independently, so peak memory stays
+flat as program size grows.
+
+Almost every constraint in a proof is local to its slice of cycles — *except
+memory*. A load in a late epoch may read what an early epoch wrote. So the only
+thing that must be stitched across epoch boundaries is **memory consistency**.
+
+```
+   one execution (e.g. 4,000,000 cycles)
+   ┌───────────────────────────────────────────────┐
+   │  split into epochs of N cycles                 │
+   └───────────────────────────────────────────────┘
+        │            │            │            │
+        ▼            ▼            ▼            ▼
+   ┌─────────┐  ┌─────────┐  ┌─────────┐  ┌─────────┐
+   │ Epoch 0 │  │ Epoch 1 │  │ Epoch 2 │  │ Epoch 3 │  each proven on its own
+   │ CPU MEMW│  │ CPU MEMW│  │ CPU MEMW│  │ CPU MEMW│  (tables dropped from RAM
+   │ ... L2G │  │ ... L2G │  │ ... L2G │  │ ... L2G │   after each epoch)
+   └────┬────┘  └────┬────┘  └────┬────┘  └────┬────┘
+        └────────────┴─────┬──────┴────────────┘
+                           ▼
+                ┌────────────────────────┐
+                │   ONE global proof     │  links the epochs together
+                │   (cross-epoch memory) │
+                └────────────────────────┘
+```
+
+---
+
+## 2. The pieces
+
+A **bus** is a LogUp channel: tables *send* and *receive* tokens, and the proof
+checks that everything sent is received (the bus "balances"). An unmatched token
+makes the proof fail.
+
+- **MEMW** — the actual loads/stores, driven by the CPU executing the program.
+- **L2G** (local-to-global) — one row per memory cell an epoch *touches*. Two roles:
+  - inside an epoch, on the **Memory bus**, it is the *bookend* — it supplies a
+    cell's starting value (seed at timestamp 0) and collects its ending value.
+    It **replaces the PAGE table**, which is switched off inside continuation
+    epochs.
+  - across epochs, on the **GlobalMemory bus**, it carries each cell's
+    "where did this value come from / where is it going" claims.
+- **global_memory** — the *anchors* on the GlobalMemory bus:
+  - **genesis**: a cell's starting value, read from the **ELF** (preprocessed,
+    so the verifier recomputes it — the prover cannot choose initial memory).
+  - **finalization**: a cell's final value after the last epoch that touched it.
+
+### A single L2G row
+
+```
+   ┌──────────┬───────────────────────────┬───────────────────────────┐
+   │ address  │ init: value, epoch        │ fini: value, time         │
+   └──────────┴───────────────────────────┴───────────────────────────┘
+     which       what it was when this        what it is at this
+     cell         epoch first saw it, and       epoch's end (its last
+                  which epoch wrote it          access timestamp)
+```
+
+Column layout (9 columns): `address_lo/hi` (32-bit), `init_value` (byte),
+`init_epoch` (two 16-bit halfwords), `fini_value` (byte),
+`fini_timestamp_lo/hi` (32-bit), `MU` (selector).
+
+Note: **`fini_epoch` is NOT a column** — it is supplied as a per-table constant
+(see §4.2).
+
+Note: there is **no `init_timestamp`**. Timestamps are epoch-local (each epoch's
+clock restarts; the Memory-bus seed is `ts = 0`) and order accesses only *within*
+an epoch. The cross-epoch chain is ordered by the **epoch number** (§3.3), so the
+GlobalMemory bus carries no timestamp at all (see §2 telescoping). `fini_timestamp`
+stays only because the epoch-local **Memory bus** needs it (matched against MEMW).
+
+### Cross-epoch telescoping
+
+For a cell touched in epochs 1, 2, 3, the GlobalMemory bus checks:
+
+```
+ global_memory      L2G(ep1)        L2G(ep2)        L2G(ep3)      global_memory
+   GENESIS  ───────►  init
+   (value v0,         fini  ───────►  init
+    from ELF)                         fini  ───────►  init
+                                                      fini ───────► FINAL
+                                                                    (last value)
+
+   each  "fini ───► init"  is one matched token:
+   epoch i's fini == epoch (i+1)'s init  (same address, value, epoch — no timestamp)
+```
+
+The bus balances **iff** every `fini` is consumed by the next-touching epoch's
+`init`, anchored by GENESIS (the one source) and FINAL (the one sink). That
+chain *is* "memory stayed consistent across epochs." Inside each epoch, ordinary
+memory checking (MEMW + timestamp ordering) handles consistency; L2G only
+provides the seam at the edges.
+
+---
+
+## 3. Soundness, by component
+
+The skeleton above is correct but not *sound* on its own — a cheating prover
+could make the buses balance while lying. Four mechanisms close the gaps.
+
+### 3.1 Range checks on the L2G columns
+
+Raw field columns must be forced into their intended ranges, or a prover can
+stuff out-of-range junk into them.
+
+Principle: **only check what nothing else already checks.**
+
+- `address`, `fini_timestamp`, the value bytes — these travel on the Memory bus
+  and are matched against **MEMW**, which already range-checks them (exactly how
+  PAGE relied on MEMW). No extra check.
+- The **cross-epoch-only** field `init_epoch` has no MEMW partner, so L2G checks it
+  itself: store as 16-bit halfwords, check each with the `IsHalfword` lookup, and
+  rebuild the value as `lo + 2^16·hi`. Because only the range-checked halfwords feed
+  the reconstruction, no extra AIR constraint is needed. (There is no
+  `init_timestamp` to check — the GlobalMemory bus carries no timestamp; see §2.)
+
+The value bytes get PAGE's batched `AreBytes` check (the `init` value is a
+trusted source and must be checked).
+
+### 3.2 `fini_epoch` as a per-table constant
+
+Inside epoch *i*'s table, **every** row's `fini_epoch` is just *i*. So it does
+not need to be a per-row committed column — it is supplied to the AIR as a
+constant `epoch_label`, computed by the verifier from the epoch's position.
+
+This is *strictly more sound* than a column: the prover cannot choose it. The
+genesis sentinel is `0` and real epochs are labelled `1, 2, 3, …`
+(`epoch_label(i) = i + 1`), so genesis is below every real epoch.
+
+### 3.3 Cross-epoch ordering (the subtle one)
+
+The GlobalMemory bus only proves the tokens **match as a set** — not that they
+are chained in increasing-epoch order. Without that, a cheater can make a row's
+`init` and `fini` cancel each other (point `init` at its own epoch), so the row
+**vanishes** from the chain — letting an epoch read a *forged* value for a cell
+while a later epoch absorbs that cell's real genesis. The bus balances; the
+program ran on a lie.
+
+Fix: force every row to reference a strictly earlier source —
+`init_epoch < fini_epoch`. With genesis `= 0` and 1-based epochs, genesis (`0`)
+satisfies it with no special case.
+
+How `a < b` is checked without a dedicated comparison table (the same trick
+MEMW uses for timestamps): in the field, `a < b` ⟺ `b − 1 − a` is a small,
+in-range number. If `a ≥ b`, that subtraction wraps to a huge field element that
+fails the range check. So we range-check `fini_epoch − 1 − init_epoch` with the
+`IsB20` (20-bit) lookup — reusing the bit-table already present, near-zero cost.
+
+```
+   honest:  init=2, fini=5  →  5-1-2 = 2     small  ✓ passes
+   cheat:   init=5, fini=5  →  5-1-5 = -1    wraps  ✗ fails  (self-reference)
+   cheat:   init=9, fini=5  →  5-1-9 = -5    wraps  ✗ fails  (future reference)
+```
+
+Strict `<` (not `≤`) is required: `≤` would permit `init_epoch == fini_epoch`,
+which is exactly the self-cancel that enables the forgery. Strict `<` guarantees
+a real row's `init` and `fini` epochs always differ, so a real row can never
+self-cancel.
+
+Cost: this bounds the **number** of epochs to `< 2^20` (~1M) — *not* their size.
+Unreachable in practice (optimal epochs are millions of cycles → thousands of
+epochs even for a billion-cycle run) and fails closed. If ever needed, widen the
+gap check to 32-bit or switch to the LT table.
+
+### 3.4 The `MU` selector
+
+Traces are padded with blank rows to a power of two (an FFT requirement). Those
+padding rows must not disturb any bus.
+
+Originally padding was harmless because a blank row's `init` and `fini` tokens
+were identical and self-cancelled. **Two** of the changes above broke that, each
+on its own:
+
+- §3.2 (constant `fini_epoch`): a padding row's `fini` now carries
+  `epoch = the constant` while its `init` carries `epoch = 0`, so the tokens
+  differ and no longer cancel.
+- §3.3 (the ordering check): a padding row has `init_epoch == fini_epoch` (both
+  `0`), which fails the strict `<` check.
+
+So `MU` is required by *either* change.
+
+Fix: a selector column `MU` (1 on real rows, 0 on padding). Interactions gated by
+`Multiplicity::Column(MU)` contribute nothing on padding rows.
+
+`MU` is itself constrained boolean (`MU·(1−MU)=0`), and pinned to the right
+rows by bus balance (a real row with `MU=0` drops its telescoping link →
+imbalance).
+
+### 3.5 CPU padding and the power-of-two epoch size
+
+The CPU table is padded to a power of two (the same FFT requirement). After the
+inline-PC rework, padding rows are **not** inert: each carries `pc = 1` and
+reads/writes it on the inline-PC `memory` chain, and that chain is anchored only by
+the HALT chip's `consume_pc`/`emit_pc` — which converts the last real `next_pc`
+into the `pc = 1` sentinel the padding rows expect.
+
+An **intermediate** continuation epoch excludes HALT (only the *final* epoch
+halts). So if an intermediate epoch had padding rows, their `pc = 1` tokens would
+dangle — no HALT to anchor them, and the REGISTER FINI carries the real next PC,
+not `1` — and the Memory bus would not balance. The honest prover could not produce
+a verifying proof.
+
+Fix: **epoch size is expressed as `epoch_size_log2`**, so the driver slices at
+exactly `2^epoch_size_log2` cycles. An intermediate epoch runs that exact
+power-of-two number of cycles, so its CPU table already has a power-of-two row
+count and therefore **zero padding rows** — nothing to dangle. The final epoch
+keeps its remainder *and* its HALT, so its padding chain is anchored as usual. A
+program shorter than one epoch runs as a single final (monolithic-style) epoch.
+
+This is a **completeness** fix: it changes no constraint and nothing the verifier
+accepts — only how the driver slices cycles. A debug-assert enforces the
+"intermediate epoch ⟹ power-of-two cycle count" invariant.
+
+---
+
+## 4. Design X vs Design Y — *where* `MU` is applied
+
+`MU` is needed to neutralize padding, but **which** interactions should it gate?
+
+```
+                       GlobalMemory   Memory     range +
+                       (telescoping)  (bookend)  ordering
+   Design X (SOUND):       MU            MU          MU      ← MU gates everything
+   Design Y (UNSOUND):     MU           One         One      ← MU only on GlobalMemory
+```
+
+**Conclusion up front: Design X is sound; Design Y is *not*.** We initially
+believed Y was a cleaner equivalent (and two adversarial reviews agreed). They
+were wrong — Y opens a chain-truncation attack. Below is X, then Y, then the
+attack and why X blocks it.
+
+### Design X
+
+`MU` gates **every** L2G interaction (matches the standard table pattern —
+LT/MUL/MEMW each gate all their interactions with one multiplicity column).
+
+The crucial consequence — which we first mistook for redundancy — is that gating
+the **Memory bus bookend** with `MU` forces `MU = 1` on every *touched* cell:
+a touched cell's MEMW accesses need the L2G seed/fini on the Memory bus (PAGE is
+off), so `MU = 0` would dangle them → the epoch proof fails. This is **Statement
+S** below. Forcing `MU = 1` on every touched cell forces every touching epoch
+**into the global chain** — so the chain is **complete**, and cannot be truncated.
+
+### Design Y (rejected — unsound)
+
+`MU` gates **only the GlobalMemory bus**; the Memory bus and range/ordering checks
+use `Multiplicity::One`. The intended win was that the ordering check then fires
+unconditionally so `MU` can't skip it. But decoupling the Memory bookend from `MU`
+**broke Statement S**: a touched cell's bookend now fires regardless of `MU`
+(`Multiplicity::One`), so the epoch proof passes even with `MU = 0`. Nothing then
+forces `MU = 1` on a *non-first-touch* row — and that is exploitable.
+
+### The attack Design Y allows: orphan a touched epoch
+
+Cell A, touched by epochs e1 then e2. Honest: genesis `v0` → e1 writes `f1` →
+e2 writes `f2` → final `f2`. A cheating prover sets **`MU = 0` on e2's L2G row**
+and sets `global_memory`'s finalization for A to `f1`:
+
+```
+   genesis(v0) ──► e1.init        ✓ (genesis must be consumed — forces e1 only)
+   e1.fini(f1) ──► FINAL(f1)      ✓ (prover-chosen finalization absorbs it)
+   e2.init / e2.fini              ✗ MU=0 — orphaned, don't fire
+```
+
+- The GlobalMemory bus **balances** (every fired token matched).
+- e2's **epoch proof still passes** — in Design Y its Memory bookend is
+  `Multiplicity::One`, so it fires regardless of `MU`; e2 ran internally-consistently.
+- **Nothing forces `MU_e2 = 1`:** e2 isn't first-touch (genesis went to e1), and
+  the finalization is a *prover column*, so it just absorbs whatever the last fired
+  fini was.
+
+Result: e2's write to A is silently dropped — A's final value is claimed `f1`
+when it's really `f2`. A false statement, proven. (For a *middle* epoch, reroute
+the later init to consume the earlier fini, skipping the middle one.)
+
+The root cause is the **input/output asymmetry** of the anchors: genesis is the
+*input* and is ELF-bound (fixed), but the finalization is the *output* — a prover
+column. The finalization is only trustworthy if the chain is **complete** so that
+the last fini is *forced* to be consumed by it. A complete chain pins the
+finalization; a truncatable chain leaves it free. Design X forces completeness
+(via `MU=1` on every touched cell); Design Y does not.
+
+### Statement S (why Design X is sound, and what Y broke)
+
+> In a continuation epoch, the only table that provides a RAM cell's seed (its
+> value at timestamp 0) on the Memory bus is L2G (PAGE is off). If a cell is
+> accessed by MEMW during the epoch, the memory argument requires that seed; with
+> `MU = 0` the seed is absent and the Memory bus cannot balance. Therefore any
+> accessed cell is forced to `MU = 1`.
+
+S rests on three checkable facts: (1) PAGE is off in continuation epochs;
+(2) MEMW enforces timestamp ordering, so a cell's access chain must bottom out at
+the seed; (3) no other table provides a RAM seed (REGISTER is registers only, a
+disjoint token subspace).
+
+**S requires the Memory bookend to be `MU`-gated** — that is exactly what Design X
+has and Design Y removed. So the "redundant" `MU` on the Memory bus in Design X is
+in fact load-bearing: it's what forces every touched epoch into the chain, making
+the chain complete and the finalization trustworthy.
+
+### The anchoring chain (why a real access cannot be dropped at all)
+
+`MU = 1` being forced bottoms out at the program itself:
+
+```
+   ELF ─DECODE(preprocessed)─► each row's instruction (LOAD/STORE flags) is fixed
+   PC-continuity ───────────► every executed instruction is present, in order
+        │
+   ▼ a real load/store row has its flag = 1 (DECODE match + IsBit) ⟹ CPU sends Memw req
+   ▼ MEMW must receive it (MU_READ/MU_WRITE) — dropping it ⟹ Memw-bus imbalance
+   ▼ MEMW's bookend pairing needs the L2G seed/fini — in Design X (MU-gated) ⟹ MU=1
+   ▼ MU=1 ⟹ the cell is in the global chain ⟹ chain complete ⟹ finalization pinned
+```
+
+This is the VM's core execution soundness (DECODE + PC-continuity + IsBit flags,
+verified in `cpu.rs` / `constraints/cpu.rs`), extended one link at a time up to
+cross-epoch memory. Design X keeps every link; Design Y cut the MEMW→L2G link.
+
+### How `global_memory`'s finalization is constrained — and the parallel with `main`
+
+The finalization is **not** checked against an external value (it's the computed
+output, not a known input). It is pinned **internally** by the bus: it must consume
+the last fini of each cell's chain, which (with a complete chain) is the cell's
+real last-written value. This is exactly how **PAGE** works in the monolithic
+prover — PAGE's `fini` is pinned by the (single, complete) Memory bus to the last
+MEMW write. Design X is the faithful cross-epoch extension; Design Y silently
+dropped the "chain is complete" property both rely on.
+
+---
+
+## 5. Adversarial review summary
+
+1. **`MU` safety (Design X).** Could `MU=0` on a real row, or a non-boolean `MU`,
+   skip the ordering or forge a balance? No — caught by the Memory bus (Statement
+   S) and the boolean constraint. **Holds.**
+2. **Design Y.** Two adversarial reviews concluded Y was sound (padding harmless,
+   ordering unconditional, "ghost row" attack defeated). **They were wrong.** Both
+   only tested *first-touch* `MU=0` (genesis dangles → caught) and added/forged
+   rows; neither tested **truncating the chain at a non-first-touch row** while
+   pointing the prover-controlled finalization at the truncation. That attack (§4)
+   makes Y unsound. Lesson: a review that misses an attack class proves nothing
+   about it — the truncation/orphan class was the gap.
+3. **`fini_epoch` as a constant.** Sound — strictly more so than a column. Labels
+   are verifier-computed from epoch position (unforgeable); prove/verify use
+   identical labels (no off-by-one); the free `init_epoch` column and
+   `global_memory`'s `FINI_EPOCH` column are pinned by bus balance **when the chain
+   is complete** (Design X). Independent of the X/Y choice.
+
+---
+
+## 6. Registers (cross-epoch)
+
+Registers must also carry across epochs: epoch *i+1* must start from epoch *i*'s
+final register file. Unlike memory, the register file is **small and fixed** (34
+registers / 67 word-addresses, all present every epoch), so it needs no L2G /
+global telescoping — we bind the whole snapshot directly.
+
+**Mechanism (no new bus).** The REGISTER table is the register analog of PAGE — it
+already puts each register's init/fini tokens on the epoch-local Memory bus
+(REG-C1 init, REG-C2 fini, matched against MEMW). For continuation epochs we
+**also preprocess the FINI column** = the epoch's final register file `R_{i+1}`
+(on top of the already-preprocessed INIT = `R_i`). "Preprocessed" means
+*verifier-known*: the verifier recomputes the column's commitment, so the prover
+cannot choose it. The verifier reuses the **same** `R_{i+1}` as epoch *i*'s FINI
+and epoch *i+1*'s INIT, so `init(i+1) == fini(i)` **by construction** — no equality
+check and no bus. Genesis is epoch 0's INIT = the ELF entry-point registers
+(verifier-derived).
+
+```
+   epoch i REGISTER              epoch i+1 REGISTER
+     INIT = R_i      (pre)         INIT = R_{i+1}   (pre)   ← same R_{i+1}
+     FINI = R_{i+1}  (pre) ────────┘                          reused both sides
+```
+
+### Register soundness (two locks)
+
+For `R_{i+1}` to be the *real* final registers (not a free prover claim), two
+locks compose:
+
+1. **Preprocessing** pins the trace's FINI column = the public `R_{i+1}` (the
+   verifier recomputes the commitment; the proof's FINI openings must authenticate
+   against it, so the prover can't deviate).
+2. **REG-C2 on the Memory bus** pins that FINI column = MEMW's true last write to
+   each register (or the Memory bus doesn't balance).
+
+Compose them: public `R_{i+1}` = trace FINI = real last write. So the value handed
+to the next epoch is pinned to real execution.
+
+The **monolithic prover is unchanged**: it keeps FINI as a main-trace column (it
+has no verifier-known final state) and preprocesses 2 columns, not 3.
+
+### Commit index (x254)
+
+The COMMIT chip's running output index lives in a synthetic single-word register
+**x254** (word-address 508), so it rides the **same** register binding above —
+epoch *i*'s `FINI[x254]` becomes epoch *i+1*'s `INIT[x254]`, pinned by the two
+locks like any register. Each epoch therefore indexes its committed bytes from the
+*carried* value, not from `0`:
+
+- the COMMIT trace seeds `current_commit_index` from x254
+  (`register_state.read_index()` in `trace_builder.rs`), with a debug-assert
+  pinning the two in sync every step;
+- the verifier's commit-bus offset (`compute_commit_bus_offset`'s `start_index`)
+  starts at the same carried x254.
+
+The driver concatenates each epoch's committed slice into the run-wide output.
+Because every slice is commit-bus-bound *and* the x254 indices are forced
+contiguous (`init(i+1) == fini(i)`), the concatenation equals the true output
+stream — no separate global "commit output" bus is needed.
+
+---
+
+## 7. Fiat-Shamir statement binding
+
+Each epoch proof and the global proof seed their Fiat-Shamir transcript with a
+**statement** before the challenges are drawn (they previously started empty). The
+seeding only *adds* input to the transcript, so it can strengthen binding but never
+weaken soundness — and it pins every proof to its program and position, so a proof
+can't be replayed elsewhere:
+
+- Each **epoch** absorbs: a domain tag, the ELF digest, the public output, the
+  table layout, and the **epoch label** (its position).
+- The **global** proof absorbs: a (distinct) domain tag, the ELF digest, and the
+  **epoch count**.
+
+The monolithic encoding is unchanged (same function, monolithic tag, no label).
+The genesis / register / memory anchor values are *additionally* bound via the
+preprocessed commitments absorbed during proving.
+
+The standalone *split* verifier (§8) carries these statement fields in the proof
+bundle and takes the epoch label / count from its own trusted enumeration, so the
+binding holds there too — not just on the integrated path.
+
+---
+
+## 8. Standalone (split) prover/verifier
+
+The continuation can be proved and verified by separate parties. `prove_continuation`
+emits a self-contained `ContinuationProof` bundle; `verify_continuation(elf, &bundle)`
+checks it using **only the bundle and the ELF** — nothing from the prover's memory.
+The integrated `prove_and_verify_continuation` is now a thin wrapper
+(`prove_continuation` then `verify_continuation`), and `prove_verify_epoch` is
+likewise split into `prove_epoch` + `verify_epoch`.
+
+The bundle is prover-supplied and therefore **untrusted**. Per epoch it carries the
+`MultiProof`, the `public_output` slice, `table_counts`,
+`num_private_input_pages`, `runtime_page_ranges`, the bound `reg_fini` (`R_{i+1}`),
+the epoch `l2g_root`, and the touched-cell `boundary`; plus the global `MultiProof`
+and the `private_inputs`. Everything the integrated path reused from prover memory
+becomes an **explicit verifier action**:
+
+- **Enumerate, don't trust.** The verifier assigns each epoch's `label` and the
+  `is_final` flag **by position** (`0..N-1`; the last is final), so the prover can't
+  relabel, reorder, truncate, or append epochs — a wrong label diverges that epoch's
+  Fiat-Shamir challenges, and a wrong `is_final` builds the HALT table in/out and
+  mismatches the committed proof.
+- **Derive the register / x254 chain.** Epoch 0's register INIT is derived from the
+  ELF entry point; epoch *i+1*'s INIT is derived from epoch *i*'s bundle `reg_fini`
+  (incl. x254 @ 508). So `init(i+1) == fini(i)` is now *enforced by the verifier
+  rebuilding the AIR from the previous FINI* (via the shared `build_epoch_airs`),
+  not merely true-by-construction. The commit-bus `start_index` is taken from the
+  carried `register_init[508]`, not a free scalar.
+- **Genesis from the ELF.** `verify_global` rebuilds the memory genesis from the ELF
+  (+ bundle private inputs) and closes the GlobalMemory bus;
+  `verify_l2g_commitment_binding` ties each epoch's `l2g_root` to the corresponding
+  global-proof sub-table root — which is what makes the prover-supplied `boundary`
+  trustworthy.
+- **Reconstruct the output** by concatenating the per-epoch commit slices (each
+  commit-bus-bound, contiguous via the x254 chain).
+- The verifier also `validate()`s `table_counts` and never trusts a prover-supplied
+  page config (continuation epochs have none — PAGE is skipped under the L2G
+  bookend, so `page_configs` is always empty).
+
+A single `build_epoch_airs` helper builds the AIR set identically on both sides, so
+prove and verify cannot diverge.
+
+**Reviewed.** An adversarial "construct-a-break" audit (Phase-3 dismissal audit with
+fresh agents) of the register/x254 chain, the L2G root binding, and
+completeness-by-enumeration found no false-accept: each forgery is caught by a
+Merkle/hash collision, a bus imbalance, or a Fiat-Shamir divergence.
+
+The bundle derives serde and round-trips through `bincode` (exactly like a
+monolithic `VmProof`); the CLI drives it via `prove --continuations` (writes the
+bundle) and `verify --continuations` (checks bundle + ELF only). `prove` picks the
+epoch size from `--epoch-size-log2 N` (`N=20` means 1,048,576 cycles), defaulting
+to `20`. A local ethrex 10-transfer distinct-account
+sweep measured peak heap at roughly 6.9 GB (`19`), 9.5 GB (`20`), 15.8 GB (`21`),
+and 26.8 GB (`22`); pick the highest value the workload and machine can run
+without swapping.
+
+**Limitation — not succinct.** The bundle carries, and the verifier checks, all *N*
+epoch proofs plus the global proof. Continuations keep peak *prover* memory flat;
+they do **not** shrink proof size or verify time. A single succinct proof needs a
+recursion/aggregation layer (deferred).
+
+---
+
+## 9. Status and open items
+
+- Implemented and tested: range checks (§3.1), `fini_epoch` constant (§3.2),
+  ordering check (§3.3), the `MU` selector (§3.4), the **power-of-two epoch size**
+  (§3.5), **cross-epoch registers** (§6), the **commit index x254** across epochs
+  (§6), the **Fiat-Shamir statement binding** (§7), and the **standalone split
+  prover/verifier** (§8) — bundle serialized with `bincode` and driven from the CLI
+  (`prove`/`verify --continuations`).
+- **The committed code implements Design X** (`MU` gates every L2G interaction),
+  which is the sound design. Design Y was implemented briefly, then found unsound
+  (§4, the chain-truncation attack) and **reverted**. Do not re-introduce the
+  Design Y wiring: gating only the GlobalMemory bus reopens the orphan attack.
+- Deferred:
+  - **Succinctness.** The split verifier is non-succinct (N+1 proofs, §8). A single
+    small proof needs a recursion/aggregation layer — a separate, larger effort.
+  - **Private-input binding.** The genesis image depends on `private_inputs`, which
+    the bundle carries in the clear; binding them into the statement (so "which input
+    produced this output" is pinned) is a follow-up that also touches the monolithic
+    proof.
+
+---
+
+## 10. Where the code lives
+
+- `prover/src/tables/local_to_global.rs` — L2G columns, trace generation, the
+  Memory/GlobalMemory bus interactions, range checks, the ordering lookup, and
+  the per-row selector.
+- `prover/src/tables/global_memory.rs` — the genesis (ELF-bound) and
+  finalization anchors.
+- `prover/src/tables/register.rs` — the REGISTER table: REG-C1/REG-C2 Memory-bus
+  tokens, the preprocessed FINI commitment (`compute_precomputed_commitment_with_fini`,
+  `NUM_PREPROCESSED_COLS_WITH_FINI`), and `fini_from_trace`.
+- `prover/src/statement.rs` — the Fiat-Shamir statement absorbers
+  (`absorb_statement` with `StatementKind`, `absorb_continuation_global_statement`).
+- `prover/src/continuation.rs` — the split prover/verifier: `prove_continuation` /
+  `verify_continuation` and the `ContinuationProof` bundle; the per-epoch
+  `prove_epoch` / `verify_epoch` with the shared `build_epoch_airs` helper; the
+  global proof (`prove_global` / `verify_global`); the per-epoch AIRs
+  (`l2g_memory_air` / `l2g_global_air`); the power-of-two epoch sizing from
+  `epoch_size_log2`; the register-FINI preprocessing; the transcript seeding; and
+  `prove_and_verify_continuation` (the thin integrated wrapper).
+- `prover/src/lib.rs` — `verify_l2g_commitment_binding` (epoch L2G root ↔ global
+  sub-table root) and the commit-bus offset/balance helpers
+  (`compute_commit_bus_offset`, `compute_expected_commit_bus_balance`) that take the
+  carried x254 as `start_index`.
+- `prover/src/tables/trace_builder.rs` — seeds `current_commit_index` from x254
+  (`read_index`) so committed-byte indexing carries across epochs.
diff --git a/executor/programs/asm/array_multipass_20M.s b/executor/programs/asm/array_multipass_20M.s
new file mode 100644
index 000000000..9d5fab40a
--- /dev/null
+++ b/executor/programs/asm/array_multipass_20M.s
@@ -0,0 +1,36 @@
+	.attribute	5, "rv64i2p1"
+	.globl	main
+main:
+	# Multi-pass array: P passes over an N-word array, each element
+	# load+add+store. Touches a LARGE distinct RAM footprint (N words)
+	# and REUSES it every pass (so each cell is touched in multiple
+	# epochs) -> worst-case stress for the local-to-global table.
+	#
+	# Footprint = N words = 4*N bytes (here 262144 words = 1 MiB).
+	# Steps ~= P * N * 6  (here 13 * 262144 * 6 ~= 20.4M).
+	#
+	# Tuning knobs:
+	#   t5 init (N)  -> distinct footprint (bytes = 4*N)
+	#   t6 init (P)  -> number of passes (cross-epoch reuse)
+	#   keep P*N*6 ~= target step count.
+
+	li	t3, 1			# increment k
+	li	t6, 13			# P = passes
+	li	t0, 0x40000000		# BASE = array address (free RAM)
+
+.outer:
+	mv	t1, t0			# ptr = BASE
+	li	t5, 262144		# N = words per pass
+.inner:
+	lw	t4, 0(t1)		# t4 = a[i]
+	add	t4, t4, t3		# a[i] += k
+	sw	t4, 0(t1)		# a[i] = t4
+	addi	t1, t1, 4		# ptr += 4
+	addi	t5, t5, -1		# i--
+	bnez	t5, .inner
+	addi	t6, t6, -1		# pass--
+	bnez	t6, .outer
+
+	li	a0, 0
+	li	a7, 93
+	ecall
diff --git a/executor/programs/asm/test_commit_split.s b/executor/programs/asm/test_commit_split.s
new file mode 100644
index 000000000..1b8dab7f0
--- /dev/null
+++ b/executor/programs/asm/test_commit_split.s
@@ -0,0 +1,47 @@
+	.attribute	5, "rv64i2p1"
+	.globl	main
+main:
+	# Commit [0xAA,0xBB] early, do filler work, then commit [0xCC,0xDD] later —
+	# so with a small epoch size the two commits fall in DIFFERENT epochs and the
+	# second commit's epoch starts with x254 (commit index) already = 2.
+	addi	sp, sp, -16		# allocate stack
+
+	# --- first commit: bytes [0xAA, 0xBB] ---
+	addi	t0, zero, 0xAA
+	sb	t0, 0(sp)
+	addi	t0, zero, 0xBB
+	sb	t0, 1(sp)
+	li	a0, 1			# fd = 1
+	mv	a1, sp			# buf = sp
+	li	a2, 2			# count = 2
+	li	a7, 64			# syscall = Commit
+	ecall
+
+	# --- filler work (room for an epoch boundary between the two commits) ---
+	addi	t1, zero, 0
+	addi	t1, t1, 1
+	addi	t1, t1, 1
+	addi	t1, t1, 1
+	addi	t1, t1, 1
+	addi	t1, t1, 1
+	addi	t1, t1, 1
+	addi	t1, t1, 1
+	addi	t1, t1, 1
+	addi	t1, t1, 1
+
+	# --- second commit: bytes [0xCC, 0xDD] ---
+	addi	t0, zero, 0xCC
+	sb	t0, 2(sp)
+	addi	t0, zero, 0xDD
+	sb	t0, 3(sp)
+	li	a0, 1			# fd = 1
+	addi	a1, sp, 2		# buf = sp+2
+	li	a2, 2			# count = 2
+	li	a7, 64			# syscall = Commit
+	ecall
+
+	# --- halt ---
+	addi	sp, sp, 16
+	li	a0, 0
+	li	a7, 93			# syscall = Halt
+	ecall
diff --git a/executor/programs/asm/test_ecsm_split.s b/executor/programs/asm/test_ecsm_split.s
new file mode 100644
index 000000000..e0e1666ae
--- /dev/null
+++ b/executor/programs/asm/test_ecsm_split.s
@@ -0,0 +1,49 @@
+	.attribute	5, "rv64i2p1_m2p0_zmmul1p0"
+	.globl	main
+main:
+	# Like test_ecsm.s, but the ECSM pointer registers (a0=&xR, a1=&xG, a2=&k)
+	# are set at the very START and never rewritten before the ecall. With a small
+	# continuation epoch size the ecall lands in a LATER epoch than the one that set
+	# the pointers, so the per-epoch touched-cell pass must carry registers across
+	# the boundary to compute the right addresses.
+	addi	sp, sp, -96
+	addi	a0, sp, 64
+	addi	a1, sp, 0
+	addi	a2, sp, 32
+	li	a7, -11
+
+	# xG = secp256k1 Gx, little-endian (4 doublewords). The heavy 64-bit immediates
+	# act as natural filler between the pointer setup and the ecall.
+	li	t0, 0x59F2815B16F81798
+	sd	t0, 0(sp)
+	li	t0, 0x029BFCDB2DCE28D9
+	sd	t0, 8(sp)
+	li	t0, 0x55A06295CE870B07
+	sd	t0, 16(sp)
+	li	t0, 0x79BE667EF9DCBBAC
+	sd	t0, 24(sp)
+
+	# k = 5 (little-endian); exercises double, double, add.
+	li	t0, 5
+	sd	t0, 32(sp)
+	sd	zero, 40(sp)
+	sd	zero, 48(sp)
+	sd	zero, 56(sp)
+
+	# ECSM ecall: a0/a1/a2 were set far above (possibly in an earlier epoch).
+	ecall
+
+	# Commit the 32-byte result xR so the test can check it equals x(5G).
+	li	a0, 1
+	addi	a1, sp, 64
+	li	a2, 32
+	li	a7, 64
+	ecall
+
+	# Restore stack and halt.
+	addi	sp, sp, 96
+	li	a0, 0
+	li	a7, 93
+	ecall
+.Lfunc_end1:
+	.size	main, .Lfunc_end1-main
diff --git a/executor/src/vm/execution.rs b/executor/src/vm/execution.rs
index 614aad649..99eb0a00f 100644
--- a/executor/src/vm/execution.rs
+++ b/executor/src/vm/execution.rs
@@ -30,6 +30,17 @@ pub struct ExecutionResult {
 /// Size of each log chunk - balances memory usage vs callback overhead
 const CHUNK_SIZE: usize = 100_000;
 
+/// Result of executing one continuation epoch: the logs produced during the
+/// epoch and the VM state at the epoch boundary. The boundary state is the
+/// starting state of the next epoch.
+#[derive(Debug)]
+pub struct EpochExecution {
+    pub logs: Vec<Log>,
+    pub end_pc: u64,
+    pub end_registers: Registers,
+    pub end_memory: Memory,
+}
+
 /// Executor state for chunked execution
 pub struct Executor {
     memory: Memory,
@@ -57,13 +68,34 @@ impl Executor {
 
     /// Resume execution and return next logs. Returns None when program is finished.
     pub fn resume(&mut self) -> Result<Option<&[Log]>, ExecutorError> {
+        self.resume_with_limit(CHUNK_SIZE)
+    }
+
+    /// Current program counter (0 once the program has halted).
+    pub fn pc(&self) -> u64 {
+        self.pc
+    }
+
+    /// Current register state.
+    pub fn registers(&self) -> &Registers {
+        &self.registers
+    }
+
+    /// Current memory state.
+    pub fn memory(&self) -> &Memory {
+        &self.memory
+    }
+
+    /// Resume execution, running at most `limit` cycles, and return the logs
+    /// produced. Returns None when the program is finished.
+    pub fn resume_with_limit(&mut self, limit: usize) -> Result<Option<&[Log]>, ExecutorError> {
         if self.pc == 0 {
             return Ok(None);
         }
 
         self.logs.clear();
 
-        while self.pc != 0 && self.logs.len() < CHUNK_SIZE {
+        while self.pc != 0 && self.logs.len() < limit {
             if !self.pc.is_multiple_of(4) {
                 return Err(ExecutorError::InstructionAddressMisaligned(self.pc));
             }
@@ -117,6 +149,29 @@ impl Executor {
             instructions: self.instructions.into_instruction_map(),
         })
     }
+
+    /// Run to completion, splitting execution into epochs of at most `epoch_size`
+    /// cycles. Each epoch captures its logs and the VM state at the epoch
+    /// boundary, which is the starting state of the next epoch. Consumes the
+    /// executor.
+    ///
+    /// Test/bench helper — the production continuation prover streams epochs via
+    /// `resume_with_limit` directly.
+    pub fn run_epochs(mut self, epoch_size: usize) -> Result<Vec<EpochExecution>, ExecutorError> {
+        assert!(epoch_size > 0, "epoch_size must be greater than zero");
+
+        let mut epochs = Vec::new();
+        while let Some(logs) = self.resume_with_limit(epoch_size)? {
+            let logs = logs.to_vec();
+            epochs.push(EpochExecution {
+                logs,
+                end_pc: self.pc,
+                end_registers: self.registers.clone(),
+                end_memory: self.memory.clone(),
+            });
+        }
+        Ok(epochs)
+    }
 }
 
 fn load_program(segments: &[crate::elf::Segment], memory: &mut Memory) -> Result<(), MemoryError> {
diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs
index ea84e2620..1bc4549fd 100644
--- a/executor/src/vm/memory.rs
+++ b/executor/src/vm/memory.rs
@@ -50,7 +50,7 @@ pub const MAX_PRIVATE_INPUT_SIZE: u64 = 6700000;
 /// Must match `PRIVATE_INPUT_START` in `syscalls/src/syscalls.rs`.
 pub const PRIVATE_INPUT_START_INDEX: u64 = 0xFF000000;
 
-#[derive(Default, Debug)]
+#[derive(Default, Debug, Clone)]
 pub struct Memory {
     cells: U64HashMap<[u8; 4]>,
     /// Bytes committed to public output via `commit_public_output`. The
@@ -80,6 +80,18 @@ impl Memory {
         entry[(address % 4) as usize] = value;
     }
 
+    /// Iterate over all stored bytes as `(address, value)` pairs. Cells are
+    /// stored as 4-byte words; each word expands into its four byte addresses.
+    /// Used to snapshot memory at an epoch boundary.
+    pub fn iter_bytes(&self) -> impl Iterator<Item = (u64, u8)> + '_ {
+        self.cells.iter().flat_map(|(&addr, bytes)| {
+            bytes
+                .iter()
+                .enumerate()
+                .map(move |(i, &b)| (addr + i as u64, b))
+        })
+    }
+
     pub fn load_word(&self, address: u64) -> Result<u32, MemoryError> {
         if address.is_multiple_of(4) {
             let bytes = self.cells.get(&address).cloned().unwrap_or_default();
diff --git a/executor/src/vm/registers.rs b/executor/src/vm/registers.rs
index 61945b732..a82ef44f1 100644
--- a/executor/src/vm/registers.rs
+++ b/executor/src/vm/registers.rs
@@ -2,7 +2,7 @@ use std::fmt::Display;
 
 pub const STACK_TOP: u64 = 0xFFFFFFFFFFFFFFF0; // 64-bit max (Multiple of 16 for RV64 ABI)
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 /// Holds the current value of all 32 registers
 /// Register zero is implicit as it cannot hold any value other than zero
 pub struct Registers([u64; 31]);
diff --git a/executor/tests/asm.rs b/executor/tests/asm.rs
index e9c9c08dd..a1c9baf2b 100644
--- a/executor/tests/asm.rs
+++ b/executor/tests/asm.rs
@@ -923,3 +923,44 @@ fn test_keccak() {
     assert_eq!(result.return_values.memory_values, expected_bytes);
     assert_eq!(result.return_values.register_values.0, 0);
 }
+
+#[test]
+fn test_run_epochs_splits_execution_into_n_cycle_epochs() {
+    let elf_data = std::fs::read("./program_artifacts/asm/basic_program.elf").unwrap();
+    let program = Elf::load(&elf_data).unwrap();
+
+    // Reference: full single-pass run.
+    let full = Executor::new(&program, vec![]).unwrap().run().unwrap();
+
+    // Pick an epoch size that splits this program into a few epochs, whatever
+    // its exact length.
+    let total_cycles = full.logs.len();
+    assert!(total_cycles >= 2);
+    let epoch_size = (total_cycles / 3).max(1);
+
+    let epochs = Executor::new(&program, vec![])
+        .unwrap()
+        .run_epochs(epoch_size)
+        .unwrap();
+
+    // The program is long enough to span several epochs.
+    assert!(epochs.len() >= 2);
+
+    // Concatenated epoch logs reproduce the full run's instruction stream.
+    let concat: Vec<u64> = epochs
+        .iter()
+        .flat_map(|e| e.logs.iter().map(|l| l.current_pc))
+        .collect();
+    let expected: Vec<u64> = full.logs.iter().map(|l| l.current_pc).collect();
+    assert_eq!(concat, expected);
+
+    // Every epoch except the last runs exactly `epoch_size` cycles.
+    for epoch in &epochs[..epochs.len() - 1] {
+        assert_eq!(epoch.logs.len(), epoch_size);
+    }
+    let last = epochs.last().unwrap();
+    assert!(!last.logs.is_empty() && last.logs.len() <= epoch_size);
+
+    // The program finished, so the final epoch's boundary pc is 0.
+    assert_eq!(last.end_pc, 0);
+}
diff --git a/prover/Cargo.toml b/prover/Cargo.toml
index da9ceb9af..61d2aa61a 100644
--- a/prover/Cargo.toml
+++ b/prover/Cargo.toml
@@ -43,3 +43,7 @@ harness = false
 [[bench]]
 name = "profile_vm_prover"
 harness = false
+
+[[bench]]
+name = "bench_continuation"
+harness = false
diff --git a/prover/benches/bench_continuation.rs b/prover/benches/bench_continuation.rs
new file mode 100644
index 000000000..c8638346f
--- /dev/null
+++ b/prover/benches/bench_continuation.rs
@@ -0,0 +1,141 @@
+//! Peak-memory benchmark: monolithic proving vs continuation (streaming-epoch)
+//! proving, for large programs.
+//!
+//! This is a plain one-shot binary (`harness = false`), not a Criterion bench:
+//! Criterion measures time over many iterations, whereas the point here is the
+//! peak resident set of a SINGLE prove. Wrap it in the OS timer to capture RSS,
+//! on Linux:
+//!     /usr/bin/time -v <binary> main <elf_path>
+//!     /usr/bin/time -v <binary> cont <elf_path> 65536
+//!
+//! Build + locate the binary:
+//!     cargo build --release --bench bench_continuation
+//!     ls target/release/deps/bench_continuation-*   # the executable (no .d)
+//!
+//! Args:
+//!     <mode>        "count", "main" (monolithic prove) or "cont" (continuation)
+//!     <elf_path>    path to a compiled ELF artifact
+//!     [epoch_size]  epoch length in cycles for "cont" (default 65536)
+//!
+//! Env:
+//!     BENCH_PRIVATE_INPUT  optional path to a private-input file (e.g. an
+//!                          ethrex ProgramInput .bin). Empty if unset.
+
+use std::time::Instant;
+
+fn main() {
+    let args: Vec<String> = std::env::args().collect();
+    if args.len() < 3 {
+        eprintln!("usage: bench_continuation <count|main|cont> <elf_path> [epoch_size]");
+        std::process::exit(2);
+    }
+    let mode = args[1].as_str();
+    let elf_path = &args[2];
+    let elf = std::fs::read(elf_path).expect("failed to read ELF");
+    let private_inputs: Vec<u8> = match std::env::var("BENCH_PRIVATE_INPUT") {
+        Ok(path) if !path.is_empty() => {
+            std::fs::read(&path).expect("failed to read BENCH_PRIVATE_INPUT file")
+        }
+        _ => Vec::new(),
+    };
+
+    let start = Instant::now();
+    match mode {
+        "count" => {
+            // Count cycles by running the executor to completion (no proving).
+            // Cycle count is a linear proxy for monolithic proving memory.
+            use executor::elf::Elf;
+            use executor::vm::execution::Executor;
+            let program = Elf::load(&elf).expect("bad ELF");
+            let result = Executor::new(&program, private_inputs)
+                .expect("executor")
+                .run()
+                .expect("execution failed");
+            println!("cycles = {}", result.logs.len());
+        }
+        "footprint" => {
+            // Run to completion, then classify the touched memory by region so we
+            // can see how much of the footprint is stack (contiguous, near
+            // STACK_TOP) vs the rest (ELF data / heap / private input, low
+            // addresses). Tells us whether a stack-specific Vec store would help.
+            use executor::elf::Elf;
+            use executor::vm::execution::Executor;
+            use executor::vm::registers::STACK_TOP;
+            let program = Elf::load(&elf).expect("bad ELF");
+            let mut ex = Executor::new(&program, private_inputs).expect("executor");
+            while ex.pc() != 0 {
+                match ex.resume_with_limit(usize::MAX).expect("execution failed") {
+                    Some(_) => {}
+                    None => break,
+                }
+            }
+            // Stack lives in the top half of the address space (grows down from
+            // STACK_TOP); ELF data / heap / input are in the low addresses.
+            const STACK_THRESHOLD: u64 = 1 << 63;
+            let (mut stack, mut other) = (0u64, 0u64);
+            let (mut min_stack, mut min_other, mut max_other) = (u64::MAX, u64::MAX, 0u64);
+            for (addr, _) in ex.memory().iter_bytes() {
+                if addr >= STACK_THRESHOLD {
+                    stack += 1;
+                    min_stack = min_stack.min(addr);
+                } else {
+                    other += 1;
+                    min_other = min_other.min(addr);
+                    max_other = max_other.max(addr);
+                }
+            }
+            let total = stack + other;
+            let pct = |n: u64| 100.0 * n as f64 / total.max(1) as f64;
+            println!("footprint: {total} touched bytes");
+            if stack > 0 {
+                let span = STACK_TOP - min_stack + 1;
+                println!(
+                    "  stack: {stack} bytes ({:.1}%), range [{:#x}..={:#x}], span {span} bytes, density {:.1}%",
+                    pct(stack),
+                    min_stack,
+                    STACK_TOP,
+                    100.0 * stack as f64 / span as f64,
+                );
+            }
+            if other > 0 {
+                println!(
+                    "  other (data/heap/input): {other} bytes ({:.1}%), range [{:#x}..={:#x}]",
+                    pct(other),
+                    min_other,
+                    max_other,
+                );
+            }
+        }
+        "main" => {
+            lambda_vm_prover::prove_with_inputs(&elf, &private_inputs)
+                .expect("monolithic prove failed");
+            println!("main prove ok ({} bytes ELF)", elf.len());
+        }
+        "cont" => {
+            let epoch_size_log2: u32 = args
+                .get(3)
+                .map(|s| s.parse().expect("bad epoch_size_log2"))
+                .unwrap_or(16);
+            // Match the monolithic `main` mode's options (blowup 2) for a fair comparison.
+            let opts = stark::proof::options::GoldilocksCubicProofOptions::with_blowup(2)
+                .expect("blowup=2 is always valid");
+            let output = lambda_vm_prover::continuation::prove_and_verify_continuation(
+                &elf,
+                &private_inputs,
+                epoch_size_log2,
+                &opts,
+            )
+            .expect("continuation failed");
+            assert!(output.is_some(), "continuation did not verify");
+            println!(
+                "cont prove+verify ok (epoch_size_log2={epoch_size_log2}, epoch_size={})",
+                1usize << epoch_size_log2
+            );
+        }
+        other => {
+            eprintln!("unknown mode {other:?}; use count|footprint|main|cont");
+            std::process::exit(2);
+        }
+    }
+    println!("elapsed {:.2}s", start.elapsed().as_secs_f64());
+}
diff --git a/prover/src/continuation.rs b/prover/src/continuation.rs
new file mode 100644
index 000000000..15b56a1e0
--- /dev/null
+++ b/prover/src/continuation.rs
@@ -0,0 +1,1210 @@
+//! First production implementation of continuations (Approach 2).
+//!
+//! Splits an execution into fixed-size epochs, proves each epoch independently
+//! (its memory is initialized/finalized by the per-epoch local-to-global table),
+//! and proves one cross-epoch "global memory" LogUp that links every epoch's
+//! `fini` to the next epoch's `init` (so `fini(epoch i) == init(epoch i+1)`).
+//!
+//! The global proof's genesis anchor is bound to the ELF: the verifier
+//! recomputes the per-page preprocessed init commitment from the ELF in
+//! `verify_global`, so the starting memory cannot be prover-supplied.
+//!
+//! The local-to-global columns are range-checked in the epoch proof (which
+//! carries the BITWISE provider): values are bytes, and the cross-epoch-only
+//! `init_epoch` is built from `IsHalfword`-checked halfwords. Address and
+//! fini-timestamp need no extra check — they are matched against MEMW on the
+//! epoch-local Memory bus, exactly as PAGE relies on MEMW. The global proof
+//! commits the identical trace, so it inherits the guarantee via the commitment
+//! binding. There is no cross-epoch timestamp; the chain is ordered by epoch.
+//!
+//! Cross-epoch registers are bound the same way: each continuation epoch
+//! preprocesses its REGISTER `FINI` column to the epoch's final register file
+//! `R_{i+1}` (alongside `INIT = R_i`), and the driver reuses the same `R_{i+1}`
+//! as the next epoch's preprocessed `INIT` — so `init(epoch i+1) == fini(epoch i)`
+//! by construction, with the REG-C2 Memory bus binding `FINI` to the true final
+//! registers. No extra bus.
+//!
+//! The x254 commit index is carried across epochs by that same register binding,
+//! so a continuation epoch indexes its commits from the carried value: both the
+//! COMMIT trace (`current_commit_index` seeded from x254) and the verifier's
+//! `compute_commit_bus_offset` (a `start_index` parameter) count from it, and the
+//! driver concatenates each epoch's committed bytes into the run-wide output.
+//!
+//! The prover and verifier are split: `prove_continuation` emits a self-contained
+//! `ContinuationProof` bundle and `verify_continuation` checks it from the bundle
+//! and ELF alone (`prove_and_verify_continuation` is a thin wrapper over both).
+
+use std::collections::HashMap;
+
+use crypto::fiat_shamir::default_transcript::DefaultTranscript;
+use executor::elf::Elf;
+use executor::vm::execution::Executor;
+use executor::vm::memory::MAX_PRIVATE_INPUT_SIZE;
+use math::field::element::FieldElement;
+use stark::config::Commitment;
+use stark::lookup::{AirWithBuses, AuxiliaryTraceBuildData, NullBoundaryConstraintBuilder};
+use stark::proof::options::ProofOptions;
+use stark::proof::stark::MultiProof;
+use stark::prover::{IsStarkProver, Prover};
+use stark::trace::TraceTable;
+use stark::traits::AIR;
+use stark::verifier::{IsStarkVerifier, Verifier};
+
+use crate::statement::{StatementKind, absorb_continuation_global_statement, absorb_statement};
+use crate::tables::local_to_global::{self, CellBoundary};
+use crate::tables::page::{self, PageConfig};
+use crate::tables::register;
+use crate::tables::trace_builder::{Traces, build_init_page_data, build_initial_image_paged};
+use crate::tables::types::{GoldilocksExtension, GoldilocksField};
+use crate::tables::{MaxRowsConfig, global_memory};
+use crate::{
+    Error, FIXED_TABLE_COUNT, RuntimePageRange, TableCounts, VmAirs,
+    compute_expected_commit_bus_balance, verify_l2g_commitment_binding,
+};
+
+type F = GoldilocksField;
+type E = GoldilocksExtension;
+type AirRef<'a> = &'a dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>;
+
+fn empty_constraints()
+-> Vec<Box<dyn stark::constraints::transition::TransitionConstraintEvaluator<F, E>>> {
+    vec![]
+}
+
+/// Fresh transcript seeded with the epoch's statement (ELF, public output, table
+/// layout) and `epoch_label` (its position). The epoch's prove, verify, and
+/// bus-balance replay all seed via this so their challenges match; the seeding
+/// pins each epoch proof to its program and position (replay protection).
+fn epoch_transcript(
+    elf_bytes: &[u8],
+    public_output: &[u8],
+    table_counts: &TableCounts,
+    num_private_input_pages: usize,
+    runtime_page_ranges: &[RuntimePageRange],
+    epoch_label: u64,
+) -> DefaultTranscript<E> {
+    let mut transcript = DefaultTranscript::<E>::new(&[]);
+    absorb_statement(
+        &mut transcript,
+        StatementKind::ContinuationEpoch { epoch_label },
+        elf_bytes,
+        public_output,
+        table_counts,
+        num_private_input_pages,
+        runtime_page_ranges,
+    );
+    transcript
+}
+
+/// Fresh transcript seeded with the global proof's statement (ELF + epoch count).
+/// `prove_global` and `verify_global` both seed via this so their challenges match.
+fn global_transcript(elf_bytes: &[u8], num_epochs: usize) -> DefaultTranscript<E> {
+    let mut transcript = DefaultTranscript::<E>::new(&[]);
+    absorb_continuation_global_statement(&mut transcript, elf_bytes, num_epochs);
+    transcript
+}
+
+/// The L2G table's AIR constraint: the `MU` selector column is boolean.
+///
+/// The Memory bus already pins `MU = 1` on real rows and `MU = 0` on padding —
+/// it's anchored to MEMW's own bit-constrained multiplicity, since a non-1 `MU`
+/// leaves the cell's seed/fini tokens unmatched. This constraint makes
+/// "`MU ∈ {0,1}`" explicit on the table itself rather than relying on that
+/// cross-bus argument. Lives on the epoch-local air; the global proof commits the
+/// identical trace (root-bound), so it inherits it.
+fn l2g_constraints()
+-> Vec<Box<dyn stark::constraints::transition::TransitionConstraintEvaluator<F, E>>> {
+    use crate::constraints::templates::IsBitConstraint;
+    use stark::constraints::transition::TransitionConstraint;
+    vec![IsBitConstraint::unconditional(local_to_global::cols::MU, 0).boxed()]
+}
+
+/// Local-to-global AIR on the cross-epoch GlobalMemory bus (used in the global proof).
+///
+/// `epoch_label` is this epoch's 1-based label; it is the `fini_epoch` constant
+/// the fini token carries (not a trace column, since it's the same for every row).
+fn l2g_global_air(
+    opts: &ProofOptions,
+    epoch_label: u64,
+) -> AirWithBuses<F, E, NullBoundaryConstraintBuilder, ()> {
+    AirWithBuses::new(
+        local_to_global::cols::NUM_COLUMNS,
+        AuxiliaryTraceBuildData {
+            interactions: local_to_global::bus_interactions(epoch_label),
+        },
+        opts,
+        1,
+        empty_constraints(),
+    )
+}
+
+/// Local-to-global AIR on the epoch-local Memory bus (used inside an epoch proof).
+///
+/// Carries the column range checks and the `init_epoch < fini_epoch` ordering
+/// check too: this proof has the BITWISE provider, and the global proof commits
+/// the identical trace (the commitment binding compares roots), so checking here
+/// covers both. `epoch_label` is the `fini_epoch` constant used by both.
+fn l2g_memory_air(
+    opts: &ProofOptions,
+    epoch_label: u64,
+) -> AirWithBuses<F, E, NullBoundaryConstraintBuilder, ()> {
+    let interactions = [
+        local_to_global::memory_bus_interactions(),
+        local_to_global::range_check_interactions(epoch_label),
+    ]
+    .concat();
+    AirWithBuses::new(
+        local_to_global::cols::NUM_COLUMNS,
+        AuxiliaryTraceBuildData { interactions },
+        opts,
+        1,
+        l2g_constraints(),
+    )
+}
+
+/// GLOBAL_MEMORY AIR for one touched page (the cross-epoch analog of PAGE).
+///
+/// It sends each cell's genesis init and receives its finalization on the
+/// GlobalMemory bus. The genesis `init` column is preprocessed, so the verifier
+/// recomputes its commitment from the ELF — exactly PAGE's binding mechanism:
+/// ELF-data pages via `page::compute_precomputed_commitment`, zero-init pages
+/// (stack/heap) via the static zero-page commitment. The prover cannot choose
+/// the genesis values.
+fn global_memory_air(
+    opts: &ProofOptions,
+    config: &PageConfig,
+) -> AirWithBuses<F, E, NullBoundaryConstraintBuilder, ()> {
+    let air = AirWithBuses::new(
+        global_memory::cols::NUM_COLUMNS,
+        AuxiliaryTraceBuildData {
+            interactions: global_memory::bus_interactions(config.page_base),
+        },
+        opts,
+        1,
+        empty_constraints(),
+    );
+    let commitment = if config.init_values.is_some() {
+        page::compute_precomputed_commitment(config, opts)
+    } else {
+        page::zero_init_preprocessed_commitment(opts)
+    };
+    air.with_preprocessed(commitment, global_memory::NUM_PREPROCESSED_COLS)
+}
+
+/// The touched pages (sorted) and their ELF-derived genesis configs, rebuilt
+/// identically by prover and verifier from the ELF + private input. Each cell
+/// the program touched lives on one of these pages; a page in the ELF/input
+/// image carries its bytes as `init`, every other (stack/heap) page is zero-init.
+fn global_memory_configs(
+    boundaries: &[Vec<CellBoundary>],
+    elf: &Elf,
+    private_inputs: &[u8],
+) -> Vec<PageConfig> {
+    let image = build_initial_image_paged(elf, private_inputs);
+    let init_page_data = build_init_page_data(&image);
+    global_memory_configs_from_init_page_data(boundaries, &init_page_data)
+}
+
+fn global_memory_configs_from_init_page_data(
+    boundaries: &[Vec<CellBoundary>],
+    init_page_data: &HashMap<u64, Vec<u8>>,
+) -> Vec<PageConfig> {
+    let touched_pages: std::collections::BTreeSet<u64> = boundaries
+        .iter()
+        .flatten()
+        .map(|b| page::page_base_for_address(b.address))
+        .collect();
+    touched_pages
+        .into_iter()
+        .map(|page_base| match init_page_data.get(&page_base) {
+            Some(data) => PageConfig::with_data(page_base, data.clone()),
+            None => PageConfig::zero_init(page_base),
+        })
+        .collect()
+}
+
+/// Per-epoch register state and label.
+struct EpochStart<'a> {
+    register_init: &'a [u32],
+    /// This epoch's 1-based table label (the `fini_epoch` constant).
+    label: u64,
+}
+
+/// One epoch's proof plus everything a standalone verifier needs to re-check it
+/// using ONLY the bundle (never the prover's in-memory traces). Each field is a
+/// public value the verifier re-binds: a wrong value either makes the proof's
+/// transcript challenges diverge or the AIRs not match the committed trace, so the
+/// proof fails to verify.
+///
+/// Note: continuation epochs use the L2G memory bookend, so PAGE is skipped and the
+/// per-epoch page config set is empty — the verifier builds the AIRs with no PAGE
+/// tables rather than trusting any prover-supplied page config.
+#[derive(serde::Serialize, serde::Deserialize)]
+struct EpochProof {
+    /// The epoch's STARK proof (its tables + the epoch-local L2G sub-table last).
+    proof: MultiProof<F, E, ()>,
+    /// Bytes this epoch committed — the COMMIT-bus receiver reference.
+    public_output: Vec<u8>,
+    /// Statement values the epoch transcript is seeded with (re-derived on verify).
+    table_counts: TableCounts,
+    /// Always zero for continuation epochs: PAGE is replaced by L2G, and private
+    /// input genesis is carried by the continuation bundle for global verification.
+    num_private_input_pages: usize,
+    /// Always empty for continuation epochs: PAGE tables are skipped, so runtime
+    /// pages are not part of the epoch AIR statement.
+    runtime_page_ranges: Vec<RuntimePageRange>,
+    /// The epoch's final register file `R_{i+1}` (its preprocessed FINI), which the
+    /// driver/verifier reuses as the next epoch's derived INIT — the cross-epoch
+    /// register binding. x254 (commit index) rides along at address 508.
+    reg_fini: Vec<u32>,
+    /// The committed L2G table root, tied to the global proof by
+    /// [`verify_l2g_commitment_binding`].
+    l2g_root: Commitment,
+    /// Touched-cell boundaries; the verifier rebuilds the global AIRs (touched-page
+    /// set) from these. Values are redundant with the committed L2G trace.
+    boundary: Vec<CellBoundary>,
+}
+
+/// A self-contained continuation proof: the per-epoch proofs in execution order,
+/// the one cross-epoch global-memory proof, and the private inputs (needed to
+/// rebuild the genesis image — bound by the global proof's genesis-from-ELF check).
+///
+/// `verify_continuation` checks this using only the bundle and the ELF. It derives
+/// serde, so it round-trips through `bincode` exactly like a monolithic `VmProof`.
+#[derive(serde::Serialize, serde::Deserialize)]
+pub struct ContinuationProof {
+    epochs: Vec<EpochProof>,
+    global: MultiProof<F, E, ()>,
+    private_inputs: Vec<u8>,
+}
+
+impl ContinuationProof {
+    /// Number of epochs the execution was split into.
+    pub fn num_epochs(&self) -> usize {
+        self.epochs.len()
+    }
+}
+
+/// Build an epoch's AIRs identically on the prove and verify sides — the single
+/// source of truth for the AIR set, so the two halves can never diverge. Mirrors
+/// the old integrated path: `VmAirs` (HALT included iff `is_final`), with REGISTER
+/// preprocessed to INIT = `register_init` and FINI = `reg_fini`. Continuation epochs
+/// use the L2G bookend, so PAGE is skipped and `page_configs` is empty. The
+/// epoch-local L2G air is built separately by the caller (it needs the `label`).
+fn build_epoch_airs(
+    elf: &Elf,
+    opts: &ProofOptions,
+    page_configs: &[PageConfig],
+    table_counts: &TableCounts,
+    register_init: &[u32],
+    reg_fini: &[u32],
+    is_final: bool,
+) -> VmAirs {
+    // Continuation epochs preprocess FINI = R_{i+1} too (not just INIT = R_i), so the
+    // final register file is a verifier-known public value bound by the REG-C2
+    // Memory-bus token; reusing the same R_{i+1} as the next epoch's INIT binds
+    // init(epoch i+1) == fini(epoch i).
+    let register_preprocessed = Some((
+        register::compute_precomputed_commitment_with_fini(opts, register_init, reg_fini),
+        register::NUM_PREPROCESSED_COLS_WITH_FINI,
+    ));
+    VmAirs::new(
+        elf,
+        opts,
+        false,
+        page_configs,
+        table_counts,
+        None,
+        is_final,
+        None,
+        None,
+        register_preprocessed,
+    )
+}
+
+/// Prove one epoch (prove half only). Commits its local-to-global table (built from
+/// `boundary`) on the epoch-local Memory bus and its REGISTER table with FINI
+/// preprocessed to the epoch's final register file. Returns the [`EpochProof`] the
+/// standalone verifier later re-checks; does NOT verify here.
+#[allow(clippy::too_many_arguments)]
+fn prove_epoch(
+    elf: &Elf,
+    elf_bytes: &[u8],
+    start: &EpochStart,
+    mut traces: Traces,
+    is_final: bool,
+    boundary: &[CellBoundary],
+    opts: &ProofOptions,
+) -> Result<EpochProof, Error> {
+    // Count this L2G table's range-check lookups into the BITWISE table so its
+    // AreBytes/IsHalfword multiplicities balance the range-check senders.
+    crate::tables::bitwise::update_multiplicities(
+        &mut traces.bitwise,
+        &local_to_global::collect_bitwise_from_l2g(boundary),
+    );
+
+    // Continuation epochs use the L2G bookend, so PAGE is skipped: page_configs is
+    // empty. The verifier hard-codes this (passes `&[]`); check the prover agrees so
+    // the two sides build identical AIRs.
+    if !traces.page_configs.is_empty() {
+        return Err(Error::ContinuationInvariant(
+            "continuation epoch must have no PAGE configs (L2G bookend replaces PAGE)".to_string(),
+        ));
+    }
+
+    // R_{i+1}, read from the committed REGISTER trace (FINI, bound to the last write).
+    let reg_fini = register::fini_from_trace(&traces.register);
+
+    let table_counts = traces.table_counts();
+    let public_output = traces.public_output_bytes.clone();
+    let runtime_page_ranges = traces.runtime_page_ranges();
+    let num_private_input_pages = traces
+        .page_configs
+        .iter()
+        .filter(|c| c.is_private_input)
+        .count();
+
+    let airs = build_epoch_airs(
+        elf,
+        opts,
+        &[],
+        &table_counts,
+        start.register_init,
+        &reg_fini,
+        is_final,
+    );
+
+    let label = start.label;
+    let seed = || {
+        epoch_transcript(
+            elf_bytes,
+            &public_output,
+            &table_counts,
+            num_private_input_pages,
+            &runtime_page_ranges,
+            label,
+        )
+    };
+
+    let l2g_air = l2g_memory_air(opts, label);
+    // Build this epoch's L2G table from the cross-epoch boundary so it is identical
+    // to the one the global proof commits (the commitment binding compares their
+    // roots). It is appended to the proof below, not through `air_trace_pairs`.
+    let mut l2g_trace = local_to_global::generate_local_to_global_trace(boundary);
+
+    let mut pairs = airs.air_trace_pairs(&mut traces);
+    pairs.push((&l2g_air, &mut l2g_trace, &()));
+    let proof = Prover::multi_prove(
+        pairs,
+        &mut seed(),
+        #[cfg(feature = "disk-spill")]
+        stark::storage_mode::StorageMode::Ram,
+    )
+    .map_err(|e| Error::Prover(format!("{e:?}")))?;
+
+    let l2g_root = proof
+        .proofs
+        .last()
+        .ok_or_else(|| {
+            Error::ContinuationInvariant("epoch proof is missing the L2G sub-table".to_string())
+        })?
+        .lde_trace_main_merkle_root;
+
+    Ok(EpochProof {
+        proof,
+        public_output,
+        table_counts,
+        num_private_input_pages,
+        runtime_page_ranges,
+        reg_fini,
+        l2g_root,
+        boundary: boundary.to_vec(),
+    })
+}
+
+/// Verify one epoch using ONLY the [`EpochProof`] bundle plus the verifier-derived
+/// `register_init` (epoch 0: from the ELF; epoch i>0: from the previous epoch's
+/// `reg_fini`), `is_final`, and `label`. Rebuilds the AIRs and transcript
+/// from the bundle's statement values and indexes commits from the carried x254
+/// (`register_init[X254_INDEX]`), never from the prover's memory. PAGE is skipped for
+/// continuation epochs, so the AIRs are built with no page configs (the bundle does
+/// not get to supply any). Returns `true` iff the proof verifies and its committed
+/// L2G root matches the claimed one.
+fn verify_epoch(
+    elf: &Elf,
+    elf_bytes: &[u8],
+    epoch: &EpochProof,
+    register_init: &[u32],
+    is_final: bool,
+    label: u64,
+    opts: &ProofOptions,
+) -> bool {
+    // Reject degenerate table counts (mirrors the monolithic verifier).
+    if epoch.table_counts.validate().is_err() {
+        return false;
+    }
+
+    // Cross-check table_counts before building AIRs from bundle data. Continuation
+    // epochs have no PAGE proofs, and append one epoch-local L2G proof after the VM
+    // tables. HALT is present only on the final epoch.
+    let fixed_tables = if is_final {
+        FIXED_TABLE_COUNT
+    } else {
+        FIXED_TABLE_COUNT - 1
+    };
+    let expected_proof_count = epoch.table_counts.total() + fixed_tables + 1;
+    if expected_proof_count != epoch.proof.proofs.len() {
+        return false;
+    }
+
+    let airs = build_epoch_airs(
+        elf,
+        opts,
+        &[],
+        &epoch.table_counts,
+        register_init,
+        &epoch.reg_fini,
+        is_final,
+    );
+    let l2g_air = l2g_memory_air(opts, label);
+    let mut refs = airs.air_refs();
+    refs.push(&l2g_air);
+
+    let seed = || {
+        epoch_transcript(
+            elf_bytes,
+            &epoch.public_output,
+            &epoch.table_counts,
+            epoch.num_private_input_pages,
+            &epoch.runtime_page_ranges,
+            label,
+        )
+    };
+
+    // Start the commit index from the carried x254 (the derived INIT), not a free
+    // input — this is what binds the per-epoch commit slice to its global position.
+    let commit_start_index = register_init
+        .get(register::X254_INDEX)
+        .copied()
+        .unwrap_or(0) as u64;
+
+    let expected = match compute_expected_commit_bus_balance(
+        &refs,
+        &epoch.proof,
+        &epoch.public_output,
+        commit_start_index,
+        &mut seed(),
+    ) {
+        Some(expected) => expected,
+        None => return false,
+    };
+
+    if !Verifier::multi_verify(&refs, &epoch.proof, &mut seed(), &expected) {
+        return false;
+    }
+
+    // The claimed L2G root must be the one this proof actually committed (it is what
+    // verify_l2g_commitment_binding later ties to the global proof).
+    epoch
+        .proof
+        .proofs
+        .last()
+        .map(|p| p.lde_trace_main_merkle_root)
+        == Some(epoch.l2g_root)
+}
+
+/// Build the cross-epoch global memory proof: every epoch's L2G sub-table on the
+/// GlobalMemory bus, plus one GLOBAL_MEMORY table per touched page that sends each
+/// cell's genesis init (preprocessed from the ELF, so the verifier recomputes it)
+/// and receives its final value. The bus balances iff every `fini` matches the next
+/// epoch's `init` and every genesis value matches the ELF.
+fn prove_global(
+    boundaries: &[Vec<CellBoundary>],
+    elf_bytes: &[u8],
+    init_page_data: &HashMap<u64, Vec<u8>>,
+    opts: &ProofOptions,
+) -> Result<MultiProof<F, E, ()>, Error> {
+    // Each cell's final state (boundaries are in epoch order, so the last fini wins).
+    let mut final_state: global_memory::FiniStateMap = HashMap::new();
+    for epoch in boundaries {
+        for b in epoch {
+            final_state.insert(
+                b.address,
+                global_memory::FiniState {
+                    value: (b.fini.value & 0xFF) as u8,
+                    epoch: b.fini.epoch,
+                },
+            );
+        }
+    }
+
+    let gm_configs = global_memory_configs_from_init_page_data(boundaries, init_page_data);
+
+    let mut l2g_traces: Vec<TraceTable<F, E>> = boundaries
+        .iter()
+        .map(|epoch| local_to_global::generate_local_to_global_trace(epoch))
+        .collect();
+    let mut gm_traces: Vec<TraceTable<F, E>> = gm_configs
+        .iter()
+        .map(|config| global_memory::generate_global_trace(config, &final_state))
+        .collect();
+
+    // One L2G air per epoch, each carrying its own 1-based `fini_epoch` constant.
+    let l2g_airs: Vec<_> = (0..boundaries.len())
+        .map(|i| l2g_global_air(opts, local_to_global::epoch_label(i as u64)))
+        .collect();
+    let gm_airs: Vec<_> = gm_configs
+        .iter()
+        .map(|config| global_memory_air(opts, config))
+        .collect();
+
+    let mut pairs: Vec<(AirRef, &mut TraceTable<F, E>, &())> = l2g_airs
+        .iter()
+        .zip(l2g_traces.iter_mut())
+        .map(|(air, t)| (air as AirRef, t, &()))
+        .collect();
+    for (air, trace) in gm_airs.iter().zip(gm_traces.iter_mut()) {
+        pairs.push((air as AirRef, trace, &()));
+    }
+
+    Prover::multi_prove(
+        pairs,
+        &mut global_transcript(elf_bytes, boundaries.len()),
+        #[cfg(feature = "disk-spill")]
+        stark::storage_mode::StorageMode::Ram,
+    )
+    .map_err(|e| Error::Prover(format!("{e:?}")))
+}
+
+fn verify_global(
+    boundaries: &[Vec<CellBoundary>],
+    proof: &MultiProof<F, E, ()>,
+    elf: &Elf,
+    elf_bytes: &[u8],
+    private_inputs: &[u8],
+    opts: &ProofOptions,
+) -> bool {
+    // One L2G air per epoch, each with its own 1-based `fini_epoch` constant —
+    // must match the order/labels the global proof committed in `prove_global`.
+    let l2g_airs: Vec<_> = (0..boundaries.len())
+        .map(|i| l2g_global_air(opts, local_to_global::epoch_label(i as u64)))
+        .collect();
+    // Rebuild the genesis configs FROM THE ELF and recompute their commitments:
+    // this is the binding — a prover that claimed different genesis values would
+    // commit a different root and fail to verify.
+    let gm_configs = global_memory_configs(boundaries, elf, private_inputs);
+    let gm_airs: Vec<_> = gm_configs
+        .iter()
+        .map(|config| global_memory_air(opts, config))
+        .collect();
+
+    let mut refs: Vec<AirRef> = l2g_airs.iter().map(|a| a as AirRef).collect();
+    for air in &gm_airs {
+        refs.push(air as AirRef);
+    }
+
+    Verifier::multi_verify(
+        &refs,
+        proof,
+        &mut global_transcript(elf_bytes, boundaries.len()),
+        &FieldElement::zero(),
+    )
+}
+
+/// Prove a full continuation and return a self-contained [`ContinuationProof`]
+/// (prove half only — no verification). Splits the execution into `2^epoch_size_log2`
+/// cycle epochs, proves each, and proves the one cross-epoch global-memory linkage.
+///
+/// Intermediate epochs run exactly `2^epoch_size_log2` cycles, so their CPU tables
+/// have power-of-two row counts and therefore zero padding rows — important because
+/// CPU padding rows participate in the inline-PC `memory` chain (carrying pc=1)
+/// which is only anchored by the HALT chip's emit_pc/consume_pc, and intermediate
+/// epochs exclude HALT. With padding rows present and no HALT their pc=1 tokens
+/// dangle and the Memory bus fails to balance; zero padding rows sidestep that. The
+/// final epoch keeps its remainder and its HALT, so its padding chain is anchored as
+/// usual. A program that fits in one epoch runs as a single final (monolithic-style)
+/// epoch.
+pub fn prove_continuation(
+    elf_bytes: &[u8],
+    private_inputs: &[u8],
+    epoch_size_log2: u32,
+    opts: &ProofOptions,
+) -> Result<ContinuationProof, Error> {
+    if epoch_size_log2 < 2 {
+        return Err(Error::InvalidContinuationEpochSize(
+            "epoch_size_log2 must be at least 2 (4 cycles)".to_string(),
+        ));
+    }
+    let epoch_size = 1usize.checked_shl(epoch_size_log2).ok_or_else(|| {
+        Error::InvalidContinuationEpochSize(format!(
+            "epoch_size_log2 {epoch_size_log2} is too large for this platform"
+        ))
+    })?;
+
+    let elf = Elf::load(elf_bytes).map_err(|e| Error::ElfLoad(format!("{e}")))?;
+    let mut executor = Executor::new(&elf, private_inputs.to_vec())
+        .map_err(|e| Error::Execution(format!("{e}")))?;
+
+    // The cross-epoch memory image, carried forward: epoch i+1's init is epoch i's
+    // fini, updated in place with each epoch's touched-cell final values.
+    let mut image = build_initial_image_paged(&elf, private_inputs);
+    let init_page_data = build_init_page_data(&image);
+    let mut provenance =
+        local_to_global::genesis_provenance(image.iter().map(|(a, v)| (a, v as u64)));
+
+    let mut epochs: Vec<EpochProof> = Vec::new();
+    // The previous epoch's bound final register file R_{i+1}; epoch i+1's init is
+    // derived from it (the cross-epoch register binding).
+    let mut prev_fini: Option<Vec<u32>> = None;
+
+    let mut index: u64 = 0;
+    loop {
+        if executor.pc() == 0 {
+            break;
+        }
+        // The cross-epoch ordering check (IsB20 on `fini_epoch - 1 - init_epoch`)
+        // only spans `local_to_global::MAX_EPOCHS` epochs. Beyond that the IsB20 bus
+        // cannot balance, so an honest proof is impossible — fail fast with a clear
+        // error instead of building an unprovable trace. The verifier already
+        // rejects any such proof; this is a prover-side guard for a clean message.
+        if index >= local_to_global::MAX_EPOCHS {
+            return Err(Error::InvalidContinuationEpochSize(format!(
+                "execution needs more than {} continuation epochs (the IsB20 cross-epoch \
+                 ordering range); use a larger epoch size",
+                local_to_global::MAX_EPOCHS
+            )));
+        }
+        let register_init: Vec<u32> = if index == 0 {
+            register::register_init_from_entry_point(elf.entry_point)
+        } else {
+            // Epoch i+1's init is epoch i's bound fini, reused directly (same
+            // `register_word_address_list` order) — the cross-epoch register binding.
+            prev_fini.clone().ok_or_else(|| {
+                Error::ContinuationInvariant(
+                    "previous epoch final registers are missing after the first epoch".to_string(),
+                )
+            })?
+        };
+
+        // Run one epoch; `logs` is this epoch's chunk only (the executor clears it).
+        let logs = match executor
+            .resume_with_limit(epoch_size)
+            .map_err(|e| Error::Execution(format!("{e}")))?
+        {
+            Some(logs) => logs.to_vec(),
+            None => break,
+        };
+        let is_final = executor.pc() == 0;
+
+        // Invariant: a non-final epoch ran the full `epoch_size` (a power of two),
+        // so its CPU table has no padding rows.
+        if !is_final && logs.len() != epoch_size {
+            return Err(Error::ContinuationInvariant(format!(
+                "intermediate epoch ran {} cycles, expected {epoch_size}",
+                logs.len()
+            )));
+        }
+
+        let label = local_to_global::epoch_label(index);
+        let traces = Traces::from_image_and_logs(
+            &elf,
+            &image,
+            &register_init,
+            &logs,
+            &MaxRowsConfig::default(),
+            private_inputs,
+            is_final,
+            true,
+            #[cfg(feature = "disk-spill")]
+            stark::storage_mode::StorageMode::Ram,
+        )?;
+        let boundary =
+            local_to_global::epoch_boundary(&mut provenance, label, &traces.touched_memory_cells);
+
+        let start = EpochStart {
+            register_init: &register_init,
+            label,
+        };
+        let epoch = prove_epoch(&elf, elf_bytes, &start, traces, is_final, &boundary, opts)?;
+        prev_fini = Some(epoch.reg_fini.clone());
+
+        // Carry the image forward: this epoch's fini is the next epoch's init.
+        for cell in &boundary {
+            image.set(cell.address, (cell.fini.value & 0xFF) as u8);
+        }
+        epochs.push(epoch);
+
+        if is_final {
+            break;
+        }
+        index += 1;
+    }
+
+    // One global LogUp over all the (kept) local-to-global tables.
+    let all_boundaries: Vec<Vec<CellBoundary>> =
+        epochs.iter().map(|e| e.boundary.clone()).collect();
+    let global = prove_global(&all_boundaries, elf_bytes, &init_page_data, opts)?;
+
+    Ok(ContinuationProof {
+        epochs,
+        global,
+        private_inputs: private_inputs.to_vec(),
+    })
+}
+
+/// Verify a [`ContinuationProof`] using ONLY the bundle and the ELF — nothing from
+/// the prover's memory. Returns `Ok(Some(public_output))` (the run-wide committed
+/// bytes, reconstructed from the per-epoch bound slices) iff every check holds, else
+/// `Ok(None)`.
+///
+/// The verifier (1) enumerates epochs itself, assigning `epoch_label` and `is_final`
+/// by position (a trusted enumeration); (2) verifies each epoch, deriving its
+/// `register_init` from the ELF (epoch 0) or the previous epoch's bound `reg_fini`
+/// (epoch i>0) — this is the cross-epoch register binding, and forces epoch 0 to start
+/// at the genesis register file; (3) closes the cross-epoch GlobalMemory bus with
+/// genesis rebuilt from the ELF; (4) ties each epoch's L2G root to the global proof;
+/// (5) reconstructs the output by concatenating the per-epoch slices in order.
+///
+/// Completeness is forced by the enumeration: epoch 0's INIT must be the ELF genesis
+/// (else its preprocessed-INIT commitment mismatches), and the last epoch must be
+/// `is_final` (HALT included — so the program actually terminated); a truncated run
+/// would have a non-halting last epoch built with HALT and fail.
+pub fn verify_continuation(
+    elf_bytes: &[u8],
+    bundle: &ContinuationProof,
+    opts: &ProofOptions,
+) -> Result<Option<Vec<u8>>, Error> {
+    if bundle.private_inputs.len() as u64 > MAX_PRIVATE_INPUT_SIZE {
+        return Err(Error::InvalidTableCounts(format!(
+            "private input size ({}) exceeds max ({MAX_PRIVATE_INPUT_SIZE})",
+            bundle.private_inputs.len()
+        )));
+    }
+
+    let elf = Elf::load(elf_bytes).map_err(|e| Error::ElfLoad(format!("{e}")))?;
+
+    let n = bundle.epochs.len();
+    if n == 0 {
+        return Ok(None);
+    }
+
+    // Reject a malformed bundle up front. `reg_fini` is prover-supplied (deserialized,
+    // untrusted) and is indexed by `NUM_REGISTER_ADDRESSES` when building each epoch's
+    // preprocessed REGISTER commitment, so a wrong length would otherwise panic the
+    // verifier instead of cleanly rejecting the proof.
+    if bundle
+        .epochs
+        .iter()
+        .any(|e| e.reg_fini.len() != register::NUM_REGISTER_ADDRESSES)
+    {
+        return Ok(None);
+    }
+
+    // Derived from the ELF for epoch 0, then from each epoch's bound fini.
+    let mut register_init = register::register_init_from_entry_point(elf.entry_point);
+    let mut epoch_roots: Vec<Commitment> = Vec::with_capacity(n);
+    let mut public_output: Vec<u8> = Vec::new();
+
+    for (index, epoch) in bundle.epochs.iter().enumerate() {
+        let is_final = index == n - 1;
+        let label = local_to_global::epoch_label(index as u64);
+
+        if !verify_epoch(
+            &elf,
+            elf_bytes,
+            epoch,
+            &register_init,
+            is_final,
+            label,
+            opts,
+        ) {
+            return Ok(None);
+        }
+
+        epoch_roots.push(epoch.l2g_root);
+        public_output.extend_from_slice(&epoch.public_output);
+        // Next epoch's init is this epoch's bound fini — the cross-epoch register
+        // (and x254) binding. A mismatched fini desyncs the next epoch's AIRs.
+        register_init = epoch.reg_fini.clone();
+    }
+
+    // Cross-epoch global memory: genesis rebuilt FROM THE ELF (+ private inputs),
+    // so the starting memory cannot be prover-chosen; the bus telescopes fini→init.
+    let all_boundaries: Vec<Vec<CellBoundary>> =
+        bundle.epochs.iter().map(|e| e.boundary.clone()).collect();
+    if !verify_global(
+        &all_boundaries,
+        &bundle.global,
+        &elf,
+        elf_bytes,
+        &bundle.private_inputs,
+        opts,
+    ) {
+        return Ok(None);
+    }
+
+    // Each epoch's committed L2G table is the same one the global proof used.
+    if !verify_l2g_commitment_binding(&epoch_roots, &bundle.global) {
+        return Ok(None);
+    }
+
+    Ok(Some(public_output))
+}
+
+/// Convenience wrapper: prove then verify in one call (the original integrated API).
+/// Returns `Ok(Some(public_output))` iff the continuation proves and verifies.
+pub fn prove_and_verify_continuation(
+    elf_bytes: &[u8],
+    private_inputs: &[u8],
+    epoch_size_log2: u32,
+    opts: &ProofOptions,
+) -> Result<Option<Vec<u8>>, Error> {
+    let bundle = prove_continuation(elf_bytes, private_inputs, epoch_size_log2, opts)?;
+    verify_continuation(elf_bytes, &bundle, opts)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::test_utils::asm_elf_bytes;
+
+    // `test_commit_split` issues two Commit syscalls, one early and one late, so a
+    // small epoch puts the second commit in a later epoch. That epoch starts with
+    // x254 > 0 (the carried commit index), which exercises the cross-epoch commit
+    // indexing: both the COMMIT trace and the verifier's `compute_commit_bus_offset`
+    // index from the carried x254 rather than 0. Regression test for that fix.
+    #[test]
+    fn test_commit_across_epochs_verifies() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("test_commit_split");
+        let expected_output: [u8; 4] = [0xAA, 0xBB, 0xCC, 0xDD];
+
+        let total = Executor::new(&Elf::load(&elf_bytes).unwrap(), vec![])
+            .unwrap()
+            .run()
+            .unwrap()
+            .logs
+            .len();
+
+        // Both commits in a single 64-cycle epoch (x254 starts at 0).
+        let single = prove_and_verify_continuation(
+            &elf_bytes,
+            &[],
+            6,
+            &ProofOptions::default_test_options(),
+        )
+        .unwrap();
+        assert_eq!(single.as_deref(), Some(&expected_output[..]));
+        assert!(total <= (1 << 6), "single-epoch log2 must cover the run");
+
+        // The late commit (only `halt` follows it) lands past the midpoint, so a
+        // 16-cycle epoch forces it into a later epoch where x254 is already 2.
+        // Prove first so we can assert the run actually split into >1 epoch — without
+        // this the test would silently pass even if it degraded to a single epoch.
+        let bundle =
+            prove_continuation(&elf_bytes, &[], 4, &ProofOptions::default_test_options()).unwrap();
+        assert!(
+            bundle.num_epochs() > 1,
+            "16-cycle epochs must split the run into multiple epochs"
+        );
+        let split = verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options())
+            .unwrap();
+        assert_eq!(
+            split.as_deref(),
+            Some(&expected_output[..]),
+            "commit in a later epoch must verify and aggregate to the same output"
+        );
+    }
+
+    // A memory-heavy multi-epoch continuation. `all_loadstore_32` is ~34 cycles, so
+    // `epoch_size_log2 = 3` (8 cycles) yields several intermediate epochs (each an
+    // exact power-of-two cycle count → no CPU padding rows) plus a final epoch.
+    #[test]
+    fn test_prove_and_verify_continuation() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("all_loadstore_32");
+        let epoch_size_log2 = 3;
+        let epoch_size = 8;
+        // Guard against silent degradation: the program must be longer than one
+        // epoch, otherwise this collapses to a single final epoch and stops testing
+        // the cross-epoch (intermediate-epoch) path.
+        let total = Executor::new(&Elf::load(&elf_bytes).unwrap(), vec![])
+            .unwrap()
+            .run()
+            .unwrap()
+            .logs
+            .len();
+        assert!(
+            total > epoch_size,
+            "program too short ({total} cycles) to exercise intermediate epochs"
+        );
+        assert!(
+            prove_and_verify_continuation(
+                &elf_bytes,
+                &[],
+                epoch_size_log2,
+                &ProofOptions::default_test_options()
+            )
+            .unwrap()
+            .is_some()
+        );
+    }
+
+    // Regression for touched-cell prediction from carried registers. A syscall
+    // whose operand pointers live in registers (ECSM reads a0/a1/a2) can have those
+    // registers set in an EARLIER epoch than the call. `test_ecsm_split` sets
+    // a0/a1/a2 at the very start and runs the ECSM ~46 cycles later;
+    // `epoch_size_log2 = 5` (32 cycles) puts the pointer setup in epoch 0 and the
+    // ecall in epoch 1. The per-epoch touched-cell pass must carry registers across
+    // the boundary — otherwise it reads the pointers as 0, mispredicts the touched
+    // cells (and the ECSM operands), and the epoch cannot verify.
+    #[test]
+    fn test_ecsm_across_epochs_verifies() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("test_ecsm_split");
+        let total = Executor::new(&Elf::load(&elf_bytes).unwrap(), vec![])
+            .unwrap()
+            .run()
+            .unwrap()
+            .logs
+            .len();
+        assert!(total > 32, "the ECSM ecall must fall past the first epoch");
+        let out = prove_and_verify_continuation(
+            &elf_bytes,
+            &[],
+            5,
+            &ProofOptions::default_test_options(),
+        )
+        .unwrap();
+        assert!(
+            out.is_some(),
+            "an ECSM whose pointer registers were set in an earlier epoch must still verify"
+        );
+    }
+
+    // Guards that the continuation API takes `epoch_size_log2` directly. A log2 of
+    // 4 produces 16-cycle epochs over the 33-cycle `test_commit_split`, putting its
+    // two commits in different epochs and exercising the cross-epoch x254 carry.
+    #[test]
+    fn test_continuation_epoch_size_log2() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("test_commit_split");
+        let out = prove_and_verify_continuation(
+            &elf_bytes,
+            &[],
+            4,
+            &ProofOptions::default_test_options(),
+        )
+        .unwrap();
+        assert_eq!(out.as_deref(), Some(&[0xAA, 0xBB, 0xCC, 0xDD][..]));
+    }
+
+    #[test]
+    fn test_continuation_rejects_too_small_epoch_size_log2() {
+        assert!(matches!(
+            prove_continuation(&[], &[], 1, &ProofOptions::default_test_options()),
+            Err(Error::InvalidContinuationEpochSize(_))
+        ));
+    }
+
+    // ---- Standalone (split) prover/verifier ----
+
+    // Round-trip: a bundle from prove_continuation verifies on its own (only the
+    // bundle + ELF) and reconstructs the exact run-wide output.
+    #[test]
+    fn test_split_verify_roundtrip() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("test_commit_split");
+        let bundle =
+            prove_continuation(&elf_bytes, &[], 4, &ProofOptions::default_test_options()).unwrap();
+        let out = verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options())
+            .unwrap();
+        assert_eq!(out.as_deref(), Some(&[0xAA, 0xBB, 0xCC, 0xDD][..]));
+    }
+
+    // A bundle survives a bincode round-trip and still verifies to the same output —
+    // the serialization path the CLI's `prove`/`verify --continuations` relies on.
+    #[test]
+    fn test_continuation_bincode_roundtrip() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("test_commit_split");
+        let bundle =
+            prove_continuation(&elf_bytes, &[], 4, &ProofOptions::default_test_options()).unwrap();
+
+        let bytes = bincode::serialize(&bundle).unwrap();
+        let restored: ContinuationProof = bincode::deserialize(&bytes).unwrap();
+
+        let out = verify_continuation(&elf_bytes, &restored, &ProofOptions::default_test_options())
+            .unwrap();
+        assert_eq!(out.as_deref(), Some(&[0xAA, 0xBB, 0xCC, 0xDD][..]));
+    }
+
+    // Negative: dropping the final (halting) epoch must be rejected — the new last
+    // epoch is non-halting but the verifier builds it as `is_final` (HALT included),
+    // so it can't verify. Guards completeness / no-truncation.
+    #[test]
+    fn test_split_verify_rejects_dropped_last_epoch() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("all_loadstore_32");
+        let mut bundle =
+            prove_continuation(&elf_bytes, &[], 3, &ProofOptions::default_test_options()).unwrap();
+        assert!(bundle.epochs.len() >= 3, "need multiple epochs");
+        bundle.epochs.pop();
+        assert!(
+            verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options())
+                .unwrap()
+                .is_none()
+        );
+    }
+
+    // Negative: reordering epochs must be rejected — each epoch proof is bound to its
+    // 1-based label (and register chain), so a swapped epoch fails to verify. Guards
+    // the trusted-enumeration ordering.
+    #[test]
+    fn test_split_verify_rejects_reordered_epochs() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("all_loadstore_32");
+        let mut bundle =
+            prove_continuation(&elf_bytes, &[], 3, &ProofOptions::default_test_options()).unwrap();
+        assert!(bundle.epochs.len() >= 3, "need multiple epochs");
+        bundle.epochs.swap(0, 1);
+        assert!(
+            verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options())
+                .unwrap()
+                .is_none()
+        );
+    }
+
+    // Negative: corrupting an epoch's bound final register file (R_{i+1}) must be
+    // rejected — the verifier derives the next epoch's INIT from it, so it no longer
+    // matches that epoch's committed preprocessed INIT. Guards the cross-epoch
+    // register binding (incl. x254).
+    #[test]
+    fn test_split_verify_rejects_tampered_register_fini() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("all_loadstore_32");
+        let mut bundle =
+            prove_continuation(&elf_bytes, &[], 3, &ProofOptions::default_test_options()).unwrap();
+        assert!(
+            bundle.epochs.len() >= 2,
+            "need a second epoch to chain into"
+        );
+        bundle.epochs[0].reg_fini[0] ^= 1;
+        assert!(
+            verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options())
+                .unwrap()
+                .is_none()
+        );
+    }
+
+    // Negative: a malformed bundle whose `reg_fini` has the wrong length must be
+    // rejected with `Ok(None)`, not panic the verifier. `reg_fini` is deserialized
+    // (untrusted) and indexed by `NUM_REGISTER_ADDRESSES` when building the
+    // preprocessed REGISTER commitment, so a short one would otherwise be an
+    // out-of-bounds panic in release builds.
+    #[test]
+    fn test_split_verify_rejects_malformed_register_fini_length() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("all_loadstore_32");
+        let mut bundle =
+            prove_continuation(&elf_bytes, &[], 3, &ProofOptions::default_test_options()).unwrap();
+        assert!(!bundle.epochs.is_empty());
+        bundle.epochs[0].reg_fini.pop();
+        assert!(
+            verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options())
+                .unwrap()
+                .is_none()
+        );
+    }
+
+    // Negative: table_counts are bundle data. Inflating a positive count must be
+    // rejected before the verifier builds AIRs from the malformed shape.
+    #[test]
+    fn test_split_verify_rejects_inflated_epoch_table_count() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("all_loadstore_32");
+        let mut bundle =
+            prove_continuation(&elf_bytes, &[], 8, &ProofOptions::default_test_options()).unwrap();
+        bundle.epochs[0].table_counts.cpu += 1;
+        assert!(
+            verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options())
+                .unwrap()
+                .is_none()
+        );
+    }
+
+    // Negative: the verifier rebuilds private-input genesis from bundle bytes.
+    // Changing those bytes after proving changes the global-memory preprocessed
+    // genesis commitment, so the standalone verifier must reject.
+    #[test]
+    fn test_split_verify_rejects_tampered_private_input_genesis() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("test_private_input_xpage");
+        let private_inputs: Vec<u8> = (0u8..16).collect();
+        let mut bundle = prove_continuation(
+            &elf_bytes,
+            &private_inputs,
+            4,
+            &ProofOptions::default_test_options(),
+        )
+        .unwrap();
+        assert!(
+            verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options())
+                .unwrap()
+                .is_some(),
+            "baseline must verify before tampering"
+        );
+
+        bundle.private_inputs[4] ^= 0xFF;
+        assert!(
+            verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options())
+                .unwrap()
+                .is_none()
+        );
+    }
+
+    // Negative: verifier-side private inputs are deserialized/untrusted, so reject
+    // oversized bundles before rebuilding genesis page configs from them.
+    #[test]
+    fn test_split_verify_rejects_oversized_private_inputs() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("all_loadstore_32");
+        let mut bundle =
+            prove_continuation(&elf_bytes, &[], 8, &ProofOptions::default_test_options()).unwrap();
+        bundle.private_inputs = vec![0; MAX_PRIVATE_INPUT_SIZE as usize + 1];
+        assert!(matches!(
+            verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options()),
+            Err(Error::InvalidTableCounts(_))
+        ));
+    }
+
+    // The bundle's `boundary` field is used only to rebuild the global AIRs' touched-
+    // PAGE set (genesis is recomputed from the ELF). The cross-epoch memory values
+    // live in the committed L2G traces, tied to the epoch proofs by
+    // `verify_l2g_commitment_binding` (exercised by test_split_verify_rejects_tampered_l2g_root
+    // below). Tampering a boundary value is therefore inconsequential; omitting/adding
+    // a touched page is caught by the GlobalMemory bus (unmatched fini / air count
+    // mismatch). So there is no meaningful "tamper a boundary value" negative test.
+
+    // Negative: corrupting an epoch's claimed L2G table root must be rejected —
+    // `verify_l2g_commitment_binding` compares each epoch's `l2g_root` against the
+    // corresponding sub-proof root in the global proof, so a mismatched root causes
+    // the binding to fail. Guards the L2G root↔global commitment binding.
+    #[test]
+    fn test_split_verify_rejects_tampered_l2g_root() {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let elf_bytes = asm_elf_bytes("all_loadstore_32");
+        let mut bundle =
+            prove_continuation(&elf_bytes, &[], 3, &ProofOptions::default_test_options()).unwrap();
+        assert!(
+            bundle.epochs.len() >= 2,
+            "need multiple epochs to exercise the binding"
+        );
+        bundle.epochs[0].l2g_root[0] ^= 0xFF;
+        assert!(
+            verify_continuation(&elf_bytes, &bundle, &ProofOptions::default_test_options())
+                .unwrap()
+                .is_none()
+        );
+    }
+}
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index 81233d39f..143d1ead6 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -13,10 +13,12 @@
 #[cfg(feature = "disk-spill")]
 pub mod auto_storage;
 pub mod constraints;
+pub mod continuation;
 #[cfg(feature = "debug-checks")]
 mod debug_report;
 #[cfg(feature = "instruments")]
 pub mod instruments;
+mod paged_mem;
 mod statement;
 pub mod tables;
 pub mod test_utils;
@@ -37,7 +39,7 @@ use stark::storage_mode::StorageMode;
 use stark::traits::AIR;
 use stark::verifier::{IsStarkVerifier, Verifier};
 
-use crate::statement::absorb_statement;
+use crate::statement::{StatementKind, absorb_statement};
 pub use crate::tables::MaxRowsConfig;
 use crate::tables::bitwise;
 use crate::tables::decode;
@@ -184,6 +186,13 @@ pub enum Error {
     Prover(String),
     /// Proof contains invalid table_counts (e.g. zero for a required table)
     InvalidTableCounts(String),
+    /// Continuation epoch size exponent is invalid.
+    InvalidContinuationEpochSize(String),
+    /// Continuation proof construction hit an internal invariant failure.
+    ContinuationInvariant(String),
+    /// A non-final continuation epoch contains the program-terminating
+    /// instruction. The terminating instruction must be in the final epoch.
+    HaltInNonFinalEpoch,
 }
 
 impl fmt::Display for Error {
@@ -197,6 +206,18 @@ impl fmt::Display for Error {
             Error::Execution(msg) => write!(f, "execution error: {msg}"),
             Error::Prover(msg) => write!(f, "proving error: {msg}"),
             Error::InvalidTableCounts(msg) => write!(f, "invalid table_counts: {msg}"),
+            Error::InvalidContinuationEpochSize(msg) => {
+                write!(f, "invalid continuation epoch size: {msg}")
+            }
+            Error::ContinuationInvariant(msg) => {
+                write!(f, "continuation invariant failed: {msg}")
+            }
+            Error::HaltInNonFinalEpoch => {
+                write!(
+                    f,
+                    "the program-terminating instruction must be in the final epoch"
+                )
+            }
         }
     }
 }
@@ -234,6 +255,9 @@ pub(crate) struct VmAirs {
     pub register: VmAir,
     pub pages: Vec<VmAir>,
     pub memw_registers: Vec<VmAir>,
+    /// Whether the HALT table participates in this proof. False for intermediate
+    /// continuation epochs, which do not terminate the program.
+    pub include_halt: bool,
     // Auxiliary ALU / memory / CPU32 dispatch chips
     pub eqs: Vec<VmAir>,
     pub bytewises: Vec<VmAir>,
@@ -247,7 +271,6 @@ impl VmAirs {
         let mut pairs: Vec<AirTracePair<'a>> = vec![
             (&self.bitwise, &mut traces.bitwise, &()),
             (&self.decode, &mut traces.decode, &()),
-            (&self.halt, &mut traces.halt, &()),
             (&self.commit, &mut traces.commit, &()),
             (&self.keccak, &mut traces.keccak, &()),
             (&self.keccak_rnd, &mut traces.keccak_rnd, &()),
@@ -257,6 +280,9 @@ impl VmAirs {
             (&self.ecdas, &mut traces.ecdas, &()),
             (&self.register, &mut traces.register, &()),
         ];
+        if self.include_halt {
+            pairs.push((&self.halt, &mut traces.halt, &()));
+        }
 
         for (air, trace) in self.cpus.iter().zip(traces.cpus.iter_mut()) {
             pairs.push((air, trace, &()));
@@ -320,7 +346,6 @@ impl VmAirs {
         let mut refs: Vec<&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>> = vec![
             &self.bitwise,
             &self.decode,
-            &self.halt,
             &self.commit,
             &self.keccak,
             &self.keccak_rnd,
@@ -330,6 +355,9 @@ impl VmAirs {
             &self.ecdas,
             &self.register,
         ];
+        if self.include_halt {
+            refs.push(&self.halt);
+        }
 
         for air in &self.cpus {
             refs.push(air);
@@ -410,6 +438,7 @@ impl VmAirs {
     /// here. A wrong value is rejected, never silently accepted: it either
     /// mismatches the prover's committed precomputed root (an explicit
     /// verifier check) or yields diverging Fiat-Shamir challenges.
+    #[allow(clippy::too_many_arguments)]
     pub fn new(
         elf: &Elf,
         proof_options: &ProofOptions,
@@ -417,7 +446,10 @@ impl VmAirs {
         page_configs: &[crate::tables::page::PageConfig],
         table_counts: &TableCounts,
         decode_commitment: Option<Commitment>,
+        include_halt: bool,
+        register_init: Option<&[u32]>,
         page_commitments: Option<&[(u64, Commitment)]>,
+        register_preprocessed: Option<(Commitment, usize)>,
     ) -> Self {
         let cpus: Vec<_> = (0..table_counts.cpu)
             .map(|i| create_cpu_air(proof_options).with_name(&format!("CPU[{}]", i)))
@@ -471,10 +503,17 @@ impl VmAirs {
         let ecsm = create_ecsm_air(proof_options);
         let ec_scalar = create_ec_scalar_air(proof_options);
         let ecdas = create_ecdas_air(proof_options);
-        let register = create_register_air(proof_options).with_preprocessed(
-            register::preprocessed_commitment(proof_options, elf.entry_point),
-            register::NUM_PREPROCESSED_COLS,
-        );
+        let register = if let Some((commitment, num_preprocessed_cols)) = register_preprocessed {
+            create_register_air(proof_options).with_preprocessed(commitment, num_preprocessed_cols)
+        } else {
+            let register_init = register_init
+                .map(<[u32]>::to_vec)
+                .unwrap_or_else(|| register::register_init_from_entry_point(elf.entry_point));
+            create_register_air(proof_options).with_preprocessed(
+                register::preprocessed_commitment(proof_options, &register_init),
+                register::NUM_PREPROCESSED_COLS,
+            )
+        };
         // Every zero-init page shares one preprocessed commitment: OFFSET is
         // page-relative and INIT is all-zero, so it depends only on
         // (blowup, coset) — all fixed here. Compute it once (static const
@@ -553,6 +592,7 @@ impl VmAirs {
             register,
             pages,
             memw_registers,
+            include_halt,
             eqs,
             bytewises,
             stores,
@@ -597,6 +637,7 @@ pub(crate) fn replay_transcript_phase_a(
 /// which the caller should treat as verification failure.
 pub(crate) fn compute_commit_bus_offset(
     public_output: &[u8],
+    start_index: u64,
     z: &FieldElement<E>,
     alpha: &FieldElement<E>,
 ) -> Option<FieldElement<E>> {
@@ -607,13 +648,16 @@ pub(crate) fn compute_commit_bus_offset(
     let bus_id = FieldElement::<E>::from(BusId::Commit as u64);
     let alpha_sq = alpha * alpha;
 
-    // fingerprint_i = z - (BusId::Commit + i·α + value_i·α²)
+    // fingerprint_i = z - (BusId::Commit + (start_index + i)·α + value_i·α²).
+    // `start_index` is the carried x254: 0 for a monolithic proof or the first
+    // epoch, nonzero for a continuation epoch whose commits continue a prior one.
     let mut fingerprints: Vec<FieldElement<E>> = public_output
         .iter()
         .enumerate()
         .map(|(i, &value)| {
+            let global_index = start_index + i as u64;
             let linear_combination = bus_id
-                + (FieldElement::<E>::from(i as u64) * alpha)
+                + (FieldElement::<E>::from(global_index) * alpha)
                 + (FieldElement::<E>::from(value as u64) * alpha_sq);
             z - linear_combination
         })
@@ -639,10 +683,32 @@ pub(crate) fn compute_expected_commit_bus_balance(
     airs: &[&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>],
     proof: &MultiProof<F, E, ()>,
     public_output_bytes: &[u8],
+    start_index: u64,
     transcript: &mut DefaultTranscript<E>,
 ) -> Option<FieldElement<E>> {
     let (z, alpha) = replay_transcript_phase_a(airs, proof, transcript);
-    compute_commit_bus_offset(public_output_bytes, &z, &alpha)
+    compute_commit_bus_offset(public_output_bytes, start_index, &z, &alpha)
+}
+
+/// Bind the final cross-epoch GlobalMemory proof to the per-epoch proofs.
+///
+/// The final proof commits one local-to-global sub-table per epoch as its first
+/// `N` tables, so `final_proof.proofs[i].lde_trace_main_merkle_root` is epoch
+/// `i`'s L2G commitment. `epoch_l2g_roots[i]` is the same root as committed in
+/// epoch `i`'s own proof. Equal roots prove the cross-epoch matching ran over
+/// the very same L2G tables the epochs committed (shared commitments).
+///
+/// Called by `continuation::verify_continuation`; also exercised by the
+/// local-to-global bus tests.
+pub(crate) fn verify_l2g_commitment_binding(
+    epoch_l2g_roots: &[Commitment],
+    final_proof: &MultiProof<F, E, ()>,
+) -> bool {
+    final_proof.proofs.len() >= epoch_l2g_roots.len()
+        && epoch_l2g_roots
+            .iter()
+            .enumerate()
+            .all(|(i, root)| final_proof.proofs[i].lde_trace_main_merkle_root == *root)
 }
 
 // =============================================================================
@@ -772,6 +838,9 @@ pub fn prove_with_options_and_inputs(
         &traces.page_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
 
@@ -793,6 +862,7 @@ pub fn prove_with_options_and_inputs(
     let mut transcript = DefaultTranscript::<E>::new(&[]);
     absorb_statement(
         &mut transcript,
+        StatementKind::Monolithic,
         elf_bytes,
         &traces.public_output_bytes,
         &table_counts,
@@ -930,7 +1000,10 @@ pub fn verify_with_options(
         &page_configs,
         &vm_proof.table_counts,
         decode_commitment,
+        true,
+        None,
         page_commitments,
+        None,
     );
 
     // Recompute the COMMIT output bus offset from VmProof.public_output.
@@ -944,6 +1017,7 @@ pub fn verify_with_options(
     let mut transcript = DefaultTranscript::<E>::new(&[]);
     absorb_statement(
         &mut transcript,
+        StatementKind::Monolithic,
         elf_bytes,
         &vm_proof.public_output,
         &vm_proof.table_counts,
@@ -959,6 +1033,8 @@ pub fn verify_with_options(
         &air_refs,
         &vm_proof.proof,
         &vm_proof.public_output,
+        // Monolithic proof: commits are indexed from 0.
+        0,
         &mut transcript_for_replay,
     ) {
         Some(balance) => balance,
diff --git a/prover/src/paged_mem.rs b/prover/src/paged_mem.rs
new file mode 100644
index 000000000..196d077bf
--- /dev/null
+++ b/prover/src/paged_mem.rs
@@ -0,0 +1,190 @@
+//! Page-bucketed dense memory store: `page_base -> [T; PAGE_SIZE]`.
+//!
+//! The prover's per-cell memory bookkeeping (the local-to-global `provenance`,
+//! and the carried memory `image`) is `O(footprint)` and held across the whole
+//! run. A per-cell `HashMap` is wasteful for that: per cell it also stores the
+//! 8-byte address key, hashing metadata, and ~30% empty load-factor slack.
+//!
+//! Measurement (ethrex 1-tx, `bench_continuation footprint`) showed the touched
+//! footprint is ~98% two big *contiguous* blocks — i.e. dense. For dense data a
+//! flat array indexed by offset is far cheaper: no keys, no hashing, no slack,
+//! and cache-friendly. This stores one dense `[T; PAGE_SIZE]` array per touched
+//! 256 KB page, in a small `Vec` sorted by page base (few entries — binary-search
+//! lookup + sorted insert, no hashing at all; the bulk lives in the arrays).
+//!
+//! Unset cells read back as `fill` (the genesis/default value) — pages are
+//! allocated filled — so callers that only `get`/`set` need no occupancy map.
+//! An occupancy bitmap is tracked so [`PagedMem::iter`] can yield exactly the
+//! cells that were explicitly `set`.
+
+use std::collections::HashMap;
+
+use crate::tables::page::DEFAULT_PAGE_SIZE;
+
+const WORD_BITS: usize = 64;
+const OCC_WORDS: usize = DEFAULT_PAGE_SIZE / WORD_BITS;
+
+struct Page<T> {
+    /// Dense values, length `DEFAULT_PAGE_SIZE`, initialized to `fill`.
+    data: Box<[T]>,
+    /// 1 bit per offset: set iff that offset was explicitly written via `set`.
+    occupied: Box<[u64]>,
+}
+
+/// A dense, page-bucketed `addr -> T` store. Cheaper than a per-cell `HashMap`
+/// when the touched addresses are contiguous. `get` on an unset cell returns
+/// the `fill` value supplied at construction.
+///
+/// The pages themselves are kept in a `Vec` sorted by base address (page bases
+/// are sparse across the 64-bit space, so a flat Vec-by-page-number is
+/// infeasible, but there are only a handful of touched pages, so binary-search
+/// lookup + sorted insert are cheap — and no hashing). The bulk (the cells)
+/// lives in each page's dense array.
+pub struct PagedMem<T> {
+    pages: Vec<(u64, Page<T>)>,
+    fill: T,
+}
+
+impl<T: Copy> PagedMem<T> {
+    /// Create an empty store. Unset cells read back as `fill`.
+    pub fn new(fill: T) -> Self {
+        Self {
+            pages: Vec::new(),
+            fill,
+        }
+    }
+
+    #[inline]
+    fn split(addr: u64) -> (u64, usize) {
+        // DEFAULT_PAGE_SIZE is a power of two, so the mask isolates the offset.
+        let mask = DEFAULT_PAGE_SIZE as u64 - 1;
+        (addr & !mask, (addr & mask) as usize)
+    }
+
+    /// Value at `addr`, or `fill` if never `set`.
+    #[inline]
+    pub fn get(&self, addr: u64) -> T {
+        let (base, off) = Self::split(addr);
+        match self.pages.binary_search_by_key(&base, |(b, _)| *b) {
+            Ok(i) => self.pages[i].1.data[off],
+            Err(_) => self.fill,
+        }
+    }
+
+    /// Set `addr` to `val`, allocating its page (filled) on first touch.
+    #[inline]
+    pub fn set(&mut self, addr: u64, val: T) {
+        let (base, off) = Self::split(addr);
+        let i = match self.pages.binary_search_by_key(&base, |(b, _)| *b) {
+            Ok(i) => i,
+            Err(i) => {
+                self.pages.insert(
+                    i,
+                    (
+                        base,
+                        Page {
+                            data: vec![self.fill; DEFAULT_PAGE_SIZE].into_boxed_slice(),
+                            occupied: vec![0u64; OCC_WORDS].into_boxed_slice(),
+                        },
+                    ),
+                );
+                i
+            }
+        };
+        let page = &mut self.pages[i].1;
+        page.data[off] = val;
+        page.occupied[off / WORD_BITS] |= 1u64 << (off % WORD_BITS);
+    }
+
+    /// Base addresses of the pages that hold at least one `set` cell, ascending.
+    /// (For a `DEFAULT_PAGE_SIZE`-aligned page, this equals `page_base_for_address`
+    /// of every cell in it, so it replaces `cells.keys().map(page_base)`.)
+    pub fn page_bases(&self) -> impl Iterator<Item = u64> + '_ {
+        self.pages.iter().map(|(b, _)| *b)
+    }
+
+    /// Number of cells that were explicitly `set`.
+    pub fn len(&self) -> usize {
+        self.pages
+            .iter()
+            .map(|(_, p)| {
+                p.occupied
+                    .iter()
+                    .map(|w| w.count_ones() as usize)
+                    .sum::<usize>()
+            })
+            .sum()
+    }
+
+    /// True if no cell was ever `set`.
+    pub fn is_empty(&self) -> bool {
+        self.pages
+            .iter()
+            .all(|(_, p)| p.occupied.iter().all(|&w| w == 0))
+    }
+
+    /// Iterate `(addr, value)` over exactly the cells that were `set`.
+    pub fn iter(&self) -> impl Iterator<Item = (u64, T)> + '_ {
+        self.pages.iter().flat_map(|(base, page)| {
+            let base = *base;
+            page.occupied
+                .iter()
+                .enumerate()
+                .flat_map(move |(w, &bits)| {
+                    BitIter { bits }.map(move |b| {
+                        let off = w * WORD_BITS + b;
+                        (base + off as u64, page.data[off])
+                    })
+                })
+        })
+    }
+}
+
+/// A read-only initial-memory image: `addr -> byte`, with an iterator over the
+/// bytes it holds. Implemented for both `HashMap<u64, u8>` (the monolithic
+/// prover's image) and [`PagedMem<u8>`] (the continuation's carried image), so
+/// trace generation can consume either without changing the monolithic path.
+pub trait ImageSource {
+    /// Byte at `addr`, or 0 if absent.
+    fn image_get(&self, addr: u64) -> u8;
+    /// Iterate `(addr, byte)` over every byte present in the image.
+    fn image_iter(&self) -> impl Iterator<Item = (u64, u8)> + '_;
+}
+
+impl ImageSource for HashMap<u64, u8> {
+    #[inline]
+    fn image_get(&self, addr: u64) -> u8 {
+        self.get(&addr).copied().unwrap_or(0)
+    }
+    fn image_iter(&self) -> impl Iterator<Item = (u64, u8)> + '_ {
+        self.iter().map(|(&addr, &byte)| (addr, byte))
+    }
+}
+
+impl ImageSource for PagedMem<u8> {
+    #[inline]
+    fn image_get(&self, addr: u64) -> u8 {
+        self.get(addr)
+    }
+    fn image_iter(&self) -> impl Iterator<Item = (u64, u8)> + '_ {
+        self.iter()
+    }
+}
+
+/// Yields the set-bit indices of a 64-bit word, low to high.
+struct BitIter {
+    bits: u64,
+}
+
+impl Iterator for BitIter {
+    type Item = usize;
+    fn next(&mut self) -> Option<usize> {
+        if self.bits == 0 {
+            None
+        } else {
+            let b = self.bits.trailing_zeros() as usize;
+            self.bits &= self.bits - 1; // clear lowest set bit
+            Some(b)
+        }
+    }
+}
diff --git a/prover/src/statement.rs b/prover/src/statement.rs
index 7935abe66..cca961be5 100644
--- a/prover/src/statement.rs
+++ b/prover/src/statement.rs
@@ -24,15 +24,34 @@ fn elf_digest(elf: &[u8]) -> [u8; 32] {
     h.finalize().into()
 }
 
+/// Which statement is being bound. Selects the leading domain tag and whether an
+/// epoch label is appended, so monolithic and continuation-epoch proofs share one
+/// function while each starts with its own tag. `Monolithic` reproduces the
+/// original encoding byte-for-byte (no label), so existing proofs are unaffected.
+#[derive(Clone, Copy)]
+pub(crate) enum StatementKind {
+    /// Whole-program (monolithic) proof.
+    Monolithic,
+    /// One continuation epoch proof, pinned to its position by `epoch_label`.
+    ContinuationEpoch { epoch_label: u64 },
+}
+
 pub(crate) fn absorb_statement(
     t: &mut impl IsTranscript<E>,
+    kind: StatementKind,
     elf_bytes: &[u8],
     public_output: &[u8],
     table_counts: &TableCounts,
     num_private_input_pages: usize,
     runtime_page_ranges: &[RuntimePageRange],
 ) {
-    t.append_bytes(DOMAIN_TAG);
+    // Leading domain tag — distinct per statement kind, so a monolithic proof and
+    // a continuation epoch proof can never share a transcript prefix.
+    let domain_tag = match kind {
+        StatementKind::Monolithic => DOMAIN_TAG,
+        StatementKind::ContinuationEpoch { .. } => CONTINUATION_EPOCH_TAG,
+    };
+    t.append_bytes(domain_tag);
 
     // ELF: fixed 32-byte digest — no length prefix needed.
     t.append_bytes(&elf_digest(elf_bytes));
@@ -90,4 +109,29 @@ pub(crate) fn absorb_statement(
         t.append_bytes(&base.to_le_bytes());
         t.append_bytes(&count.to_le_bytes());
     }
+
+    // Continuation epochs additionally bind their position (replay protection).
+    // Monolithic proofs append nothing here, so their encoding is unchanged.
+    if let StatementKind::ContinuationEpoch { epoch_label } = kind {
+        t.append_bytes(&epoch_label.to_le_bytes());
+    }
+}
+
+/// Continuation domain tags. Distinct from the monolithic `DOMAIN_TAG` so a
+/// monolithic proof and a continuation proof can never share a transcript prefix.
+const CONTINUATION_EPOCH_TAG: &[u8] = b"LAMBDAVM_CONTINUATION_EPOCH_V1";
+const CONTINUATION_GLOBAL_TAG: &[u8] = b"LAMBDAVM_CONTINUATION_GLOBAL_V1";
+
+/// Statement bound into the cross-epoch **global** proof's transcript before
+/// Phase A: the ELF (so the global proof is program-bound) and the epoch count
+/// (so a global proof from a run with a different number of epochs cannot be
+/// spliced in). Prove and verify must call this with identical arguments.
+pub(crate) fn absorb_continuation_global_statement(
+    t: &mut impl IsTranscript<E>,
+    elf_bytes: &[u8],
+    num_epochs: usize,
+) {
+    t.append_bytes(CONTINUATION_GLOBAL_TAG);
+    t.append_bytes(&elf_digest(elf_bytes));
+    t.append_bytes(&(num_epochs as u64).to_le_bytes());
 }
diff --git a/prover/src/tables/global_memory.rs b/prover/src/tables/global_memory.rs
new file mode 100644
index 000000000..b3f22b172
--- /dev/null
+++ b/prover/src/tables/global_memory.rs
@@ -0,0 +1,203 @@
+//! GLOBAL_MEMORY table for cross-epoch memory initialization and finalization.
+//!
+//! The cross-epoch analog of PAGE (`page.rs`): one dense table instance per
+//! touched page, bookending the `GlobalMemory` bus that links each epoch's
+//! local-to-global (`local_to_global.rs`) boundary claims. For every byte of the
+//! page it **sends** a genesis token (the cell's program-start value) and
+//! **receives** a finalization token (the cell's value after the last epoch that
+//! touched it). Untouched bytes send and receive the identical token, so they
+//! cancel — exactly as PAGE's init/fini bookend does on the epoch-local bus.
+//!
+//! Because the genesis value lives in a PREPROCESSED column (OFFSET + INIT,
+//! byte-for-byte identical to PAGE's), the verifier recomputes the same
+//! commitment from the ELF via [`page::compute_precomputed_commitment`]. This
+//! binds the program's initial memory to the ELF binary.
+//!
+//! ## Columns
+//!
+//! | Column | Type | Description |
+//! |--------|------|-------------|
+//! | offset | RowIndex | 0, 1, ..., page_size-1 (preprocessed) |
+//! | init | Byte | Genesis value (from ELF or 0) (preprocessed) |
+//! | fini | Byte | Value after the last touching epoch |
+//! | fini_epoch | Epoch | Last touching epoch (`GENESIS_EPOCH` if untouched) |
+//!
+//! Virtual: `address = page_base + offset`, `page_base` constant per instance.
+//!
+//! ## Bus Interactions
+//!
+//! GlobalMemory token: `[address_lo, address_hi, value, epoch]` (same order as
+//! `local_to_global::bus_interactions`; no timestamp — the chain is ordered by epoch).
+//!
+//! | Tag | Bus | Token | Multiplicity |
+//! |-----|-----|-------|--------------|
+//! | GM-GENESIS | GlobalMemory | `[address, init, GENESIS]` | 1 (sender) |
+//! | GM-FINAL   | GlobalMemory | `[address, fini, fini_epoch]` | 1 (receiver) |
+
+use std::collections::HashMap;
+
+use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity};
+use stark::trace::TraceTable;
+
+use super::local_to_global::{GENESIS_EPOCH, direct};
+use super::page::{DEFAULT_PAGE_SIZE, PageConfig};
+use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField};
+
+// =========================================================================
+// Column indices
+// =========================================================================
+
+/// Column definitions for the GLOBAL_MEMORY table.
+///
+/// `address` is virtual, computed as `page_base + offset`; it is NOT a column.
+pub mod cols {
+    /// offset: Row index (0, 1, ..., page_size-1) - preprocessed
+    pub const OFFSET: usize = 0;
+
+    /// init: Genesis byte value (from ELF or 0) - preprocessed
+    pub const INIT: usize = 1;
+
+    // Note: there is no init-epoch column. The genesis token always carries
+    // `GENESIS_EPOCH`, so the GM-GENESIS sender emits it as a constant (like L2G's
+    // `fini_epoch`), saving a column and removing a prover-chosen value.
+
+    /// fini: Final byte value after the last touching epoch
+    pub const FINI: usize = 2;
+
+    /// fini_epoch: Last epoch that touched the cell (`GENESIS_EPOCH` if untouched)
+    pub const FINI_EPOCH: usize = 3;
+
+    // Note: no fini-timestamp column. The GlobalMemory bus carries no timestamp
+    // (the cross-epoch chain is ordered by epoch); timestamps are epoch-local.
+
+    /// Total number of columns
+    pub const NUM_COLUMNS: usize = 4;
+}
+
+/// Number of preprocessed columns (OFFSET, INIT). Identical to PAGE's preprocessed
+/// columns, so the preprocessed commitment is shared with PAGE — compute it with
+/// [`page::compute_precomputed_commitment`].
+pub const NUM_PREPROCESSED_COLS: usize = 2;
+
+// =========================================================================
+// Types
+// =========================================================================
+
+/// Final state for a single byte address after the last epoch that touched it.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct FiniState {
+    /// Final byte value.
+    pub value: u8,
+    /// Index of the last epoch that touched the cell.
+    pub epoch: u64,
+}
+
+/// Map from byte address to final state, for the bytes touched across all epochs.
+pub type FiniStateMap = HashMap<u64, FiniState>;
+
+// =========================================================================
+// Trace generation
+// =========================================================================
+
+/// Generates a GLOBAL_MEMORY trace for a single page.
+///
+/// `config` supplies `page_base` and the genesis `init_values` (from the ELF);
+/// `final_state` maps each touched byte to its final value and last-touch epoch.
+pub fn generate_global_trace(
+    config: &PageConfig,
+    final_state: &FiniStateMap,
+) -> TraceTable<GoldilocksField, GoldilocksExtension> {
+    let page_size = DEFAULT_PAGE_SIZE;
+    let page_base = config.page_base;
+
+    assert!(
+        page_base.is_multiple_of(page_size as u64),
+        "Page base must be page-aligned"
+    );
+
+    let num_rows = page_size; // One row per byte in the page
+    let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS];
+
+    for offset in 0..page_size {
+        let byte_addr = page_base + (offset as u64);
+        let base = offset * cols::NUM_COLUMNS;
+
+        // Offset (preprocessed) - address is virtual: page_base + offset
+        data[base + cols::OFFSET] = FE::from(offset as u64);
+
+        // Genesis value (init_values may be shorter than the page → trailing zeros)
+        let init_value = config
+            .init_values
+            .as_ref()
+            .and_then(|v| v.get(offset).copied())
+            .unwrap_or(0);
+        data[base + cols::INIT] = FE::from(init_value as u64);
+
+        // Final state: if touched use it, otherwise the cell stays at genesis
+        // (fini=init, epoch=GENESIS) so its genesis/finalization tokens cancel.
+        let (fini_value, fini_epoch) = match final_state.get(&byte_addr) {
+            Some(state) => (state.value, state.epoch),
+            None => (init_value, GENESIS_EPOCH),
+        };
+
+        data[base + cols::FINI] = FE::from(fini_value as u64);
+        data[base + cols::FINI_EPOCH] = FE::from(fini_epoch);
+    }
+
+    TraceTable::new_main(data, cols::NUM_COLUMNS, 1)
+}
+
+// =========================================================================
+// Bus interactions
+// =========================================================================
+
+/// Creates the GlobalMemory bus interactions for a GLOBAL_MEMORY table.
+///
+/// The token order matches `local_to_global::bus_interactions` exactly:
+/// `[address_lo, address_hi, value, epoch]` (no timestamp — the cross-epoch chain
+/// is ordered by epoch). The address is computed as `page_base + offset` via a
+/// linear combination, like PAGE.
+///
+/// - GM-GENESIS: sends `[address, init, GENESIS]` — the token an L2G
+///   init-receiver consumes for a genesis-origin cell.
+/// - GM-FINAL: receives `[address, fini, fini_epoch]` — the token the
+///   last touching epoch's L2G fini-sender produces.
+pub fn bus_interactions(page_base: u64) -> Vec<BusInteraction> {
+    let page_base_lo = page_base & 0xFFFF_FFFF;
+    let page_base_hi = page_base >> 32;
+
+    let address_lo = BusValue::linear(vec![
+        LinearTerm::Constant(page_base_lo as i64),
+        LinearTerm::Column {
+            coefficient: 1,
+            column: cols::OFFSET,
+        },
+    ]);
+    let address_hi = BusValue::constant(page_base_hi);
+
+    vec![
+        // GM-GENESIS: send the genesis token [address, init, GENESIS]. No timestamp:
+        // the GlobalMemory chain is ordered by epoch (timestamps are epoch-local).
+        BusInteraction::sender(
+            BusId::GlobalMemory,
+            Multiplicity::One,
+            vec![
+                address_lo.clone(),
+                address_hi.clone(),
+                direct(cols::INIT),
+                BusValue::constant(GENESIS_EPOCH),
+            ],
+        ),
+        // GM-FINAL: receive the finalization token [address, fini, fini_epoch].
+        BusInteraction::receiver(
+            BusId::GlobalMemory,
+            Multiplicity::One,
+            vec![
+                address_lo,
+                address_hi,
+                direct(cols::FINI),
+                direct(cols::FINI_EPOCH),
+            ],
+        ),
+    ]
+}
diff --git a/prover/src/tables/local_to_global.rs b/prover/src/tables/local_to_global.rs
new file mode 100644
index 000000000..3f9221d57
--- /dev/null
+++ b/prover/src/tables/local_to_global.rs
@@ -0,0 +1,831 @@
+//! Local-to-global memory boundary claims for cross-epoch continuations.
+//!
+//! Each epoch, for every memory cell it touches,
+//! makes an `init` claim (the cell's value when first touched this epoch, which
+//! earlier epoch last wrote it, and that write's timestamp) and a `fini` claim
+//! (the cell's value at this epoch's end, this epoch's index, and the last
+//! access timestamp). A final LogUp matches each `fini` against the `init` of the
+//! next epoch that touches the same cell, proving global memory consistency.
+//!
+//! ## Epoch labels
+//!
+//! Epochs are labelled 1-based (epoch index `i` → label `i+1`) and the genesis
+//! sentinel is `0` ([`GENESIS_EPOCH`]). This makes "the originating epoch is
+//! strictly earlier" a plain `init_epoch < fini_epoch` — genesis (`0`) is below
+//! every real epoch, so it needs no special case.
+//!
+//! ## Ordering constraint
+//!
+//! The GlobalMemory LogUp only proves the init/fini tokens *match as a set*; it
+//! does not by itself force the chain to be consumed in increasing-epoch order.
+//! Without that, a prover could let an init consume a *later* epoch's fini (a
+//! backward/self edge), seeding a cell with an unjustified value. So each real
+//! row also proves `init_epoch < fini_epoch` via an `IsB20` lookup on
+//! `fini_epoch − 1 − init_epoch` (it must be a valid 20-bit value). This bounds
+//! the number of epochs to `< 2^20` (~1M) — unreachable in practice (optimal
+//! epochs are millions of cycles, so thousands of epochs) and fails closed.
+//!
+//! ## Range-checked columns
+//!
+//! A column needs an explicit range check only if nothing else already pins it.
+//! Most L2G columns travel on the epoch-local `Memory` bus and are matched there
+//! against MEMW, which already range/order-checks address, timestamp and value —
+//! exactly how PAGE relies on MEMW in the monolithic prover. So `address` and
+//! `fini_timestamp` are plain 32-bit columns with no extra check, and the value
+//! bytes get the same batched `AreBytes` check PAGE uses (the `init` value is a
+//! trusted source, so it must be checked). `fini_epoch` is the same constant for
+//! every row of an epoch's table, so it is supplied as a per-table constant (not
+//! a column) by [`bus_interactions`].
+//!
+//! The only column that lives ONLY on the cross-epoch `GlobalMemory` bus has no
+//! MEMW partner: `init_epoch`. It is stored as two 16-bit halfword columns, each
+//! checked via `IsHalfword`, and the 32-bit bus value is rebuilt from them by a
+//! linear combination (see [`word`]). The checks are emitted on the epoch-local
+//! table (which has the BITWISE provider); the global proof commits the identical
+//! trace (the commitment binding compares their roots), so it inherits the same
+//! guarantee. There is no `init_timestamp` column: timestamps are epoch-local, and
+//! the cross-epoch chain is ordered by epoch.
+//!
+//! ## Padding
+//!
+//! Real rows carry `MU = 1`; the power-of-two padding rows carry `MU = 0`. Every
+//! interaction uses `Multiplicity::Column(MU)`, so padding rows fire nothing —
+//! we never rely on token self-cancellation (this is the standard pattern used
+//! by every variable-length table). `MU` is self-enforced: dropping a real row
+//! (`MU = 0`) breaks its telescoping link → bus imbalance.
+
+use std::collections::HashMap;
+
+use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing};
+use stark::trace::TraceTable;
+
+use super::bitwise::{BitwiseOperation, BitwiseOperationType};
+use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField};
+use crate::paged_mem::PagedMem;
+
+/// Per-cell provenance: `(last_writer_epoch, value, timestamp)`. Unset cells read
+/// back as the genesis default `(GENESIS_EPOCH, 0, 0)`.
+type Provenance = PagedMem<(u64, u64, u64)>;
+
+/// Sentinel `originating_epoch` for cells whose value comes from the program's
+/// initial memory — no prior epoch wrote them. Chosen as `0`, below every real
+/// (1-based) epoch label, so `init_epoch < fini_epoch` holds for genesis cells.
+pub const GENESIS_EPOCH: u64 = 0;
+
+/// Maximum number of epochs a continuation run may have.
+///
+/// The cross-epoch ordering check proves `init_epoch < fini_epoch` via an `IsB20`
+/// (20-bit) lookup on `fini_epoch - 1 - init_epoch`. A genesis-sourced cell
+/// finalized in epoch `index` (0-based) has gap `index`, so every epoch must
+/// satisfy `index < 2^20`. A run needing more epochs cannot be proved — the
+/// IsB20 bus would not balance — so the driver rejects it up front (see
+/// `prove_continuation`).
+pub const MAX_EPOCHS: u64 = 1 << 20;
+
+/// A cell's state when an epoch first touches it.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub struct InitClaim {
+    /// Value the cell held when this epoch first touched it.
+    pub value: u64,
+    /// Epoch that last wrote the cell (or [`GENESIS_EPOCH`]).
+    pub originating_epoch: u64,
+    /// Timestamp of that originating write.
+    pub timestamp: u64,
+}
+
+/// A cell's state at the end of the epoch that touched it.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub struct FiniClaim {
+    /// Value the cell holds at this epoch's end.
+    pub value: u64,
+    /// This epoch's label (1-based).
+    pub epoch: u64,
+    /// Last access timestamp for the cell this epoch.
+    pub timestamp: u64,
+}
+
+/// The init/fini boundary claims for a single touched cell.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub struct CellBoundary {
+    pub address: u64,
+    pub init: InitClaim,
+    pub fini: FiniClaim,
+}
+
+/// One epoch's touched cells, each as `(address, end_value, end_timestamp)`.
+pub type EpochTouches = Vec<(u64, u64, u64)>;
+
+/// Convert a 0-based epoch index into its 1-based table label.
+pub fn epoch_label(epoch_index: u64) -> u64 {
+    epoch_index + 1
+}
+
+/// Compute the sparse per-epoch boundary claims.
+///
+/// `initial_memory` maps each address to its program-start value (originating
+/// epoch [`GENESIS_EPOCH`], timestamp 0). `epochs[e]` lists the cells touched in
+/// epoch `e` with their end value and end timestamp. Returns, per epoch, the
+/// boundary claims for exactly the cells that epoch touched (sparse): each
+/// cell's `init` is taken from the previous epoch that wrote it, and its `fini`
+/// records this epoch (1-based label) as the new writer.
+pub fn epoch_boundaries(
+    initial_memory: &HashMap<u64, u64>,
+    epochs: &[EpochTouches],
+) -> Vec<Vec<CellBoundary>> {
+    // provenance[addr] = (last_writer_epoch, value, timestamp)
+    let mut provenance = genesis_provenance(initial_memory.iter().map(|(&a, &v)| (a, v)));
+
+    let mut result = Vec::with_capacity(epochs.len());
+    for (epoch, touched) in epochs.iter().enumerate() {
+        result.push(epoch_boundary(
+            &mut provenance,
+            epoch_label(epoch as u64),
+            touched,
+        ));
+    }
+    result
+}
+
+/// One epoch's boundaries, taking `init` from the running `provenance` (the cell's
+/// last writer) and updating `provenance` with this epoch's `fini`. `epoch` is the
+/// 1-based label. This is the per-epoch step of [`epoch_boundaries`], exposed so
+/// the streaming continuation prover can build each epoch's table incrementally
+/// without all epochs at once.
+pub fn epoch_boundary(
+    provenance: &mut Provenance,
+    epoch: u64,
+    touched: &[(u64, u64, u64)],
+) -> Vec<CellBoundary> {
+    let mut boundaries = Vec::with_capacity(touched.len());
+    for &(address, end_value, end_timestamp) in touched {
+        // Unset cells read back as the genesis default `(GENESIS_EPOCH, 0, 0)`.
+        let (originating_epoch, init_value, init_timestamp) = provenance.get(address);
+        boundaries.push(CellBoundary {
+            address,
+            init: InitClaim {
+                value: init_value,
+                originating_epoch,
+                timestamp: init_timestamp,
+            },
+            fini: FiniClaim {
+                value: end_value,
+                epoch,
+                timestamp: end_timestamp,
+            },
+        });
+        provenance.set(address, (epoch, end_value, end_timestamp));
+    }
+    boundaries
+}
+
+/// Seed the provenance store from the program's initial memory (genesis cells),
+/// supplied as an `(address, value)` iterator. The continuation prover feeds the
+/// paged genesis image directly, avoiding an intermediate address→value map.
+pub fn genesis_provenance(genesis: impl IntoIterator<Item = (u64, u64)>) -> Provenance {
+    let mut provenance = Provenance::new((GENESIS_EPOCH, 0, 0));
+    for (addr, value) in genesis {
+        provenance.set(addr, (GENESIS_EPOCH, value, 0));
+    }
+    provenance
+}
+
+// =========================================================================
+// AIR trace columns
+// =========================================================================
+
+/// Column indices for the local-to-global table: one row per touched cell.
+///
+/// `address` and `fini_timestamp` are plain 32-bit columns (matched on the Memory
+/// bus against MEMW). The cross-epoch-only `init_epoch` is stored as 16-bit
+/// halfword columns ([`RANGE_CHECKED_HALFWORDS`]), checked via `IsHalfword`, and
+/// rebuilt into its 32-bit bus value via [`word`]. The value bytes get the
+/// batched `AreBytes` check. `fini_epoch` is a per-table constant (not a column).
+/// `MU` is the real-row selector / multiplicity.
+pub mod cols {
+    /// address_lo: 32-bit; matched on the Memory bus against MEMW.
+    pub const ADDRESS_LO: usize = 0;
+    /// address_hi: 32-bit; matched on the Memory bus against MEMW.
+    pub const ADDRESS_HI: usize = 1;
+
+    /// Init value: a single byte, like PAGE's `value`.
+    pub const INIT_VALUE: usize = 2;
+
+    // Init epoch — GlobalMemory-bus only, range-checked: two halfwords
+    // (`init_epoch = INIT_EPOCH_0 + 2^16·INIT_EPOCH_1`).
+    pub const INIT_EPOCH_0: usize = 3;
+    pub const INIT_EPOCH_1: usize = 4;
+
+    // Note: there is no init-timestamp column. Timestamps are epoch-local ordering
+    // scratch (the Memory-bus init token is seeded at ts=0); across epochs the chain
+    // is ordered by `init_epoch < fini_epoch`, so the GlobalMemory bus carries no
+    // timestamp at all (see `bus_interactions`).
+
+    /// Fini value: a single byte.
+    pub const FINI_VALUE: usize = 5;
+
+    /// fini_timestamp_lo: 32-bit; matched on the Memory bus against MEMW.
+    pub const FINI_TIMESTAMP_LO: usize = 6;
+    /// fini_timestamp_hi: 32-bit; matched on the Memory bus against MEMW.
+    pub const FINI_TIMESTAMP_HI: usize = 7;
+
+    /// MU: real-row selector / LogUp multiplicity (1 on real rows, 0 on padding).
+    pub const MU: usize = 8;
+
+    pub const NUM_COLUMNS: usize = 9;
+
+    /// The halfword columns (cross-epoch-only quantities), in order — every column
+    /// that is `IsHalfword`-checked.
+    pub const RANGE_CHECKED_HALFWORDS: [usize; 2] = [INIT_EPOCH_0, INIT_EPOCH_1];
+}
+
+/// The two halfwords of an epoch label (genesis `0` or a small 1-based index, all
+/// well under 2^32).
+fn epoch_halfwords(epoch: u64) -> [u64; 2] {
+    debug_assert!(epoch < (1 << 32), "epoch label exceeds 32 bits");
+    [epoch & 0xFFFF, (epoch >> 16) & 0xFFFF]
+}
+
+// =========================================================================
+// Trace generation
+// =========================================================================
+
+/// Build the local-to-global trace: one row per touched cell's boundary claims,
+/// padded up to a power of two. Real rows set `MU = 1`; padding rows stay all-zero
+/// (`MU = 0`), so they fire no interactions.
+pub fn generate_local_to_global_trace(
+    boundaries: &[CellBoundary],
+) -> TraceTable<GoldilocksField, GoldilocksExtension> {
+    let num_rows = boundaries.len().next_power_of_two().max(1);
+    let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS];
+
+    for (row, b) in boundaries.iter().enumerate() {
+        let base = row * cols::NUM_COLUMNS;
+        let init_epoch = epoch_halfwords(b.init.originating_epoch);
+
+        // Plain 32-bit columns (MEMW-checked on the Memory bus).
+        data[base + cols::ADDRESS_LO] = FE::from(b.address & 0xFFFF_FFFF);
+        data[base + cols::ADDRESS_HI] = FE::from(b.address >> 32);
+        data[base + cols::FINI_TIMESTAMP_LO] = FE::from(b.fini.timestamp & 0xFFFF_FFFF);
+        data[base + cols::FINI_TIMESTAMP_HI] = FE::from(b.fini.timestamp >> 32);
+        // Byte values (AreBytes-checked).
+        data[base + cols::INIT_VALUE] = FE::from(b.init.value & 0xFF);
+        data[base + cols::FINI_VALUE] = FE::from(b.fini.value & 0xFF);
+        // Cross-epoch-only quantity as IsHalfword-checked halfwords.
+        data[base + cols::INIT_EPOCH_0] = FE::from(init_epoch[0]);
+        data[base + cols::INIT_EPOCH_1] = FE::from(init_epoch[1]);
+        // Real-row selector.
+        data[base + cols::MU] = FE::one();
+    }
+
+    TraceTable::new_main(data, cols::NUM_COLUMNS, 1)
+}
+
+// =========================================================================
+// Bus interactions
+// =========================================================================
+
+/// A 32-bit value reconstructed from its two halfword columns: `lo + 2^16·hi`.
+fn word(lo_col: usize, hi_col: usize) -> BusValue {
+    BusValue::linear(vec![
+        LinearTerm::Column {
+            coefficient: 1,
+            column: lo_col,
+        },
+        LinearTerm::Column {
+            coefficient: 1 << 16,
+            column: hi_col,
+        },
+    ])
+}
+
+/// A column read directly as a single field element (a 32-bit word or a byte).
+pub(crate) fn direct(column: usize) -> BusValue {
+    BusValue::Packed {
+        start_column: column,
+        packing: Packing::Direct,
+    }
+}
+
+fn mu() -> Multiplicity {
+    Multiplicity::Column(cols::MU)
+}
+
+/// Cross-epoch memory bus interactions, two per row (one touched cell):
+/// - **receive** the `init` token `(address, value, originating_epoch)` left by the
+///   epoch that last wrote the cell;
+/// - **send** the `fini` token `(address, value, epoch_label)` for the next epoch
+///   that touches the cell.
+///
+/// `fini_epoch` is the per-table constant `epoch_label`; `init_epoch` comes from the
+/// range-checked halfword columns via [`word`]; `address` is direct 32-bit columns.
+/// No timestamp is carried: the chain is ordered by epoch, and timestamps are
+/// epoch-local (only the Memory bus, not this one, uses them).
+///
+/// These tokens are matched ACROSS epochs by the final aggregation LogUp (step 4),
+/// so within a single epoch's table the GlobalMemory bus is deliberately
+/// unbalanced (real rows have `init != fini`). Padding rows fire nothing (`MU = 0`).
+pub fn bus_interactions(epoch_label: u64) -> Vec<BusInteraction> {
+    vec![
+        // init: receive the token left by the originating epoch. No timestamp: the
+        // chain is ordered by epoch, and timestamps are epoch-local (see cols).
+        BusInteraction::receiver(
+            BusId::GlobalMemory,
+            mu(),
+            vec![
+                direct(cols::ADDRESS_LO),
+                direct(cols::ADDRESS_HI),
+                direct(cols::INIT_VALUE),
+                word(cols::INIT_EPOCH_0, cols::INIT_EPOCH_1),
+            ],
+        ),
+        // fini: send the token for the next epoch to consume.
+        BusInteraction::sender(
+            BusId::GlobalMemory,
+            mu(),
+            vec![
+                direct(cols::ADDRESS_LO),
+                direct(cols::ADDRESS_HI),
+                direct(cols::FINI_VALUE),
+                BusValue::constant(epoch_label),
+            ],
+        ),
+    ]
+}
+
+/// Epoch-LOCAL memory bus interactions, mirroring PAGE-C3/C4 (`page.rs`).
+///
+/// Inside an epoch proof the L2G table bookends the epoch's `Memory` bus for the
+/// RAM bytes it touches: it receives each cell's initial token at timestamp 0
+/// (the epoch-start seed, matching the first MEMW read's `old_timestamp`) and
+/// sends its final token at the last access timestamp. This replaces PAGE's
+/// init/fini bookend for touched bytes. The `Memory` token layout is
+/// `[is_register, address_lo, address_hi, timestamp_lo, timestamp_hi, value]`;
+/// RAM only, so `is_register = 0`, and the byte value is the LO column.
+///
+/// Address, fini timestamp and the values appear here, so MEMW range-checks them
+/// for us — they need no L2G range check (see [`range_check_interactions`]).
+pub fn memory_bus_interactions() -> Vec<BusInteraction> {
+    vec![
+        // init: receive the cell's initial token at the epoch-start seed (ts = 0).
+        BusInteraction::receiver(
+            BusId::Memory,
+            mu(),
+            vec![
+                BusValue::constant(0),
+                direct(cols::ADDRESS_LO),
+                direct(cols::ADDRESS_HI),
+                BusValue::constant(0),
+                BusValue::constant(0),
+                direct(cols::INIT_VALUE),
+            ],
+        ),
+        // fini: send the cell's final token at the last access timestamp.
+        BusInteraction::sender(
+            BusId::Memory,
+            mu(),
+            vec![
+                BusValue::constant(0),
+                direct(cols::ADDRESS_LO),
+                direct(cols::ADDRESS_HI),
+                direct(cols::FINI_TIMESTAMP_LO),
+                direct(cols::FINI_TIMESTAMP_HI),
+                direct(cols::FINI_VALUE),
+            ],
+        ),
+    ]
+}
+
+/// Range-check + ordering bus interactions for the columns nothing else
+/// constrains, all with multiplicity `MU` (so padding fires none):
+/// - one `AreBytes` for the two value bytes (the `init` value is a trusted source);
+/// - one `IsHalfword` per cross-epoch-only halfword column;
+/// - one `IsB20` proving `init_epoch < fini_epoch` (the ordering constraint), via
+///   `fini_epoch − 1 − init_epoch` being a valid 20-bit value. With genesis epoch
+///   `0` this also covers genesis cells (`0 < fini_epoch`) with no special case.
+///
+/// Address and fini timestamp are NOT here — MEMW checks them on the Memory bus.
+/// These are committed only on the epoch-local table (`l2g_memory_air`), whose
+/// proof carries the BITWISE provider; the global proof commits the identical
+/// trace, so its columns inherit the same guarantee via the commitment binding.
+/// Keep this in sync with [`collect_bitwise_from_l2g`].
+pub fn range_check_interactions(epoch_label: u64) -> Vec<BusInteraction> {
+    let mut interactions = Vec::with_capacity(2 + cols::RANGE_CHECKED_HALFWORDS.len());
+    interactions.push(BusInteraction::sender(
+        BusId::AreBytes,
+        mu(),
+        vec![direct(cols::INIT_VALUE), direct(cols::FINI_VALUE)],
+    ));
+    for &column in &cols::RANGE_CHECKED_HALFWORDS {
+        interactions.push(BusInteraction::sender(
+            BusId::IsHalfword,
+            mu(),
+            vec![direct(column)],
+        ));
+    }
+    // Ordering: IsB20[epoch_label - 1 - init_epoch], where
+    // init_epoch = INIT_EPOCH_0 + 2^16·INIT_EPOCH_1.
+    interactions.push(BusInteraction::sender(
+        BusId::IsB20,
+        mu(),
+        vec![BusValue::linear(vec![
+            LinearTerm::Constant(epoch_label as i64 - 1),
+            LinearTerm::Column {
+                coefficient: -1,
+                column: cols::INIT_EPOCH_0,
+            },
+            LinearTerm::Column {
+                coefficient: -(1 << 16),
+                column: cols::INIT_EPOCH_1,
+            },
+        ])],
+    ));
+    interactions
+}
+
+/// The BITWISE lookups the L2G range checks + ordering check send, so the BITWISE
+/// table's multiplicities balance the [`range_check_interactions`] senders. Emits,
+/// per real row, one `AreBytes`, one `IsHalfword` per cross-epoch halfword, and one
+/// `IsB20` for the ordering difference. Padding rows fire nothing (`MU = 0`), so
+/// none are emitted for them.
+pub fn collect_bitwise_from_l2g(boundaries: &[CellBoundary]) -> Vec<BitwiseOperation> {
+    let per_row = 2 + cols::RANGE_CHECKED_HALFWORDS.len();
+    let mut ops = Vec::with_capacity(boundaries.len() * per_row);
+
+    let push_halfword = |ops: &mut Vec<BitwiseOperation>, v16: u64| {
+        ops.push(BitwiseOperation::halfword(
+            BitwiseOperationType::IsHalf,
+            (v16 & 0xFF) as u8,
+            ((v16 >> 8) & 0xFF) as u8,
+        ));
+    };
+
+    for b in boundaries {
+        ops.push(BitwiseOperation::byte_op(
+            BitwiseOperationType::AreBytes,
+            (b.init.value & 0xFF) as u8,
+            (b.fini.value & 0xFF) as u8,
+        ));
+        let init_epoch = epoch_halfwords(b.init.originating_epoch);
+        for v in init_epoch {
+            push_halfword(&mut ops, v);
+        }
+        // Ordering: IsB20[fini_epoch - 1 - init_epoch]. Honest rows have
+        // init_epoch < fini_epoch, so the difference is a small non-negative value.
+        let diff = b.fini.epoch - 1 - b.init.originating_epoch;
+        debug_assert!(diff < MAX_EPOCHS, "epoch gap exceeds IsB20 range");
+        ops.push(BitwiseOperation::b20(
+            (diff & 0xFF) as u8,
+            ((diff >> 8) & 0xFF) as u8,
+            ((diff >> 16) & 0xF) as u8,
+        ));
+    }
+
+    ops
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn find(epoch: &[CellBoundary], address: u64) -> &CellBoundary {
+        epoch
+            .iter()
+            .find(|b| b.address == address)
+            .expect("address not found in epoch boundaries")
+    }
+
+    #[test]
+    fn test_sparse_only_touched_cells() {
+        let initial_memory = HashMap::from([(10, 5)]);
+        let epochs = vec![
+            vec![(10, 7, 3), (20, 9, 4)], // epoch 0 touches 10 and 20
+            vec![(10, 8, 10)],            // epoch 1 touches only 10
+            vec![(20, 9, 20)],            // epoch 2 touches only 20
+        ];
+        let boundaries = epoch_boundaries(&initial_memory, &epochs);
+
+        assert_eq!(boundaries.len(), 3);
+        // Only touched cells appear, nothing else.
+        assert_eq!(boundaries[0].len(), 2);
+        assert_eq!(boundaries[1].len(), 1);
+        assert_eq!(boundaries[2].len(), 1);
+        assert_eq!(boundaries[1][0].address, 10);
+        assert_eq!(boundaries[2][0].address, 20);
+    }
+
+    #[test]
+    fn test_genesis_init_for_first_touch() {
+        let initial_memory = HashMap::from([(10, 5)]);
+        let epochs = vec![vec![(10, 7, 3), (20, 9, 4)]];
+        let boundaries = epoch_boundaries(&initial_memory, &epochs);
+
+        // Cell 10 starts from program memory: value 5, genesis epoch, ts 0.
+        let c10 = find(&boundaries[0], 10);
+        assert_eq!(
+            c10.init,
+            InitClaim {
+                value: 5,
+                originating_epoch: GENESIS_EPOCH,
+                timestamp: 0,
+            }
+        );
+        // Cell 20 was never in initial memory: genesis, value 0.
+        let c20 = find(&boundaries[0], 20);
+        assert_eq!(
+            c20.init,
+            InitClaim {
+                value: 0,
+                originating_epoch: GENESIS_EPOCH,
+                timestamp: 0,
+            }
+        );
+    }
+
+    #[test]
+    fn test_fini_records_current_epoch_label_and_timestamp() {
+        let initial_memory = HashMap::from([(10, 5)]);
+        let epochs = vec![vec![(10, 7, 3)], vec![(10, 8, 10)]];
+        let boundaries = epoch_boundaries(&initial_memory, &epochs);
+
+        // Labels are 1-based: epoch index 0 → label 1, index 1 → label 2.
+        assert_eq!(
+            find(&boundaries[0], 10).fini,
+            FiniClaim {
+                value: 7,
+                epoch: 1,
+                timestamp: 3,
+            }
+        );
+        assert_eq!(
+            find(&boundaries[1], 10).fini,
+            FiniClaim {
+                value: 8,
+                epoch: 2,
+                timestamp: 10,
+            }
+        );
+    }
+
+    #[test]
+    fn test_telescoping_consecutive_epochs() {
+        let initial_memory = HashMap::from([(10, 5)]);
+        let epochs = vec![vec![(10, 7, 3)], vec![(10, 8, 10)]];
+        let boundaries = epoch_boundaries(&initial_memory, &epochs);
+
+        // Epoch 0's fini for cell 10 is consumed as epoch 1's init.
+        let fini0 = find(&boundaries[0], 10).fini;
+        let init1 = find(&boundaries[1], 10).init;
+        assert_eq!(fini0.value, init1.value);
+        assert_eq!(fini0.epoch, init1.originating_epoch);
+        assert_eq!(fini0.timestamp, init1.timestamp);
+        // Concretely: epoch 0 (label 1) left (7, label 1, ts 3).
+        assert_eq!(
+            init1,
+            InitClaim {
+                value: 7,
+                originating_epoch: 1,
+                timestamp: 3,
+            }
+        );
+        // And init_epoch (1) < fini_epoch (2), the ordering invariant.
+        assert!(init1.originating_epoch < find(&boundaries[1], 10).fini.epoch);
+    }
+
+    #[test]
+    fn test_telescoping_skips_untouched_epochs() {
+        // Cell 20 is touched in epoch 0, skipped in epoch 1, touched again in 2.
+        let initial_memory = HashMap::new();
+        let epochs = vec![
+            vec![(20, 9, 4)],  // epoch 0 writes 20
+            vec![(10, 1, 5)],  // epoch 1 does not touch 20
+            vec![(20, 9, 20)], // epoch 2 touches 20 again
+        ];
+        let boundaries = epoch_boundaries(&initial_memory, &epochs);
+
+        // Epoch 2's init for cell 20 links straight back to epoch 0 (label 1).
+        let fini0 = find(&boundaries[0], 20).fini;
+        let init2 = find(&boundaries[2], 20).init;
+        assert_eq!(init2.originating_epoch, 1);
+        assert_eq!(init2.value, fini0.value);
+        assert_eq!(init2.timestamp, fini0.timestamp);
+    }
+
+    fn sample_boundary(address: u64) -> CellBoundary {
+        CellBoundary {
+            address,
+            init: InitClaim {
+                value: 0x1_0000_0005,
+                originating_epoch: GENESIS_EPOCH,
+                timestamp: 0,
+            },
+            fini: FiniClaim {
+                value: 0x2_0000_0007,
+                epoch: 1,
+                timestamp: 0x3_0000_0009,
+            },
+        }
+    }
+
+    /// Reconstruct a 32-bit value from its two halfword columns, as the bus does.
+    fn word_value(
+        trace: &TraceTable<GoldilocksField, GoldilocksExtension>,
+        lo: usize,
+        hi: usize,
+    ) -> FE {
+        *trace.main_table.get(0, lo) + FE::from(1u64 << 16) * *trace.main_table.get(0, hi)
+    }
+
+    #[test]
+    fn test_num_columns() {
+        assert_eq!(cols::NUM_COLUMNS, 9);
+        assert_eq!(cols::RANGE_CHECKED_HALFWORDS.len(), 2);
+    }
+
+    #[test]
+    fn test_columns_hold_the_split_values() {
+        let b = sample_boundary(0x4_0000_0001);
+        let trace = generate_local_to_global_trace(&[b]);
+
+        assert_eq!(trace.num_rows(), 1);
+
+        let lo32 = |v: u64| FE::from(v & 0xFFFF_FFFF);
+        let hi32 = |v: u64| FE::from(v >> 32);
+        let byte = |v: u64| FE::from(v & 0xFF);
+        let at = |c: usize| *trace.main_table.get(0, c);
+
+        // Address and fini timestamp are plain 32-bit columns (MEMW-checked).
+        assert_eq!(at(cols::ADDRESS_LO), lo32(b.address));
+        assert_eq!(at(cols::ADDRESS_HI), hi32(b.address));
+        assert_eq!(at(cols::FINI_TIMESTAMP_LO), lo32(b.fini.timestamp));
+        assert_eq!(at(cols::FINI_TIMESTAMP_HI), hi32(b.fini.timestamp));
+        // Values are stored as single bytes.
+        assert_eq!(at(cols::INIT_VALUE), byte(b.init.value));
+        assert_eq!(at(cols::FINI_VALUE), byte(b.fini.value));
+        // The cross-epoch-only quantity reconstructs from its halfwords.
+        // Genesis init epoch reconstructs to 0 (== GENESIS_EPOCH).
+        assert_eq!(
+            word_value(&trace, cols::INIT_EPOCH_0, cols::INIT_EPOCH_1),
+            FE::from(GENESIS_EPOCH)
+        );
+        // Real row carries MU = 1.
+        assert_eq!(at(cols::MU), FE::one());
+    }
+
+    #[test]
+    fn test_padding_rows_are_zero_including_mu() {
+        // 3 boundaries pad up to 4 rows; the padding row is all zero, MU = 0.
+        let boundaries: Vec<CellBoundary> = (0..3).map(sample_boundary).collect();
+        let trace = generate_local_to_global_trace(&boundaries);
+        assert_eq!(trace.num_rows(), 4);
+        for col in 0..cols::NUM_COLUMNS {
+            assert_eq!(*trace.main_table.get(3, col), FE::zero());
+        }
+        // And real rows have MU = 1.
+        for row in 0..3 {
+            assert_eq!(*trace.main_table.get(row, cols::MU), FE::one());
+        }
+    }
+
+    #[test]
+    fn test_empty_trace_is_padded_to_one_row() {
+        let trace = generate_local_to_global_trace(&[]);
+        assert_eq!(trace.num_rows(), 1);
+        for col in 0..cols::NUM_COLUMNS {
+            assert_eq!(*trace.main_table.get(0, col), FE::zero());
+        }
+    }
+
+    #[test]
+    fn test_bus_interactions() {
+        let interactions = bus_interactions(1);
+        assert_eq!(interactions.len(), 2); // init (receive) + fini (send)
+
+        let global_memory = u64::from(BusId::GlobalMemory);
+        let init = &interactions[0];
+        let fini = &interactions[1];
+
+        // init consumes the originating epoch's token; fini produces this epoch's.
+        assert!(!init.is_sender);
+        assert!(fini.is_sender);
+        assert_eq!(init.bus_id, global_memory);
+        assert_eq!(fini.bus_id, global_memory);
+
+        // Both tokens have the same 4-element shape so they can match across
+        // epochs: address(lo,hi), value(byte), epoch. No timestamp — the chain is
+        // ordered by epoch, and timestamps are epoch-local.
+        assert_eq!(init.values.len(), 4);
+        assert_eq!(fini.values.len(), 4);
+    }
+
+    #[test]
+    fn test_range_check_interactions_cover_every_column() {
+        let interactions = range_check_interactions(1);
+        // 1 AreBytes + one IsHalfword per cross-epoch halfword + 1 IsB20 ordering.
+        assert_eq!(interactions.len(), 2 + cols::RANGE_CHECKED_HALFWORDS.len());
+        let are_bytes = u64::from(BusId::AreBytes);
+        let is_halfword = u64::from(BusId::IsHalfword);
+        let is_b20 = u64::from(BusId::IsB20);
+        assert_eq!(interactions[0].bus_id, are_bytes);
+        assert_eq!(interactions[0].values.len(), 2);
+        for interaction in &interactions[1..1 + cols::RANGE_CHECKED_HALFWORDS.len()] {
+            assert!(interaction.is_sender);
+            assert_eq!(interaction.bus_id, is_halfword);
+            assert_eq!(interaction.values.len(), 1);
+        }
+        let ordering = interactions.last().unwrap();
+        assert!(ordering.is_sender);
+        assert_eq!(ordering.bus_id, is_b20);
+    }
+
+    #[test]
+    fn test_collect_bitwise_matches_sender_count() {
+        // Per row: 1 AreBytes + one IsHalfword per cross-epoch halfword + 1 IsB20.
+        // No padding ops (padding has MU = 0 and fires nothing).
+        let boundaries: Vec<CellBoundary> = (0..3).map(sample_boundary).collect();
+        let ops = collect_bitwise_from_l2g(&boundaries);
+        let per_row = 2 + cols::RANGE_CHECKED_HALFWORDS.len();
+        assert_eq!(ops.len(), boundaries.len() * per_row);
+
+        let count = |t: BitwiseOperationType| ops.iter().filter(|o| o.lookup_type == t).count();
+        assert_eq!(count(BitwiseOperationType::AreBytes), boundaries.len());
+        assert_eq!(
+            count(BitwiseOperationType::IsHalf),
+            boundaries.len() * cols::RANGE_CHECKED_HALFWORDS.len()
+        );
+        assert_eq!(count(BitwiseOperationType::IsB20), boundaries.len());
+    }
+
+    #[test]
+    fn test_collect_bitwise_values_match_the_committed_halfword_columns() {
+        // Each IsHalfword op the collector emits must carry the same value as the
+        // corresponding halfword column the range-check sender reads. Use a
+        // boundary with distinct values, and a real (>=1) originating epoch.
+        let b = CellBoundary {
+            address: 0x1234_5678_9abc_def0,
+            init: InitClaim {
+                value: 0xAB,
+                originating_epoch: 3,
+                timestamp: 0x4455_6677_8899_aabb,
+            },
+            fini: FiniClaim {
+                value: 0xCD,
+                epoch: 9,
+                timestamp: 0xccdd_eeff_0011_2233,
+            },
+        };
+        let trace = generate_local_to_global_trace(&[b]);
+        let ops = collect_bitwise_from_l2g(&[b]);
+
+        // The single AreBytes op carries the two value bytes.
+        assert_eq!(ops[0].lookup_type, BitwiseOperationType::AreBytes);
+        assert_eq!(ops[0].x as u64, b.init.value & 0xFF);
+        assert_eq!(ops[0].y as u64, b.fini.value & 0xFF);
+
+        // The IsHalfword ops follow, in RANGE_CHECKED_HALFWORDS order, each
+        // matching the value committed in that column.
+        for (i, &col) in cols::RANGE_CHECKED_HALFWORDS.iter().enumerate() {
+            let op = &ops[1 + i];
+            assert_eq!(op.lookup_type, BitwiseOperationType::IsHalf);
+            let op_value = op.x as u64 + ((op.y as u64) << 8);
+            assert_eq!(
+                FE::from(op_value),
+                *trace.main_table.get(0, col),
+                "IsHalfword op {i} value disagrees with column {col}"
+            );
+        }
+
+        // The last op is the ordering IsB20 of `fini_epoch - 1 - init_epoch`.
+        let ordering = ops.last().unwrap();
+        assert_eq!(ordering.lookup_type, BitwiseOperationType::IsB20);
+        let value = ordering.x as u64 + ((ordering.y as u64) << 8) + ((ordering.z as u64) << 16);
+        assert_eq!(value, b.fini.epoch - 1 - b.init.originating_epoch);
+    }
+
+    #[test]
+    fn test_ordering_rejects_future_reference() {
+        // The ordering sender computes the field value `fini_epoch - 1 - init_epoch`.
+        // For an honest row (init_epoch < fini_epoch) it's a small valid IsB20 value;
+        // for a forged FUTURE reference (init_epoch >= fini_epoch) it underflows in
+        // the field to a value far outside [0, 2^20), so no IsB20 row matches and the
+        // bus cannot balance.
+        let order_value = |fini_label: u64, init_epoch: u64| -> FE {
+            FE::from(fini_label - 1) - FE::from(init_epoch)
+        };
+
+        // Honest: epoch 5 consuming epoch 2's fini → 5 - 1 - 2 = 2, in range.
+        let honest = order_value(5, 2);
+        assert!(*honest.value() < (1 << 20));
+
+        // Forged future reference: epoch 5's init claims originating epoch 9.
+        let forged = order_value(5, 9);
+        assert!(
+            *forged.value() >= (1 << 20),
+            "a future-epoch reference must fall outside the IsB20 range"
+        );
+
+        // Forged self reference: epoch 5's init claims originating epoch 5.
+        // 5 - 1 - 5 = -1 in the field → also out of range.
+        let self_ref = order_value(5, 5);
+        assert!(*self_ref.value() >= (1 << 20));
+    }
+}
diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs
index 50bc399af..78d5f9a6a 100644
--- a/prover/src/tables/mod.rs
+++ b/prover/src/tables/mod.rs
@@ -33,11 +33,13 @@ pub mod ec_scalar;
 pub mod ecdas;
 pub mod ecsm;
 pub mod eq;
+pub mod global_memory;
 pub mod halt;
 pub mod keccak;
 pub mod keccak_rc;
 pub mod keccak_rnd;
 pub mod load;
+pub mod local_to_global;
 pub mod lt;
 pub mod memw;
 pub mod memw_aligned;
diff --git a/prover/src/tables/register.rs b/prover/src/tables/register.rs
index 5a09fb2fa..09485595a 100644
--- a/prover/src/tables/register.rs
+++ b/prover/src/tables/register.rs
@@ -28,6 +28,9 @@ use stark::proof::options::ProofOptions;
 use stark::prover::evaluate_polynomial_on_lde_domain;
 use stark::trace::{TraceTable, columns2rows};
 
+#[cfg(test)]
+use executor::vm::registers::Registers;
+
 use super::page::STACK_TOP;
 use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField, VmTable};
 
@@ -48,11 +51,21 @@ pub const WORDS_PER_REGISTER: usize = 2;
 /// -1 because x254 is single-word (1 address instead of 2).
 pub const NUM_REGISTER_ADDRESSES: usize = NUM_REGISTERS * WORDS_PER_REGISTER - 1;
 
-/// Number of preprocessed columns (OFFSET, INIT).
+/// Number of preprocessed columns (OFFSET, INIT) for the monolithic prover.
 /// OFFSET encodes the Word address, INIT holds the initial value.
 /// Program-dependent: x255 init = ELF entry point.
 pub const NUM_PREPROCESSED_COLS: usize = 2;
 
+/// Number of preprocessed columns (OFFSET, INIT, FINI) for continuation epochs.
+/// A continuation epoch additionally preprocesses FINI so the epoch's final
+/// register file becomes a verifier-known public value (`R_{i+1}`): the verifier
+/// recomputes the commitment from it, the REG-C2 Memory-bus token forces it to
+/// equal the true final registers, and the next epoch reuses the same `R_{i+1}`
+/// as its preprocessed INIT — binding `init(epoch i+1) == fini(epoch i)` with no
+/// extra bus. The monolithic prover keeps FINI as a main-trace column (it has no
+/// verifier-known final state), using `NUM_PREPROCESSED_COLS` instead.
+pub const NUM_PREPROCESSED_COLS_WITH_FINI: usize = 3;
+
 // =========================================================================
 // Column indices for REGISTER table
 // =========================================================================
@@ -114,8 +127,22 @@ fn register_word_address_list() -> [u64; NUM_REGISTER_ADDRESSES] {
     addrs
 }
 
+// Positions of the non-general-purpose registers within a register-init vector
+// (indexed in `register_word_address_list` order). x0-x31 occupy positions 0..63
+// (position `i` is word address `i`), so register `r`'s two words are at `2r`, `2r+1`.
+/// Position of x254 (synthetic commit index, word address 508).
+pub(crate) const X254_INDEX: usize = 64;
+/// Position of x255 (PC) low word (word address 510).
+pub(crate) const PC_LO_INDEX: usize = 65;
+/// Position of x255 (PC) high word (word address 511).
+pub(crate) const PC_HI_INDEX: usize = 66;
+
 /// Compute the initial value for a register Word address.
 ///
+/// This is the **program-start** register image, so it only applies to the first
+/// continuation epoch (or a whole-program run). Later epochs start mid-execution
+/// and supply their own boundary register snapshot instead.
+///
 /// - SP (x2) words at offset 4,5 hold STACK_TOP
 /// - x254 at offset 508 is the synthetic commit index, initialized to 0
 /// - PC (x255) words at offset 510,511 hold entry_point
@@ -130,6 +157,48 @@ fn init_value_for_address(word_addr: u64, entry_point: u64) -> u32 {
     }
 }
 
+/// Build the register init vector (one initial value per row, in
+/// `register_word_address_list` order) for a program starting at `entry_point`
+/// (the program-start register image). A continuation epoch would instead supply
+/// its boundary register snapshot.
+pub(crate) fn register_init_from_entry_point(entry_point: u64) -> Vec<u32> {
+    register_word_address_list()
+        .iter()
+        .map(|&addr| init_value_for_address(addr, entry_point))
+        .collect()
+}
+
+/// Build the register init map from an epoch's boundary register snapshot: the
+/// executor `Registers` (x1-x31, including SP) plus the program counter (x255).
+/// x0 and the synthetic commit index (x254) are zero in the naive version.
+///
+/// Used by tests that build a single epoch from a boundary snapshot. The
+/// continuation prover no longer uses this for chaining: epoch i+1's register
+/// init comes from epoch i's *bound* fini (`fini_from_trace`, carried as the next
+/// epoch's preprocessed INIT), not a trusted executor snapshot.
+#[cfg(test)]
+pub(crate) fn register_init_from_snapshot(registers: &Registers, pc: u64) -> Vec<u32> {
+    let mut init = vec![0u32; NUM_REGISTER_ADDRESSES];
+    for reg in 0u8..32 {
+        let value = if reg == 0 {
+            0
+        } else {
+            registers.read(reg as u32).unwrap_or(0)
+        };
+        let base = (reg as usize) * 2;
+        init[base] = (value & 0xFFFF_FFFF) as u32;
+        init[base + 1] = (value >> 32) as u32;
+    }
+    // x254 synthetic commit index, hardcoded to 0 in this test-only helper, so it
+    // is only correct for an epoch with no preceding COMMIT. The production path
+    // carries x254 across epochs via the previous epoch's bound FINI vector, not
+    // this snapshot helper.
+    init[X254_INDEX] = 0;
+    init[PC_LO_INDEX] = (pc & 0xFFFF_FFFF) as u32;
+    init[PC_HI_INDEX] = (pc >> 32) as u32;
+    init
+}
+
 /// Generates the REGISTER trace table.
 ///
 /// Creates a table with NUM_REGISTER_ADDRESSES rows.
@@ -139,14 +208,15 @@ fn init_value_for_address(word_addr: u64, entry_point: u64) -> u32 {
 /// ## Arguments
 ///
 /// * `final_state` - Map from register Word address to final (timestamp, value)
-/// * `entry_point` - ELF entry point (initial PC value for x255)
+/// * `init` - Initial value per row, in `register_word_address_list` order
+///   (program-start image, or an epoch's boundary register snapshot)
 ///
 /// ## Returns
 ///
 /// The trace table for registers.
 pub fn generate_register_trace(
     final_state: &FinalRegisterStateMap,
-    entry_point: u64,
+    init: &[u32],
 ) -> TraceTable<GoldilocksField, GoldilocksExtension> {
     let num_rows = NUM_REGISTER_ADDRESSES.next_power_of_two();
     let mut trace = TraceTable::new_main(
@@ -161,7 +231,7 @@ pub fn generate_register_trace(
         // Offset = actual Word address in register space
         table.set_u64(row, cols::OFFSET, word_addr);
 
-        let init_value = init_value_for_address(word_addr, entry_point);
+        let init_value = init.get(row).copied().unwrap_or(0);
         table.set_word(row, cols::INIT, init_value);
 
         // Final state: if accessed use final, otherwise use initial (timestamp 1)
@@ -186,6 +256,18 @@ pub fn generate_register_trace(
     trace
 }
 
+/// Extract the per-register final values (`R_{i+1}`) from a committed REGISTER
+/// trace: reads `FINI` on the real rows (the first `NUM_REGISTER_ADDRESSES`) into
+/// a vector in `register_word_address_list` order — entry `i` is the final value
+/// of the register at `register_word_address_list()[i]`. This is the epoch's final
+/// register file; the continuation builds this epoch's preprocessed FINI
+/// commitment from it and reuses it as the next epoch's preprocessed INIT.
+pub fn fini_from_trace(trace: &TraceTable<GoldilocksField, GoldilocksExtension>) -> Vec<u32> {
+    (0..NUM_REGISTER_ADDRESSES)
+        .map(|row| trace.main_table.get(row, cols::FINI).to_raw() as u32)
+        .collect()
+}
+
 // =========================================================================
 // Preprocessed commitment
 // =========================================================================
@@ -195,21 +277,55 @@ pub fn generate_register_trace(
 /// Program-dependent: x255 (PC) init = entry_point.
 /// OFFSET encodes the Word address (0..63 for x0-x31, 508 for x254, 510-511 for x255).
 /// INIT holds the initial value (SP=STACK_TOP, PC=entry_point, rest=0).
-pub fn compute_precomputed_commitment(options: &ProofOptions, entry_point: u64) -> Commitment {
+pub fn compute_precomputed_commitment(options: &ProofOptions, init: &[u32]) -> Commitment {
+    let num_rows = NUM_REGISTER_ADDRESSES.next_power_of_two();
+    let addr_list = register_word_address_list();
+
+    let mut offset_col = vec![FE::zero(); num_rows];
+    let mut init_col = vec![FE::zero(); num_rows];
+
+    for i in 0..NUM_REGISTER_ADDRESSES {
+        offset_col[i] = FE::from(addr_list[i]);
+        init_col[i] = FE::from(init.get(i).copied().unwrap_or(0) as u64);
+    }
+
+    commit_register_columns(options, vec![offset_col, init_col])
+}
+
+/// Continuation variant: commits OFFSET + INIT + FINI, so the verifier recomputes
+/// the commitment from the public `init` (`R_i`) and `fini` (`R_{i+1}`) and the
+/// proof's FINI column is locked to `R_{i+1}`. `fini` is the vector produced by
+/// `fini_from_trace` (entry `i` = the register at `register_word_address_list()[i]`).
+/// Used by continuation epochs with `NUM_PREPROCESSED_COLS_WITH_FINI`; must match
+/// the column order of the REGISTER trace (OFFSET, INIT, FINI), and FINI on padding
+/// rows is 0 (as the trace builds it).
+pub fn compute_precomputed_commitment_with_fini(
+    options: &ProofOptions,
+    init: &[u32],
+    fini: &[u32],
+) -> Commitment {
+    debug_assert_eq!(fini.len(), NUM_REGISTER_ADDRESSES);
     let num_rows = NUM_REGISTER_ADDRESSES.next_power_of_two();
     let addr_list = register_word_address_list();
 
     let mut offset_col = vec![FE::zero(); num_rows];
     let mut init_col = vec![FE::zero(); num_rows];
+    let mut fini_col = vec![FE::zero(); num_rows];
 
     for i in 0..NUM_REGISTER_ADDRESSES {
-        let word_addr = addr_list[i];
-        offset_col[i] = FE::from(word_addr);
-        init_col[i] = FE::from(init_value_for_address(word_addr, entry_point) as u64);
+        offset_col[i] = FE::from(addr_list[i]);
+        init_col[i] = FE::from(init.get(i).copied().unwrap_or(0) as u64);
+        fini_col[i] = FE::from(fini[i] as u64);
     }
 
-    let columns = [offset_col, init_col];
+    commit_register_columns(options, vec![offset_col, init_col, fini_col])
+}
 
+/// LDE + bit-reverse + Merkle-commit the given preprocessed columns (in column
+/// order). Shared by the monolithic (OFFSET, INIT) and continuation
+/// (OFFSET, INIT, FINI) preprocessed commitments.
+fn commit_register_columns(options: &ProofOptions, columns: Vec<Vec<FE>>) -> Commitment {
+    let num_rows = NUM_REGISTER_ADDRESSES.next_power_of_two();
     let polys: Vec<Polynomial<FE>> = columns
         .iter()
         .map(|col| {
@@ -241,8 +357,8 @@ pub fn compute_precomputed_commitment(options: &ProofOptions, entry_point: u64)
 /// Returns the preprocessed commitment for the REGISTER table.
 ///
 /// Program-dependent (entry_point varies per ELF), so not globally cached.
-pub fn preprocessed_commitment(options: &ProofOptions, entry_point: u64) -> Commitment {
-    compute_precomputed_commitment(options, entry_point)
+pub fn preprocessed_commitment(options: &ProofOptions, init: &[u32]) -> Commitment {
+    compute_precomputed_commitment(options, init)
 }
 
 // =========================================================================
diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs
index 99a0ded51..f3ca090d7 100644
--- a/prover/src/tables/trace_builder.rs
+++ b/prover/src/tables/trace_builder.rs
@@ -56,6 +56,7 @@ use super::keccak::{self, KeccakOperation};
 use super::keccak_rc;
 use super::keccak_rnd::{self, KeccakRoundOperation};
 use super::load::{self, LoadOperation};
+use super::local_to_global;
 use super::lt::{self, LtOperation};
 use super::memw::{self, MemwOperation};
 use super::memw_aligned;
@@ -67,6 +68,7 @@ use super::shift::{self, ShiftOperation};
 use super::store;
 use super::types::{GoldilocksExtension, GoldilocksField};
 use crate::Error;
+use crate::paged_mem::{ImageSource, PagedMem};
 
 // =============================================================================
 // Memory and Register State Tracking
@@ -80,35 +82,30 @@ type RegisterCell = (u64, u64);
 
 /// Memory state tracker for generating MEMW/LOAD traces.
 struct MemoryState {
-    /// Map from byte address to (value, timestamp)
-    cells: HashMap<u64, MemoryCell>,
+    /// Per byte-address `(value, timestamp)`, as a dense per-page store. This is
+    /// the hot structure — `read_byte`/`write_byte` hit it on every memory access
+    /// during the replay, and it's rebuilt each epoch — so a per-page array (small
+    /// page-map lookup + dense indexing, no per-cell hashing or rehash-on-grow)
+    /// is both lighter and faster than a per-cell `HashMap`.
+    cells: PagedMem<MemoryCell>,
 }
 
 impl MemoryState {
     fn new() -> Self {
         Self {
-            cells: HashMap::new(),
+            cells: PagedMem::new((0, 0)),
         }
     }
 
-    /// Initialize memory state from ELF segments.
+    /// Initialize memory state from a pre-built initial-memory image.
     ///
-    /// Pre-populates all ELF bytes with timestamp=0 so that when MEMW first
+    /// Pre-populates all starting bytes with timestamp=0 so that when MEMW first
     /// accesses an address, it gets the correct initial value for `old_value`.
     /// This is required for the Memory bus to balance (MEMW-M1 must match PAGE-C3).
-    fn from_elf(elf: &Elf) -> Self {
-        let mut cells = HashMap::new();
-        for segment in &elf.data {
-            for (i, &word) in segment.values.iter().enumerate() {
-                let word_addr = segment.base_addr.wrapping_add(i as u64 * 4);
-                // Split 32-bit word into 4 bytes (little-endian)
-                for byte_offset in 0..4u64 {
-                    let byte_addr = word_addr.wrapping_add(byte_offset);
-                    let byte_value = ((word >> (byte_offset * 8)) & 0xFF) as u8;
-                    // Initial state: value from ELF, timestamp=0
-                    cells.insert(byte_addr, (byte_value, 0));
-                }
-            }
+    fn from_image<I: ImageSource>(image: &I) -> Self {
+        let mut cells = PagedMem::new((0, 0));
+        for (addr, value) in image.image_iter() {
+            cells.set(addr, (value, 0));
         }
         Self { cells }
     }
@@ -121,30 +118,18 @@ impl MemoryState {
             "page_size must be a power of two for the bitmask to work"
         );
         let mask = !(page_size - 1);
-        let pages: HashSet<u64> = self.cells.keys().map(|&a| a & mask).collect();
+        let pages: HashSet<u64> = self.cells.iter().map(|(a, _)| a & mask).collect();
         pages.len() as u64
     }
 
-    /// Pre-populate the private input memory region at `PRIVATE_INPUT_START_INDEX`.
-    fn add_private_input(&mut self, private_input: &[u8]) {
-        if private_input.is_empty() {
-            return;
-        }
-        use executor::vm::memory::PRIVATE_INPUT_START_INDEX;
-        let start = PRIVATE_INPUT_START_INDEX;
-        for (i, &b) in private_input_bytes(private_input).iter().enumerate() {
-            self.cells.insert(start + i as u64, (b, 0));
-        }
-    }
-
     /// Read a byte from memory. Returns (value, timestamp) or (0, 0) if never written.
     fn read_byte(&self, address: u64) -> MemoryCell {
-        self.cells.get(&address).copied().unwrap_or((0, 0))
+        self.cells.get(address)
     }
 
     /// Write a byte to memory with the given timestamp.
     fn write_byte(&mut self, address: u64, value: u8, timestamp: u64) {
-        self.cells.insert(address, (value, timestamp));
+        self.cells.set(address, (value, timestamp));
     }
 
     /// Read multiple bytes. Returns arrays of values and timestamps.
@@ -193,6 +178,28 @@ impl RegisterState {
         }
     }
 
+    /// Seed register state from a register init vector (one value per row, in
+    /// `register_word_address_list` order), so the first access in a continuation
+    /// epoch reads the epoch's boundary register values as `old_value`. All initial
+    /// timestamps are 1, matching the REGISTER table's init token. Mirrors
+    /// `MemoryState::from_image`.
+    fn from_init(init: &[u32]) -> Self {
+        let word = |pos: usize| init.get(pos).copied().unwrap_or(0) as u64;
+        let mut regs = [(0u64, 1u64); 32];
+        for (reg, slot) in regs.iter_mut().enumerate() {
+            let base = reg * 2;
+            *slot = (word(base) | (word(base + 1) << 32), 1);
+        }
+        Self {
+            regs,
+            index_register: (init.get(register::X254_INDEX).copied().unwrap_or(0), 1),
+            pc_register: (
+                word(register::PC_LO_INDEX) | (word(register::PC_HI_INDEX) << 32),
+                1,
+            ),
+        }
+    }
+
     /// Read a register. Returns (value, last_write_timestamp).
     fn read(&self, reg: u8) -> RegisterCell {
         self.regs[reg as usize]
@@ -386,7 +393,12 @@ fn collect_ops_from_cpu(
     let mut ecsm_ops = Vec::new();
     let mut ec_scalar_ops = Vec::new();
     let mut ecdas_ops = Vec::new();
-    let mut current_commit_index = 0u32;
+    // Seed from the carried x254 (0 for a monolithic run or the first epoch) so a
+    // continuation epoch indexes its commits globally, matching the x254 the
+    // register binding transports across epochs. Resetting to 0 here would drift
+    // from x254 and break the COMMIT chip's Memw token (see the drift assert below).
+    let start_commit_index = register_state.read_index().0;
+    let mut current_commit_index = start_commit_index;
     let mut commit_ecall_count = 0u32;
 
     for op in cpu_ops {
@@ -510,10 +522,11 @@ fn collect_ops_from_cpu(
         bitwise_ops.extend(op.collect_bitwise_ops());
     }
 
-    // Each ecall generates count+1 operations (count real rows + 1 end row)
+    // Each ecall generates count+1 operations (count real rows + 1 end row).
+    // Count only this epoch's rows, so subtract the carried start index.
     debug_assert_eq!(
         commit_ops.len(),
-        current_commit_index as usize + commit_ecall_count as usize,
+        (current_commit_index - start_commit_index) as usize + commit_ecall_count as usize,
         "commit_ops count should match accumulated commit index plus end rows"
     );
 
@@ -1058,8 +1071,15 @@ fn collect_commit_memw_ops(
         let old_value = [old_index as u64, 0, 0, 0, 0, 0, 0, 0];
         let new_value = [new_index as u64, 0, 0, 0, 0, 0, 0, 0];
         let old_timestamps = [old_ts, 0, 0, 0, 0, 0, 0, 0];
-        let memw_op = MemwOperation::new(true, 508, new_value, ts, 1, true)
-            .with_old(old_value, old_timestamps);
+        let memw_op = MemwOperation::new(
+            true,
+            register::register_base_address(254),
+            new_value,
+            ts,
+            1,
+            true,
+        )
+        .with_old(old_value, old_timestamps);
         memw_ops.push(memw_op);
         register_state.write_index(new_index, ts);
     }
@@ -1830,67 +1850,130 @@ fn private_input_bytes(private_input: &[u8]) -> Vec<u8> {
         .collect()
 }
 
-fn build_init_page_data(elf: &Elf, private_input: &[u8]) -> HashMap<u64, Vec<u8>> {
-    use executor::vm::memory::PRIVATE_INPUT_START_INDEX;
-    let page_size = page::DEFAULT_PAGE_SIZE;
-    let mut init_page_data: HashMap<u64, Vec<u8>> = HashMap::new();
+/// Build the initial-memory image (byte address -> value) from the ELF segments
+/// and the private-input region. Single source of "what memory starts as", read
+/// by both `MemoryState` seeding and PAGE/bitwise init.
+pub(crate) fn build_initial_image(elf: &Elf, private_input: &[u8]) -> HashMap<u64, u8> {
+    let mut image: HashMap<u64, u8> = HashMap::new();
     for segment in &elf.data {
         for (i, &word) in segment.values.iter().enumerate() {
-            let word_addr = segment.base_addr + (i as u64 * 4);
+            let word_addr = segment.base_addr.wrapping_add(i as u64 * 4);
             for byte_offset in 0..4u64 {
-                let byte_addr = word_addr + byte_offset;
+                let byte_addr = word_addr.wrapping_add(byte_offset);
                 let byte_value = ((word >> (byte_offset * 8)) & 0xFF) as u8;
-                let page_base = page::page_base_for_address(byte_addr);
-                let offset = page::offset_in_page(byte_addr);
-                let page_data = init_page_data
-                    .entry(page_base)
-                    .or_insert_with(|| vec![0u8; page_size]);
-                page_data[offset] = byte_value;
+                image.insert(byte_addr, byte_value);
             }
         }
     }
     if !private_input.is_empty() {
+        use executor::vm::memory::PRIVATE_INPUT_START_INDEX;
         for (i, &b) in private_input_bytes(private_input).iter().enumerate() {
-            let addr = PRIVATE_INPUT_START_INDEX + i as u64;
-            let page_base = page::page_base_for_address(addr);
-            let offset = page::offset_in_page(addr);
-            let page_data = init_page_data
-                .entry(page_base)
-                .or_insert_with(|| vec![0u8; page_size]);
-            page_data[offset] = b;
+            image.insert(PRIVATE_INPUT_START_INDEX + i as u64, b);
         }
     }
-    init_page_data
+    image
 }
 
-fn collect_bitwise_from_page(
+/// Build the initial-memory image as a dense per-page store instead of a
+/// per-cell `HashMap`. Used by the streaming continuation, which carries the
+/// image across all epochs (so its size matters); the byte values are identical
+/// to [`build_initial_image`]. Unset cells read back as 0.
+pub(crate) fn build_initial_image_paged(elf: &Elf, private_input: &[u8]) -> PagedMem<u8> {
+    let mut image = PagedMem::new(0u8);
+    for segment in &elf.data {
+        for (i, &word) in segment.values.iter().enumerate() {
+            let word_addr = segment.base_addr.wrapping_add(i as u64 * 4);
+            for byte_offset in 0..4u64 {
+                let byte_addr = word_addr.wrapping_add(byte_offset);
+                let byte_value = ((word >> (byte_offset * 8)) & 0xFF) as u8;
+                image.set(byte_addr, byte_value);
+            }
+        }
+    }
+    if !private_input.is_empty() {
+        use executor::vm::memory::PRIVATE_INPUT_START_INDEX;
+        for (i, &b) in private_input_bytes(private_input).iter().enumerate() {
+            image.set(PRIVATE_INPUT_START_INDEX + i as u64, b);
+        }
+    }
+    image
+}
+
+/// Test helper for computing one epoch's local-to-global touched cells without
+/// building every trace table.
+#[cfg(test)]
+pub(crate) fn epoch_touched_cells<I: ImageSource>(
     elf: &Elf,
+    initial_image: &I,
+    register_init: &[u32],
+    logs: &[Log],
+) -> Result<Vec<(u64, u64, u64)>, Error> {
+    let instructions = decode::instructions_from_elf(elf)
+        .map_err(|e| Error::Execution(format!("Failed to parse instructions: {e}")))?;
+    let cpu_ops = collect_cpu_ops(logs, &instructions)?;
+
+    let mut memory_state = MemoryState::from_image(initial_image);
+    let mut register_state = RegisterState::from_init(register_init);
+    let _ = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state);
+
+    Ok(touched_cells_from_memory_state(&memory_state))
+}
+
+fn touched_cells_from_memory_state(memory_state: &MemoryState) -> local_to_global::EpochTouches {
+    let mut touched: Vec<(u64, u64, u64)> = memory_state
+        .cells
+        .iter()
+        .filter(|(_, cell)| cell.1 > 0)
+        .map(|(addr, cell)| (addr, cell.0 as u64, cell.1))
+        .collect();
+    touched.sort_by_key(|&(addr, _, _)| addr);
+    touched
+}
+
+/// Bucket an initial-memory image into per-page byte arrays for PAGE init columns.
+pub(crate) fn build_init_page_data<I: ImageSource>(image: &I) -> HashMap<u64, Vec<u8>> {
+    let page_size = page::DEFAULT_PAGE_SIZE;
+    let mut init_page_data: HashMap<u64, Vec<u8>> = HashMap::new();
+    for (addr, value) in image.image_iter() {
+        let page_base = page::page_base_for_address(addr);
+        let offset = page::offset_in_page(addr);
+        let page_data = init_page_data
+            .entry(page_base)
+            .or_insert_with(|| vec![0u8; page_size]);
+        page_data[offset] = value;
+    }
+    init_page_data
+}
+
+fn collect_bitwise_from_page<I: ImageSource>(
+    image: &I,
     memory_state: &MemoryState,
-    private_input: &[u8],
+    exclude_touched: bool,
 ) -> Vec<BitwiseOperation> {
     use std::collections::BTreeSet;
 
     let page_size = page::DEFAULT_PAGE_SIZE;
     let mut bitwise_ops = Vec::new();
 
-    let elf_page_data = build_init_page_data(elf, private_input);
+    let init_page_data = build_init_page_data(image);
 
     // Derive ALL page bases from memory_state (includes ELF + runtime pages)
-    let mut page_bases: BTreeSet<u64> = BTreeSet::new();
-    for &addr in memory_state.cells.keys() {
-        page_bases.insert(page::page_base_for_address(addr));
-    }
+    let page_bases: BTreeSet<u64> = memory_state.cells.page_bases().collect();
 
-    // Build final state map from memory_state
+    // Build final state map from memory_state, matching `generate_page_tables`:
+    // when `exclude_touched`, touched cells (timestamp > 0) are dropped so PAGE
+    // emits `fini == init` for them, and the ARE_BYTES multiplicities here must
+    // agree (otherwise the AreBytes bus would not balance).
     let final_state: FinalStateMap = memory_state
         .cells
         .iter()
-        .map(|(&addr, &(value, timestamp))| (addr, FinalByteState { timestamp, value }))
+        .filter(|(_, cell)| !exclude_touched || cell.1 == 0)
+        .map(|(addr, (value, timestamp))| (addr, FinalByteState { timestamp, value }))
         .collect();
 
     // For each page and each byte, add ARE_BYTES lookups for init and fini
     for &page_base in &page_bases {
-        let init_data = elf_page_data.get(&page_base);
+        let init_data = init_page_data.get(&page_base);
 
         for offset in 0..page_size {
             let addr = page_base + offset as u64;
@@ -2008,13 +2091,10 @@ fn collect_bitwise_from_commit(commit_ops: &[CommitOperation]) -> Vec<BitwiseOpe
 }
 
 // =============================================================================
-// PAGE Table Generation
+// BITWISE lookup helpers
 // =============================================================================
 
-/// Generates PAGE tables for memory initialization and finalization.
-///
-/// Derives all page bases from `memory_state.cells.keys()` — this includes
-/// IS_HALF lookup for a value `v ∈ [0, 2^16)` (split into low/high bytes).
+/// IS_HALF lookup for a value `v in [0, 2^16)` (split into low/high bytes).
 fn is_half_op(v: u16) -> BitwiseOperation {
     BitwiseOperation::halfword(
         BitwiseOperationType::IsHalf,
@@ -2345,30 +2425,32 @@ pub(crate) fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec
 
 /// every address accessed during execution (ELF init + runtime stores/loads).
 /// ELF pages get their init data from the binary; all others are zero-init.
-fn generate_page_tables(
-    elf: &Elf,
+fn generate_page_tables<I: ImageSource>(
+    image: &I,
     memory_state: &MemoryState,
     private_input: &[u8],
+    exclude_touched: bool,
 ) -> (
     Vec<TraceTable<GoldilocksField, GoldilocksExtension>>,
     Vec<PageConfig>,
 ) {
     use std::collections::BTreeSet;
 
-    // Collect init data from ELF segments + private input region
-    let init_page_data = build_init_page_data(elf, private_input);
+    // Per-page init bytes from the initial-memory image.
+    let init_page_data = build_init_page_data(image);
 
     // Derive ALL page bases from memory_state (includes ELF + runtime pages)
-    let mut page_bases: BTreeSet<u64> = BTreeSet::new();
-    for &addr in memory_state.cells.keys() {
-        page_bases.insert(page::page_base_for_address(addr));
-    }
+    let page_bases: BTreeSet<u64> = memory_state.cells.page_bases().collect();
 
-    // Build final state map from memory_state
+    // Build final state map from memory_state. When `exclude_touched` (continuation
+    // epoch with L2G bookend), drop touched cells (timestamp > 0) so PAGE self-
+    // cancels them (init == fini, ts == 0) and the local-to-global table owns their
+    // Memory-bus init/fini instead.
     let final_state: FinalStateMap = memory_state
         .cells
         .iter()
-        .map(|(&addr, &(value, timestamp))| (addr, FinalByteState { timestamp, value }))
+        .filter(|(_, cell)| !exclude_touched || cell.1 == 0)
+        .map(|(addr, (value, timestamp))| (addr, FinalByteState { timestamp, value }))
         .collect();
 
     // Generate PAGE tables and configs
@@ -2481,6 +2563,14 @@ pub struct Traces {
 
     /// MEMW_R register-only fast-path traces (split into chunks of max_rows::MEMW_R)
     pub memw_registers: Vec<TraceTable<GoldilocksField, GoldilocksExtension>>,
+    /// Local-to-global boundary table for continuation epochs. Empty unless the
+    /// continuation driver fills it with the boundary derived from
+    /// `touched_memory_cells`.
+    pub local_to_global: TraceTable<GoldilocksField, GoldilocksExtension>,
+    /// Touched cells observed while replaying this epoch's logs, each as
+    /// `(address, end_value, end_timestamp)`. Populated only for continuation
+    /// epochs that use the L2G memory bookend.
+    pub touched_memory_cells: local_to_global::EpochTouches,
     // Auxiliary ALU / memory / CPU32 dispatch chips (split into chunks of their max_rows)
     pub eqs: Vec<TraceTable<GoldilocksField, GoldilocksExtension>>,
     pub bytewises: Vec<TraceTable<GoldilocksField, GoldilocksExtension>>,
@@ -2569,11 +2659,16 @@ fn collect_all_ops(
     ec_scalar_ops: Vec<ec_scalar::EcScalarOperation>,
     ecdas_ops: Vec<ecdas::EcdasOperation>,
     register_state: &mut RegisterState,
+    is_final: bool,
 ) -> CollectedOps {
     // HALT finalization: 33 register MEMW operations at timestamp u64::MAX.
     // Must come before Phase 3 (LT from MEMW) so HALT ops get timestamp checks.
-    let halt_memw_ops = collect_halt_ops(register_state);
-    memw_ops.extend(halt_memw_ops);
+    // Only the final epoch terminates; intermediate epochs keep their boundary
+    // register state (no zeroizing) so it can seed the next epoch.
+    if is_final {
+        let halt_memw_ops = collect_halt_ops(register_state);
+        memw_ops.extend(halt_memw_ops);
+    }
 
     // Route MEMW_R (register fast-path) first, then MEMW_A (aligned), rest → MEMW.
     // Order matters: register ops would also pass is_aligned_op, so check first.
@@ -2707,20 +2802,23 @@ fn collect_all_ops(
 
 /// Phases 3-5: From routed ops, produce all traces and assemble `Traces`.
 ///
-/// `elf` controls PAGE table generation: `Some(elf)` generates real PAGE tables
-/// and PAGE bitwise lookups; `None` produces empty page tables.
+/// `initial_image` controls PAGE table generation: `Some(image)` generates real
+/// PAGE tables and PAGE bitwise lookups seeded from the initial-memory image;
+/// `None` produces empty page tables.
 #[allow(clippy::too_many_arguments)]
-fn build_traces(
+fn build_traces<I: ImageSource + Sync>(
     ops: CollectedOps,
-    elf: Option<&Elf>,
+    initial_image: Option<&I>,
     memory_state: &MemoryState,
-    entry_point: u64,
+    register_init: &[u32],
     decode_trace: TraceTable<GoldilocksField, GoldilocksExtension>,
     decode_pc_to_row: decode::PcToRow,
     mut register_state: RegisterState,
     max_rows: &super::MaxRowsConfig,
     #[cfg(feature = "disk-spill")] storage_mode: StorageMode,
     private_input: &[u8],
+    is_final: bool,
+    l2g_memory_bookend: bool,
 ) -> Result<Traces, Error> {
     let CollectedOps {
         cpu_ops,
@@ -2775,9 +2873,17 @@ fn build_traces(
     bitwise_ops.extend(collect_bitwise_from_memw_aligned(&memw_aligned_ops));
     // MEMW_R sends IS_HALFWORD[timestamp_0 - old_timestamp_lo - 1]
     bitwise_ops.extend(collect_bitwise_from_memw_register(&memw_register_ops));
-    // PAGE tables do a batched ARE_BYTES[init, fini] lookup per row (C1+C2)
-    if let Some(elf) = elf {
-        bitwise_ops.extend(collect_bitwise_from_page(elf, memory_state, private_input));
+    // PAGE tables do a batched ARE_BYTES[init, fini] lookup per row (C1+C2).
+    // Continuation epochs (l2g_memory_bookend) skip PAGE entirely (see the
+    // generate_page_tables call below), so they skip its AreBytes lookups too.
+    if let Some(image) = initial_image
+        && !l2g_memory_bookend
+    {
+        bitwise_ops.extend(collect_bitwise_from_page(
+            image,
+            memory_state,
+            l2g_memory_bookend,
+        ));
     }
 
     let public_output_bytes: Vec<u8> = commit_ops
@@ -2804,22 +2910,28 @@ fn build_traces(
     // PHASE 5: Generate final traces (parallelized)
     // =====================================================================
 
-    // Extract halt timestamp from the last ECALL instruction
-    let halt_op = cpu_ops
-        .iter()
-        .rev()
-        .find(|op| op.decode.fields.ecall)
-        .ok_or(Error::MissingHaltEcall)?;
-    let halt_timestamp = halt_op.timestamp;
-    let halt_next_pc = halt_op.next_pc;
-
-    // Finalize the PC (x255) on the REGISTER table. The CPU padding rows carry
-    // pc=1 and chain the inline-PC `memory` tokens with a +4 timestamp cadence
-    // starting from the HALT chip's emit_pc at `halt_timestamp + 1`; the last
-    // padding write therefore lands at `halt_timestamp + 4*num_padding_rows + 1`
-    // (= `halt_timestamp + 1` when there is no padding). The REGISTER final token
-    // must match that last write to balance the memory argument.
-    register_state.write_pc(1, halt_timestamp + 4 * num_padding_rows as u64 + 1);
+    // A monolithic run or the final continuation epoch terminates on the program's
+    // halt ECALL. Intermediate continuation epochs do not halt, so fall back to the
+    // last cycle's timestamp and skip HALT-based PC finalization — the PC is carried
+    // to the next epoch via the register snapshot, and HALT is excluded from the
+    // proof (`include_halt = false`) so `halt_trace` is unused there.
+    let (halt_timestamp, halt_next_pc) = if is_final {
+        let halt_op = cpu_ops
+            .iter()
+            .rev()
+            .find(|op| op.decode.fields.ecall)
+            .ok_or(Error::MissingHaltEcall)?;
+        // Finalize the PC (x255) on the REGISTER table. The CPU padding rows carry
+        // pc=1 and chain the inline-PC `memory` tokens with a +4 timestamp cadence
+        // starting from the HALT chip's emit_pc at `halt_timestamp + 1`; the last
+        // padding write therefore lands at `halt_timestamp + 4*num_padding_rows + 1`
+        // (= `halt_timestamp + 1` when there is no padding). The REGISTER final token
+        // must match that last write to balance the memory argument.
+        register_state.write_pc(1, halt_op.timestamp + 4 * num_padding_rows as u64 + 1);
+        (halt_op.timestamp, halt_op.next_pc)
+    } else {
+        (cpu_ops.last().map(|op| op.timestamp).unwrap_or(0), 0)
+    };
 
     let register_final_state = register_state.to_final_state_map();
 
@@ -2989,11 +3101,16 @@ fn build_traces(
         keccak_rc::update_multiplicities(&mut keccak_rc_trace, keccak_ops.len());
         keccak_rc_trace
     };
-    let gen_pages = || match elf {
-        Some(elf) => generate_page_tables(elf, memory_state, private_input),
-        None => (Vec::new(), Vec::new()),
+    let gen_pages = || match initial_image {
+        // Continuation epochs (l2g_memory_bookend) skip PAGE: the L2G table owns
+        // every touched cell's Memory init/fini, and every untouched PAGE row
+        // self-cancels (init==fini, ts=0), so PAGE contributes nothing here.
+        Some(image) if !l2g_memory_bookend => {
+            generate_page_tables(image, memory_state, private_input, l2g_memory_bookend)
+        }
+        _ => (Vec::new(), Vec::new()),
     };
-    let gen_register = || register::generate_register_trace(&register_final_state, entry_point);
+    let gen_register = || register::generate_register_trace(&register_final_state, register_init);
     let gen_halt = || halt::generate_halt_trace(halt_timestamp, halt_next_pc);
     // ECSM accelerator traces (empty/all-padding for programs that do not use ECSM).
     let gen_ecsm = || ecsm::generate_ecsm_trace(&ecsm_ops);
@@ -3148,6 +3265,16 @@ fn build_traces(
         }
     }
 
+    // Continuation callers derive the real cross-epoch boundary from this set and
+    // install its L2G trace after provenance is applied. Avoid building a
+    // throwaway genesis-only L2G trace here.
+    let touched_memory_cells = if l2g_memory_bookend {
+        touched_cells_from_memory_state(memory_state)
+    } else {
+        Vec::new()
+    };
+    let local_to_global = local_to_global::generate_local_to_global_trace(&[]);
+
     Ok(Traces {
         cpus,
         bitwise,
@@ -3173,6 +3300,8 @@ fn build_traces(
         ec_scalar: ec_scalar_trace,
         ecdas: ecdas_trace,
         memw_registers,
+        local_to_global,
+        touched_memory_cells,
         eqs,
         bytewises,
         stores,
@@ -3236,8 +3365,7 @@ pub fn count_table_lengths(
     let decode_rows = (instructions.len() as u64 + 1).next_power_of_two().max(2);
 
     // Memory + register state for partition predicates that need timestamps.
-    let mut memory_state = MemoryState::from_elf(elf);
-    memory_state.add_private_input(private_input);
+    let mut memory_state = MemoryState::from_image(&build_initial_image(elf, private_input));
     let mut register_state = RegisterState::new(elf.entry_point);
 
     // Raw counts (pre-chunking + pre-padding).
@@ -3478,6 +3606,8 @@ impl Traces {
             cpu32s,
             page_configs: _,
             public_output_bytes: _,
+            local_to_global: _,
+            touched_memory_cells: _,
         } = self;
 
         let mut total: u64 = 0;
@@ -3609,6 +3739,8 @@ impl Traces {
             cpu32s,
             page_configs: _,
             public_output_bytes: _,
+            local_to_global: _,
+            touched_memory_cells: _,
         } = self;
 
         let mut total: u64 = 0;
@@ -3699,7 +3831,7 @@ impl Traces {
     pub fn page_configs_from_elf(elf: &Elf) -> Vec<PageConfig> {
         use std::collections::BTreeSet;
 
-        let init_page_data = build_init_page_data(elf, &[]);
+        let init_page_data = build_init_page_data(&build_initial_image(elf, &[]));
 
         let page_bases: BTreeSet<u64> = init_page_data.keys().copied().collect();
 
@@ -3812,6 +3944,54 @@ impl Traces {
         private_input: &[u8],
         #[cfg(feature = "disk-spill")] storage_mode: StorageMode,
     ) -> Result<Self, Error> {
+        let initial_image = build_initial_image(elf, private_input);
+        let register_init = register::register_init_from_entry_point(elf.entry_point);
+        Self::from_image_and_logs(
+            elf,
+            &initial_image,
+            &register_init,
+            logs,
+            max_rows,
+            private_input,
+            true,
+            false,
+            #[cfg(feature = "disk-spill")]
+            storage_mode,
+        )
+    }
+
+    /// Build traces for one execution epoch starting from an explicit
+    /// initial-memory image (the epoch's starting memory) rather than the ELF
+    /// image. `elf` is still used for the program code (DECODE) and entry point.
+    ///
+    /// `register_init` is the epoch's starting register image (word address ->
+    /// value): the program-start image for the first epoch, or an epoch's boundary
+    /// register snapshot for later epochs. It seeds both `RegisterState` (for
+    /// first-access old values) and the REGISTER table's init column.
+    ///
+    /// `is_final` marks the last epoch: it applies HALT finalization (zeroize
+    /// registers, require the terminating ECALL). Intermediate epochs (`false`)
+    /// skip HALT and keep their boundary register/memory state.
+    #[allow(clippy::too_many_arguments)]
+    pub fn from_image_and_logs<I: ImageSource + Sync>(
+        elf: &Elf,
+        initial_image: &I,
+        register_init: &[u32],
+        logs: &[Log],
+        max_rows: &super::MaxRowsConfig,
+        private_input: &[u8],
+        is_final: bool,
+        l2g_memory_bookend: bool,
+        #[cfg(feature = "disk-spill")] storage_mode: StorageMode,
+    ) -> Result<Self, Error> {
+        // A non-final epoch must not contain the program-terminating instruction
+        // (next_pc == 0). Otherwise the CPU sends an ECALL bus token with no HALT
+        // table to receive it (HALT is excluded when !is_final), producing an
+        // unverifiable proof. Fail explicitly instead.
+        if !is_final && logs.iter().any(|log| log.next_pc == 0) {
+            return Err(Error::HaltInNonFinalEpoch);
+        }
+
         // Phase 0: ELF → DECODE + instructions
         // IMPORTANT: Use generate_decode_trace (same as compute_precomputed_commitment)
         // so the DECODE trace row ordering matches the AIR's hardcoded commitment.
@@ -3823,9 +4003,8 @@ impl Traces {
         let cpu_ops = collect_cpu_ops(logs, &instructions)?;
 
         // Phase 2: Collect + route all ops
-        let mut memory_state = MemoryState::from_elf(elf);
-        memory_state.add_private_input(private_input);
-        let mut register_state = RegisterState::new(elf.entry_point);
+        let mut memory_state = MemoryState::from_image(initial_image);
+        let mut register_state = RegisterState::from_init(register_init);
         let (
             memw_ops,
             load_ops,
@@ -3854,14 +4033,15 @@ impl Traces {
             ec_scalar_ops,
             ecdas_ops,
             &mut register_state,
+            is_final,
         );
 
         // Phases 3-5
         build_traces(
             ops,
-            Some(elf),
+            Some(initial_image),
             &memory_state,
-            elf.entry_point,
+            register_init,
             decode_trace,
             decode_pc_to_row,
             register_state,
@@ -3869,6 +4049,8 @@ impl Traces {
             #[cfg(feature = "disk-spill")]
             storage_mode,
             private_input,
+            is_final,
+            l2g_memory_bookend,
         )
     }
 
@@ -3889,6 +4071,7 @@ impl Traces {
         // Phase 2: Collect + route all ops
         let mut memory_state = MemoryState::new();
         let entry_point = cpu_ops.first().map_or(0, |op| op.decode.pc);
+        let register_init = register::register_init_from_entry_point(entry_point);
         let mut register_state = RegisterState::new(entry_point);
         let (
             memw_ops,
@@ -3918,6 +4101,7 @@ impl Traces {
             ec_scalar_ops,
             ecdas_ops,
             &mut register_state,
+            true,
         );
 
         // DECODE (from_elf_and_logs does this in Phase 0; same result either way)
@@ -3926,9 +4110,9 @@ impl Traces {
         // Phases 3-5 (elf=None → empty PAGE tables)
         build_traces(
             ops,
-            None,
+            None::<&HashMap<u64, u8>>,
             &memory_state,
-            entry_point,
+            &register_init,
             decode_trace,
             decode_pc_to_row,
             register_state,
@@ -3936,6 +4120,8 @@ impl Traces {
             #[cfg(feature = "disk-spill")]
             StorageMode::Ram,
             &[],
+            true,
+            false,
         )
     }
 }
diff --git a/prover/src/tables/types.rs b/prover/src/tables/types.rs
index d6091d0fd..71c83284a 100644
--- a/prover/src/tables/types.rs
+++ b/prover/src/tables/types.rs
@@ -307,6 +307,13 @@ pub enum BusId {
     /// Scalar-bit bus: EC_SCALAR sends one per set bit (timestamp, bit_index);
     /// ECDAS receives one per add, ECSM receives the MSB.
     Bit = 30,
+
+    // =========================================================================
+    // Continuations
+    // =========================================================================
+    /// Cross-epoch memory bus: the local-to-global table's per-cell init/fini
+    /// boundary claims, matched across epochs by the final aggregation LogUp.
+    GlobalMemory = 31,
 }
 
 impl BusId {
@@ -336,6 +343,7 @@ impl BusId {
             BusId::Ecdas => "Ecdas",
             BusId::ServeK => "ServeK",
             BusId::Bit => "Bit",
+            BusId::GlobalMemory => "GlobalMemory",
         }
     }
 }
@@ -368,6 +376,7 @@ impl TryFrom<u64> for BusId {
             28 => Ok(BusId::Ecdas),
             29 => Ok(BusId::ServeK),
             30 => Ok(BusId::Bit),
+            31 => Ok(BusId::GlobalMemory),
             other => Err(other),
         }
     }
diff --git a/prover/src/tests/compute_commit_bus_offset_tests.rs b/prover/src/tests/compute_commit_bus_offset_tests.rs
index 79c092ae2..ca6aab272 100644
--- a/prover/src/tests/compute_commit_bus_offset_tests.rs
+++ b/prover/src/tests/compute_commit_bus_offset_tests.rs
@@ -16,6 +16,7 @@ type E = GoldilocksExtension;
 /// future refactor of the batched routine must remain equivalent to this.
 fn naive_offset(
     public_output: &[u8],
+    start_index: u64,
     z: &FieldElement<E>,
     alpha: &FieldElement<E>,
 ) -> Option<FieldElement<E>> {
@@ -24,7 +25,7 @@ fn naive_offset(
     let mut total = FieldElement::<E>::zero();
     for (i, &value) in public_output.iter().enumerate() {
         let lc = bus_id
-            + (FieldElement::<E>::from(i as u64) * alpha)
+            + (FieldElement::<E>::from(start_index + i as u64) * alpha)
             + (FieldElement::<E>::from(value as u64) * alpha_sq);
         let fingerprint = z - lc;
         total += fingerprint.inv().ok()?;
@@ -37,7 +38,7 @@ fn test_empty_public_output_returns_zero() {
     let z = FieldElement::<E>::from(7u64);
     let alpha = FieldElement::<E>::from(11u64);
     assert_eq!(
-        compute_commit_bus_offset(&[], &z, &alpha),
+        compute_commit_bus_offset(&[], 0, &z, &alpha),
         Some(FieldElement::<E>::zero()),
     );
 }
@@ -48,8 +49,8 @@ fn test_non_empty_matches_naive_per_element_inverse() {
     let alpha = FieldElement::<E>::from(31_415_926u64);
     let public_output: [u8; 5] = [0x01, 0x02, 0xff, 0x10, 0x80];
 
-    let batched = compute_commit_bus_offset(&public_output, &z, &alpha);
-    let naive = naive_offset(&public_output, &z, &alpha);
+    let batched = compute_commit_bus_offset(&public_output, 0, &z, &alpha);
+    let naive = naive_offset(&public_output, 0, &z, &alpha);
 
     assert_eq!(batched, naive);
     assert!(batched.is_some(), "no fingerprint should collide here");
@@ -61,16 +62,36 @@ fn test_longer_input_matches_naive() {
     let alpha = FieldElement::<E>::from(0xcafe_babeu64);
     let public_output: Vec<u8> = (0..=255u16).map(|x| x as u8).collect();
 
-    let batched = compute_commit_bus_offset(&public_output, &z, &alpha);
-    let naive = naive_offset(&public_output, &z, &alpha);
+    let batched = compute_commit_bus_offset(&public_output, 0, &z, &alpha);
+    let naive = naive_offset(&public_output, 0, &z, &alpha);
 
     assert_eq!(batched, naive);
     assert!(batched.is_some());
 }
 
+#[test]
+fn test_nonzero_start_index_matches_naive() {
+    // A continuation epoch whose commits continue a prior epoch: the offset must
+    // index from the carried x254, not 0.
+    let z = FieldElement::<E>::from(0x1234_5678u64);
+    let alpha = FieldElement::<E>::from(0x9abc_def0u64);
+    let public_output: [u8; 3] = [0xCC, 0xDD, 0xEE];
+    let start_index = 7u64;
+
+    let batched = compute_commit_bus_offset(&public_output, start_index, &z, &alpha);
+    let naive = naive_offset(&public_output, start_index, &z, &alpha);
+
+    assert_eq!(batched, naive);
+    assert!(batched.is_some());
+
+    // A different start index yields a different offset (the index is bound in).
+    let shifted = compute_commit_bus_offset(&public_output, start_index + 1, &z, &alpha);
+    assert_ne!(batched, shifted);
+}
+
 #[test]
 fn test_zero_fingerprint_returns_none() {
-    // Craft fingerprint_0 = 0: i = 0, value = 0, then
+    // Craft fingerprint_0 = 0: start_index = 0, value = 0, then
     //   fingerprint_0 = z - (BusId::Commit + 0·α + 0·α²) = z - BusId::Commit.
     // Setting z = BusId::Commit forces the collision regardless of alpha.
     let z = FieldElement::<E>::from(BusId::Commit as u64);
@@ -78,7 +99,7 @@ fn test_zero_fingerprint_returns_none() {
     let public_output: [u8; 1] = [0];
 
     assert_eq!(
-        compute_commit_bus_offset(&public_output, &z, &alpha),
+        compute_commit_bus_offset(&public_output, 0, &z, &alpha),
         None,
         "zero fingerprint must propagate as None",
     );
@@ -96,5 +117,8 @@ fn test_zero_fingerprint_in_middle_returns_none() {
         + (FieldElement::<E>::from(3u64) * alpha_sq);
     let public_output: [u8; 4] = [1, 2, 3, 4];
 
-    assert_eq!(compute_commit_bus_offset(&public_output, &z, &alpha), None,);
+    assert_eq!(
+        compute_commit_bus_offset(&public_output, 0, &z, &alpha),
+        None,
+    );
 }
diff --git a/prover/src/tests/local_to_global_bus_tests.rs b/prover/src/tests/local_to_global_bus_tests.rs
new file mode 100644
index 000000000..263e3d938
--- /dev/null
+++ b/prover/src/tests/local_to_global_bus_tests.rs
@@ -0,0 +1,1082 @@
+//! Cross-epoch GlobalMemory bus tests for the local-to-global table.
+//!
+//! Proves+verifies that the `GlobalMemory` bus balances over the combined L2G
+//! table plus two anchors: a genesis sender (program-start initial memory) and a
+//! program-end receiver (final value of each cell). The bus balances iff every
+//! epoch's `fini` matches the next epoch's `init` (the cross-epoch telescoping).
+
+use std::collections::HashMap;
+
+use crypto::fiat_shamir::default_transcript::DefaultTranscript;
+use math::field::element::FieldElement;
+
+use stark::config::Commitment;
+use stark::constraints::transition::TransitionConstraintEvaluator;
+use stark::lookup::{
+    AirWithBuses, AuxiliaryTraceBuildData, BusInteraction, BusValue, Multiplicity,
+    NullBoundaryConstraintBuilder, Packing,
+};
+use stark::proof::options::ProofOptions;
+use stark::proof::stark::MultiProof;
+use stark::trace::TraceTable;
+use stark::traits::AIR;
+use stark::verifier::{IsStarkVerifier, Verifier};
+
+use crate::tables::bitwise::{BitwiseOperation, BitwiseOperationType};
+use crate::tables::local_to_global::{
+    self, CellBoundary, GENESIS_EPOCH, epoch_boundaries, generate_local_to_global_trace,
+};
+use crate::tables::types::{BusId, FE, GoldilocksExtension, GoldilocksField};
+use crate::test_utils::multi_prove_ram;
+
+type F = GoldilocksField;
+type E = GoldilocksExtension;
+
+/// Columns of an anchor trace: one GlobalMemory token `(address, value, epoch)`
+/// per row, packed in the same order as the L2G init/fini tokens (no timestamp —
+/// the cross-epoch chain is ordered by epoch).
+mod anchor_cols {
+    pub const ADDR_LO: usize = 0;
+    pub const ADDR_HI: usize = 1;
+    pub const VAL: usize = 2;
+    pub const EPOCH: usize = 3;
+    pub const NUM_COLUMNS: usize = 4;
+}
+
+type Token = (u64, u64, u64);
+
+fn l2g_air(
+    proof_options: &ProofOptions,
+    epoch_label: u64,
+) -> AirWithBuses<F, E, NullBoundaryConstraintBuilder, ()> {
+    let transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>> = vec![];
+    AirWithBuses::new(
+        local_to_global::cols::NUM_COLUMNS,
+        AuxiliaryTraceBuildData {
+            interactions: local_to_global::bus_interactions(epoch_label),
+        },
+        proof_options,
+        1,
+        transition_constraints,
+    )
+}
+
+fn anchor_air(
+    proof_options: &ProofOptions,
+    is_sender: bool,
+) -> AirWithBuses<F, E, NullBoundaryConstraintBuilder, ()> {
+    let transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>> = vec![];
+    let values = vec![
+        BusValue::Packed {
+            start_column: anchor_cols::ADDR_LO,
+            packing: Packing::Direct,
+        },
+        BusValue::Packed {
+            start_column: anchor_cols::ADDR_HI,
+            packing: Packing::Direct,
+        },
+        BusValue::Packed {
+            start_column: anchor_cols::VAL,
+            packing: Packing::Direct,
+        },
+        BusValue::Packed {
+            start_column: anchor_cols::EPOCH,
+            packing: Packing::Direct,
+        },
+    ];
+    let interaction = if is_sender {
+        BusInteraction::sender(BusId::GlobalMemory, Multiplicity::One, values)
+    } else {
+        BusInteraction::receiver(BusId::GlobalMemory, Multiplicity::One, values)
+    };
+    AirWithBuses::new(
+        anchor_cols::NUM_COLUMNS,
+        AuxiliaryTraceBuildData {
+            interactions: vec![interaction],
+        },
+        proof_options,
+        1,
+        transition_constraints,
+    )
+}
+
+fn anchor_trace(tokens: &[Token]) -> TraceTable<F, E> {
+    let num_rows = tokens.len().next_power_of_two().max(4);
+    let mut data = vec![FE::zero(); num_rows * anchor_cols::NUM_COLUMNS];
+    for (i, &(addr, value, epoch)) in tokens.iter().enumerate() {
+        let base = i * anchor_cols::NUM_COLUMNS;
+        data[base + anchor_cols::ADDR_LO] = FE::from(addr & 0xFFFF_FFFF);
+        data[base + anchor_cols::ADDR_HI] = FE::from(addr >> 32);
+        data[base + anchor_cols::VAL] = FE::from(value & 0xFF);
+        data[base + anchor_cols::EPOCH] = FE::from(epoch);
+    }
+    TraceTable::new_main(data, anchor_cols::NUM_COLUMNS, 1)
+}
+
+/// L2G air on the epoch-LOCAL `Memory` bus (uses `memory_bus_interactions`).
+fn l2g_memory_air(
+    proof_options: &ProofOptions,
+) -> AirWithBuses<F, E, NullBoundaryConstraintBuilder, ()> {
+    let transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>> = vec![];
+    AirWithBuses::new(
+        local_to_global::cols::NUM_COLUMNS,
+        AuxiliaryTraceBuildData {
+            interactions: local_to_global::memory_bus_interactions(),
+        },
+        proof_options,
+        1,
+        transition_constraints,
+    )
+}
+
+/// Columns of a MEMW-substitute trace: per touched byte, the `Memory` tokens the
+/// real access chain would emit — opposite polarity to L2G's bookend.
+mod memw_sub_cols {
+    pub const ADDR_LO: usize = 0;
+    pub const ADDR_HI: usize = 1;
+    pub const INIT_VAL: usize = 2;
+    pub const FINI_TS_LO: usize = 3;
+    pub const FINI_TS_HI: usize = 4;
+    pub const FINI_VAL: usize = 5;
+    pub const NUM_COLUMNS: usize = 6;
+}
+
+/// Minimal BITWISE-receiver substitute for the L2G range-check buses. It receives
+/// the same AreBytes, IsHalfword, and IsB20 tokens that the real BITWISE table
+/// would receive, but only for rows supplied by the test.
+mod range_recv_cols {
+    pub const X: usize = 0;
+    pub const Y: usize = 1;
+    pub const Z: usize = 2;
+    pub const MU_ARE_BYTES: usize = 3;
+    pub const MU_IS_HALF: usize = 4;
+    pub const MU_IS_B20: usize = 5;
+    pub const NUM_COLUMNS: usize = 6;
+}
+
+/// MEMW-substitute air: counterpart to `memory_bus_interactions`. Sends each
+/// cell's init token at ts=0 (cancelling L2G's init-receive) and receives each
+/// cell's fini token at the last timestamp (cancelling L2G's fini-send).
+fn memw_sub_air(
+    proof_options: &ProofOptions,
+) -> AirWithBuses<F, E, NullBoundaryConstraintBuilder, ()> {
+    let transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>> = vec![];
+    let init_send = BusInteraction::sender(
+        BusId::Memory,
+        Multiplicity::One,
+        vec![
+            BusValue::constant(0),
+            BusValue::Packed {
+                start_column: memw_sub_cols::ADDR_LO,
+                packing: Packing::Direct,
+            },
+            BusValue::Packed {
+                start_column: memw_sub_cols::ADDR_HI,
+                packing: Packing::Direct,
+            },
+            BusValue::constant(0),
+            BusValue::constant(0),
+            BusValue::Packed {
+                start_column: memw_sub_cols::INIT_VAL,
+                packing: Packing::Direct,
+            },
+        ],
+    );
+    let fini_recv = BusInteraction::receiver(
+        BusId::Memory,
+        Multiplicity::One,
+        vec![
+            BusValue::constant(0),
+            BusValue::Packed {
+                start_column: memw_sub_cols::ADDR_LO,
+                packing: Packing::Direct,
+            },
+            BusValue::Packed {
+                start_column: memw_sub_cols::ADDR_HI,
+                packing: Packing::Direct,
+            },
+            BusValue::Packed {
+                start_column: memw_sub_cols::FINI_TS_LO,
+                packing: Packing::Direct,
+            },
+            BusValue::Packed {
+                start_column: memw_sub_cols::FINI_TS_HI,
+                packing: Packing::Direct,
+            },
+            BusValue::Packed {
+                start_column: memw_sub_cols::FINI_VAL,
+                packing: Packing::Direct,
+            },
+        ],
+    );
+    AirWithBuses::new(
+        memw_sub_cols::NUM_COLUMNS,
+        AuxiliaryTraceBuildData {
+            interactions: vec![init_send, fini_recv],
+        },
+        proof_options,
+        1,
+        transition_constraints,
+    )
+}
+
+fn l2g_range_air(
+    proof_options: &ProofOptions,
+    epoch_label: u64,
+) -> AirWithBuses<F, E, NullBoundaryConstraintBuilder, ()> {
+    let transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>> = vec![];
+    AirWithBuses::new(
+        local_to_global::cols::NUM_COLUMNS,
+        AuxiliaryTraceBuildData {
+            interactions: local_to_global::range_check_interactions(epoch_label),
+        },
+        proof_options,
+        1,
+        transition_constraints,
+    )
+}
+
+fn range_receiver_air(
+    proof_options: &ProofOptions,
+) -> AirWithBuses<F, E, NullBoundaryConstraintBuilder, ()> {
+    let transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>> = vec![];
+    let interactions = vec![
+        BusInteraction::receiver(
+            BusId::AreBytes,
+            Multiplicity::Column(range_recv_cols::MU_ARE_BYTES),
+            vec![
+                BusValue::Packed {
+                    start_column: range_recv_cols::X,
+                    packing: Packing::Direct,
+                },
+                BusValue::Packed {
+                    start_column: range_recv_cols::Y,
+                    packing: Packing::Direct,
+                },
+            ],
+        ),
+        BusInteraction::receiver(
+            BusId::IsHalfword,
+            Multiplicity::Column(range_recv_cols::MU_IS_HALF),
+            vec![BusValue::linear(vec![
+                stark::lookup::LinearTerm::Column {
+                    coefficient: 1,
+                    column: range_recv_cols::X,
+                },
+                stark::lookup::LinearTerm::Column {
+                    coefficient: 256,
+                    column: range_recv_cols::Y,
+                },
+            ])],
+        ),
+        BusInteraction::receiver(
+            BusId::IsB20,
+            Multiplicity::Column(range_recv_cols::MU_IS_B20),
+            vec![BusValue::linear(vec![
+                stark::lookup::LinearTerm::Column {
+                    coefficient: 1,
+                    column: range_recv_cols::X,
+                },
+                stark::lookup::LinearTerm::Column {
+                    coefficient: 256,
+                    column: range_recv_cols::Y,
+                },
+                stark::lookup::LinearTerm::Column {
+                    coefficient: 65536,
+                    column: range_recv_cols::Z,
+                },
+            ])],
+        ),
+    ];
+    AirWithBuses::new(
+        range_recv_cols::NUM_COLUMNS,
+        AuxiliaryTraceBuildData { interactions },
+        proof_options,
+        1,
+        transition_constraints,
+    )
+}
+
+fn range_receiver_trace(ops: &[BitwiseOperation]) -> TraceTable<F, E> {
+    let num_rows = ops.len().next_power_of_two().max(4);
+    let mut data = vec![FE::zero(); num_rows * range_recv_cols::NUM_COLUMNS];
+    for (i, op) in ops.iter().enumerate() {
+        let base = i * range_recv_cols::NUM_COLUMNS;
+        data[base + range_recv_cols::X] = FE::from(op.x as u64);
+        data[base + range_recv_cols::Y] = FE::from(op.y as u64);
+        data[base + range_recv_cols::Z] = FE::from(op.z as u64);
+        let mu_col = match op.lookup_type {
+            BitwiseOperationType::AreBytes => range_recv_cols::MU_ARE_BYTES,
+            BitwiseOperationType::IsHalf => range_recv_cols::MU_IS_HALF,
+            BitwiseOperationType::IsB20 => range_recv_cols::MU_IS_B20,
+            _ => panic!("unexpected L2G range-check lookup"),
+        };
+        data[base + mu_col] = FE::one();
+    }
+    TraceTable::new_main(data, range_recv_cols::NUM_COLUMNS, 1)
+}
+
+fn memw_sub_trace(boundary: &[CellBoundary]) -> TraceTable<F, E> {
+    let num_rows = boundary.len().next_power_of_two().max(4);
+    let mut data = vec![FE::zero(); num_rows * memw_sub_cols::NUM_COLUMNS];
+    for (i, b) in boundary.iter().enumerate() {
+        let base = i * memw_sub_cols::NUM_COLUMNS;
+        data[base + memw_sub_cols::ADDR_LO] = FE::from(b.address & 0xFFFF_FFFF);
+        data[base + memw_sub_cols::ADDR_HI] = FE::from(b.address >> 32);
+        data[base + memw_sub_cols::INIT_VAL] = FE::from(b.init.value & 0xFF);
+        data[base + memw_sub_cols::FINI_TS_LO] = FE::from(b.fini.timestamp & 0xFFFF_FFFF);
+        data[base + memw_sub_cols::FINI_TS_HI] = FE::from(b.fini.timestamp >> 32);
+        data[base + memw_sub_cols::FINI_VAL] = FE::from(b.fini.value & 0xFF);
+    }
+    TraceTable::new_main(data, memw_sub_cols::NUM_COLUMNS, 1)
+}
+
+/// Prove + verify the epoch-local `Memory` bus over L2G's bookend (built from
+/// `l2g_boundary`) plus the MEMW-substitute chain (built from `memw_boundary`).
+/// Equal boundaries balance; a mismatch leaves the bus unbalanced.
+fn prove_verify_memory(l2g_boundary: &[CellBoundary], memw_boundary: &[CellBoundary]) -> bool {
+    let proof_options = ProofOptions::default_test_options();
+    let l2g = l2g_memory_air(&proof_options);
+    let memw = memw_sub_air(&proof_options);
+    let mut l2g_trace = generate_local_to_global_trace(l2g_boundary);
+    let mut memw_trace = memw_sub_trace(memw_boundary);
+    let pairs: Vec<(
+        &dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>,
+        _,
+        _,
+    )> = vec![(&l2g, &mut l2g_trace, &()), (&memw, &mut memw_trace, &())];
+    let proof = multi_prove_ram(pairs, &mut DefaultTranscript::<E>::new(&[])).unwrap();
+    let airs: Vec<&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>> = vec![&l2g, &memw];
+    Verifier::multi_verify(
+        &airs,
+        &proof,
+        &mut DefaultTranscript::<E>::new(&[]),
+        &FieldElement::zero(),
+    )
+}
+
+fn prove_verify_l2g_range_with_trace(
+    l2g_trace: &mut TraceTable<F, E>,
+    range_ops: &[BitwiseOperation],
+    epoch_label: u64,
+) -> bool {
+    let proof_options = ProofOptions::default_test_options();
+    let l2g = l2g_range_air(&proof_options, epoch_label);
+    let receiver = range_receiver_air(&proof_options);
+    let mut receiver_trace = range_receiver_trace(range_ops);
+    let pairs: Vec<(
+        &dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>,
+        _,
+        _,
+    )> = vec![
+        (&l2g, l2g_trace, &()),
+        (&receiver, &mut receiver_trace, &()),
+    ];
+    let proof = multi_prove_ram(pairs, &mut DefaultTranscript::<E>::new(&[])).unwrap();
+    let airs: Vec<&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>> =
+        vec![&l2g, &receiver];
+    Verifier::multi_verify(
+        &airs,
+        &proof,
+        &mut DefaultTranscript::<E>::new(&[]),
+        &FieldElement::zero(),
+    )
+}
+
+/// Inert L2G AIR: commits the trace columns with no bus and no constraints —
+/// the deterministic commitment an epoch proof publishes for its L2G table. The
+/// main-trace Merkle root is over the main columns only, so it matches the L2G
+/// sub-table root committed in the bus proof.
+fn inert_l2g_air(
+    proof_options: &ProofOptions,
+) -> AirWithBuses<F, E, NullBoundaryConstraintBuilder, ()> {
+    let transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>> = vec![];
+    AirWithBuses::new(
+        local_to_global::cols::NUM_COLUMNS,
+        AuxiliaryTraceBuildData {
+            interactions: vec![],
+        },
+        proof_options,
+        1,
+        transition_constraints,
+    )
+}
+
+/// Commit one epoch's L2G trace in a minimal proof and return its Merkle root —
+/// the `R_i` an epoch proof publishes for that epoch.
+fn l2g_root(boundary: &[CellBoundary]) -> Commitment {
+    let proof_options = ProofOptions::default_test_options();
+    let air = inert_l2g_air(&proof_options);
+    let mut trace = generate_local_to_global_trace(boundary);
+    let pairs: Vec<(
+        &dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>,
+        _,
+        _,
+    )> = vec![(&air, &mut trace, &())];
+    let proof = multi_prove_ram(pairs, &mut DefaultTranscript::<E>::new(&[])).unwrap();
+    proof.proofs[0].lde_trace_main_merkle_root
+}
+
+/// Prove the cross-epoch GlobalMemory bus over one L2G sub-table per epoch plus
+/// the genesis/program-end anchors. The first N sub-tables (epoch order) are the
+/// per-epoch L2G tables.
+pub(crate) fn prove_global(boundaries: &[Vec<CellBoundary>]) -> MultiProof<F, E, ()> {
+    let all: Vec<CellBoundary> = boundaries.iter().flatten().copied().collect();
+
+    // Genesis anchor: a SEND token for each cell first touched from program memory.
+    let genesis: Vec<Token> = all
+        .iter()
+        .filter(|b| b.init.originating_epoch == GENESIS_EPOCH)
+        .map(|b| (b.address, b.init.value, b.init.originating_epoch))
+        .collect();
+
+    // Program-end anchor: a RECEIVE token for each cell's final fini (epochs are
+    // in order, so the last write wins).
+    let mut final_fini: HashMap<u64, Token> = HashMap::new();
+    for epoch in boundaries {
+        for b in epoch {
+            final_fini.insert(b.address, (b.address, b.fini.value, b.fini.epoch));
+        }
+    }
+    let program_end: Vec<Token> = final_fini.into_values().collect();
+
+    let mut l2g_traces: Vec<TraceTable<F, E>> = boundaries
+        .iter()
+        .map(|epoch| generate_local_to_global_trace(epoch))
+        .collect();
+    let mut genesis_trace = anchor_trace(&genesis);
+    let mut program_end_trace = anchor_trace(&program_end);
+
+    let proof_options = ProofOptions::default_test_options();
+    // One L2G air per epoch, each carrying its 1-based `fini_epoch` constant.
+    let l2g_airs: Vec<_> = (0..boundaries.len())
+        .map(|i| l2g_air(&proof_options, local_to_global::epoch_label(i as u64)))
+        .collect();
+    let genesis_anchor = anchor_air(&proof_options, true);
+    let program_end_anchor = anchor_air(&proof_options, false);
+
+    // Per-epoch L2G sub-tables (each with its own air), then the anchors.
+    let mut air_trace_pairs: Vec<(
+        &dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>,
+        _,
+        _,
+    )> = l2g_airs
+        .iter()
+        .zip(l2g_traces.iter_mut())
+        .map(|(air, trace)| {
+            (
+                air as &dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>,
+                trace,
+                &(),
+            )
+        })
+        .collect();
+    air_trace_pairs.push((&genesis_anchor, &mut genesis_trace, &()));
+    air_trace_pairs.push((&program_end_anchor, &mut program_end_trace, &()));
+
+    multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::<E>::new(&[])).unwrap()
+}
+
+pub(crate) fn prove_and_verify(boundaries: &[Vec<CellBoundary>]) -> bool {
+    let proof = prove_global(boundaries);
+
+    let proof_options = ProofOptions::default_test_options();
+    let l2g_airs: Vec<_> = (0..boundaries.len())
+        .map(|i| l2g_air(&proof_options, local_to_global::epoch_label(i as u64)))
+        .collect();
+    let genesis_anchor = anchor_air(&proof_options, true);
+    let program_end_anchor = anchor_air(&proof_options, false);
+
+    // air_refs must match the air_trace_pairs order: one L2G air per epoch, then anchors.
+    let mut airs: Vec<&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>> = l2g_airs
+        .iter()
+        .map(|a| a as &dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>)
+        .collect();
+    airs.push(&genesis_anchor);
+    airs.push(&program_end_anchor);
+
+    Verifier::multi_verify(
+        &airs,
+        &proof,
+        &mut DefaultTranscript::<E>::new(&[]),
+        &FieldElement::zero(),
+    )
+}
+
+#[test]
+fn test_global_memory_bus_balances() {
+    // Cell 10 touched in epochs 0,1,2; cell 20 in epoch 0 then again epoch 2
+    // (skipping 1); cell 30 once.
+    let initial_memory = HashMap::from([(10u64, 5u64)]);
+    let epochs = vec![
+        vec![(10, 7, 3), (20, 9, 4)],
+        vec![(10, 8, 10)],
+        vec![(20, 9, 20), (30, 1, 21)],
+    ];
+    let boundaries = epoch_boundaries(&initial_memory, &epochs);
+    assert!(prove_and_verify(&boundaries));
+}
+
+#[test]
+fn test_global_memory_bus_rejects_tampered_boundary() {
+    let initial_memory = HashMap::from([(10u64, 5u64)]);
+    let epochs = vec![vec![(10, 7, 3)], vec![(10, 8, 10)]];
+    let mut boundaries = epoch_boundaries(&initial_memory, &epochs);
+    assert!(prove_and_verify(&boundaries));
+
+    // Break the chain: epoch 0 now claims a different fini than epoch 1's init.
+    boundaries[0][0].fini.value = 999;
+    assert!(!prove_and_verify(&boundaries));
+}
+
+#[test]
+fn test_l2g_binding_holds() {
+    // Per-epoch L2G roots committed by the epoch proofs match the per-epoch L2G
+    // sub-table roots in the final cross-epoch proof.
+    let initial_memory = HashMap::from([(10u64, 5u64)]);
+    let epochs = vec![
+        vec![(10, 7, 3), (20, 9, 4)],
+        vec![(10, 8, 10)],
+        vec![(20, 9, 20), (30, 1, 21)],
+    ];
+    let boundaries = epoch_boundaries(&initial_memory, &epochs);
+
+    let final_proof = prove_global(&boundaries);
+    let roots: Vec<Commitment> = boundaries.iter().map(|b| l2g_root(b)).collect();
+
+    assert!(crate::verify_l2g_commitment_binding(&roots, &final_proof));
+}
+
+#[test]
+fn test_l2g_binding_rejects_mismatch() {
+    // The final proof uses a DIFFERENT epoch-0 L2G table than the epoch proofs
+    // committed, so the binding must reject it.
+    let initial_memory = HashMap::from([(10u64, 5u64)]);
+    let epochs = vec![
+        vec![(10, 7, 3), (20, 9, 4)],
+        vec![(10, 8, 10)],
+        vec![(20, 9, 20), (30, 1, 21)],
+    ];
+    let boundaries = epoch_boundaries(&initial_memory, &epochs);
+
+    // Honest per-epoch roots.
+    let roots: Vec<Commitment> = boundaries.iter().map(|b| l2g_root(b)).collect();
+
+    // Final proof built over a tampered epoch-0 L2G table.
+    let mut tampered = boundaries.clone();
+    tampered[0][0].fini.value = 999;
+    let final_proof = prove_global(&tampered);
+
+    assert!(!crate::verify_l2g_commitment_binding(&roots, &final_proof));
+}
+
+// =========================================================================
+// Helpers for soundness regression tests
+// =========================================================================
+
+/// Like `prove_verify_memory` but accepts a pre-built (possibly mutated)
+/// l2g trace instead of deriving it from a boundary slice.
+///
+/// Used by tests that forge individual columns (MU, epoch halfwords) after
+/// trace generation — the mutation must survive into the proof so the
+/// verifier sees the forged commitment.
+fn prove_verify_memory_with_trace(
+    l2g_trace: &mut TraceTable<F, E>,
+    memw_boundary: &[CellBoundary],
+) -> bool {
+    let proof_options = ProofOptions::default_test_options();
+    let l2g = l2g_memory_air(&proof_options);
+    let memw = memw_sub_air(&proof_options);
+    let mut memw_trace = memw_sub_trace(memw_boundary);
+    let pairs: Vec<(
+        &dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>,
+        _,
+        _,
+    )> = vec![(&l2g, l2g_trace, &()), (&memw, &mut memw_trace, &())];
+    let proof = multi_prove_ram(pairs, &mut DefaultTranscript::<E>::new(&[])).unwrap();
+    let airs: Vec<&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>> = vec![&l2g, &memw];
+    Verifier::multi_verify(
+        &airs,
+        &proof,
+        &mut DefaultTranscript::<E>::new(&[]),
+        &FieldElement::zero(),
+    )
+}
+
+/// Like `prove_global` (and `prove_and_verify`) but accepts pre-built l2g
+/// traces (one per epoch) so that column mutations applied before this call
+/// survive into the proof.
+///
+/// Returns `true` iff the multi-table verifier accepts the proof.
+fn prove_and_verify_global_with_traces(
+    boundaries: &[Vec<CellBoundary>],
+    l2g_traces: &mut [TraceTable<F, E>],
+) -> bool {
+    let all: Vec<CellBoundary> = boundaries.iter().flatten().copied().collect();
+
+    let genesis: Vec<Token> = all
+        .iter()
+        .filter(|b| b.init.originating_epoch == GENESIS_EPOCH)
+        .map(|b| (b.address, b.init.value, b.init.originating_epoch))
+        .collect();
+
+    let mut final_fini: HashMap<u64, Token> = HashMap::new();
+    for epoch in boundaries {
+        for b in epoch {
+            final_fini.insert(b.address, (b.address, b.fini.value, b.fini.epoch));
+        }
+    }
+    let program_end: Vec<Token> = final_fini.into_values().collect();
+
+    let mut genesis_trace = anchor_trace(&genesis);
+    let mut program_end_trace = anchor_trace(&program_end);
+
+    let proof_options = ProofOptions::default_test_options();
+    let l2g_airs: Vec<_> = (0..boundaries.len())
+        .map(|i| l2g_air(&proof_options, local_to_global::epoch_label(i as u64)))
+        .collect();
+    let genesis_anchor = anchor_air(&proof_options, true);
+    let program_end_anchor = anchor_air(&proof_options, false);
+
+    let mut air_trace_pairs: Vec<(
+        &dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>,
+        _,
+        _,
+    )> = l2g_airs
+        .iter()
+        .zip(l2g_traces.iter_mut())
+        .map(|(air, trace)| {
+            (
+                air as &dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>,
+                trace,
+                &(),
+            )
+        })
+        .collect();
+    air_trace_pairs.push((&genesis_anchor, &mut genesis_trace, &()));
+    air_trace_pairs.push((&program_end_anchor, &mut program_end_trace, &()));
+
+    let proof = multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::<E>::new(&[])).unwrap();
+
+    let mut airs: Vec<&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>> = l2g_airs
+        .iter()
+        .map(|a| a as &dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>)
+        .collect();
+    airs.push(&genesis_anchor);
+    airs.push(&program_end_anchor);
+
+    Verifier::multi_verify(
+        &airs,
+        &proof,
+        &mut DefaultTranscript::<E>::new(&[]),
+        &FieldElement::zero(),
+    )
+}
+
+// =========================================================================
+// Soundness regression tests: MU selector (Design X / Statement S)
+// =========================================================================
+
+/// (1a) MU=0 on a real row silences its Memory-bus tokens → the bus dangles.
+///
+/// Property guarded: the `MU` selector gates EVERY L2G interaction on the
+/// epoch-local Memory bus. Clearing MU on a genuinely-touched cell means its
+/// init-receive and fini-send never fire; the MEMW-substitute chain still
+/// sends/receives for that cell, leaving both tokens unmatched → bus
+/// imbalance → proof must fail.
+///
+/// Modelled on `test_local_memory_bus_rejects_tamper` (same Memory-bus path)
+/// extended to mutate MU rather than a value column, using the new
+/// `prove_verify_memory_with_trace` helper.
+#[test]
+fn test_l2g_mu_zero_on_real_row_rejects() {
+    // Two touched cells; row 0 is real (MU=1). We forge row 0's MU to 0.
+    let initial_memory = HashMap::from([(10u64, 5u64)]);
+    let epochs = vec![vec![(10, 7, 3), (20, 9, 4)]];
+    let boundaries = epoch_boundaries(&initial_memory, &epochs);
+
+    // Honest round-trip must pass.
+    assert!(
+        prove_verify_memory(&boundaries[0], &boundaries[0]),
+        "baseline must verify before forgery"
+    );
+
+    // Forge: clear MU on the first real row.
+    let mut forged_trace = generate_local_to_global_trace(&boundaries[0]);
+    forged_trace
+        .main_table
+        .set(0, local_to_global::cols::MU, FE::zero());
+
+    // The Memory bus is now unbalanced: L2G's init-receive and fini-send for
+    // cell 10 are silenced (MU=0), but the MEMW-substitute sends cell 10's
+    // init and expects its fini — neither token finds its counterpart.
+    assert!(
+        !prove_verify_memory_with_trace(&mut forged_trace, &boundaries[0]),
+        "MU=0 on a real row must cause the Memory bus to reject"
+    );
+}
+
+/// (1b) MU=1 on a padding row injects phantom tokens → the GlobalMemory bus
+/// cannot balance.
+///
+/// Property guarded: same Design-X property, opposite direction. A padding row
+/// with MU=1 fires a spurious init-receive and fini-send on the GlobalMemory
+/// bus. The two phantom tokens carry different values — the init token carries
+/// originating_epoch=0 (zero-filled padding) while the fini token carries
+/// `fini_epoch=epoch_label` (the per-table constant, always ≥ 1). Because the
+/// epoch field differs, the phantom receive and send do NOT self-cancel; neither
+/// the genesis anchor nor the program-end anchor has a matching row for address 0
+/// → both tokens dangle → bus imbalance → proof fails.
+///
+/// Note: the epoch-local Memory bus would NOT catch this because the phantom
+/// row's init and fini tokens are identical (all columns zero) and self-cancel
+/// in the LogUp. The GlobalMemory bus carries the epoch constant in the fini
+/// token but not the init token, breaking the self-cancellation.
+///
+/// Three real boundaries pad to four rows; row 3 is the padding row (all-zero).
+/// Uses `prove_and_verify_global_with_traces` (same path as test 1c and test 3).
+#[test]
+fn test_l2g_mu_one_on_padding_row_rejects_global_bus() {
+    // 3 real rows → 4-row trace (padding row at index 3).
+    let initial_memory = HashMap::new();
+    let epochs = vec![vec![(10, 7, 3), (20, 9, 4), (30, 1, 5)]];
+    let boundaries = epoch_boundaries(&initial_memory, &epochs);
+    assert_eq!(boundaries[0].len(), 3, "expect 3 real rows");
+
+    // Honest baseline on the GlobalMemory bus.
+    assert!(
+        prove_and_verify(&boundaries),
+        "baseline must verify before forgery"
+    );
+
+    // Forge: set MU=1 on the padding row (row 3, all-zero columns).
+    let mut forged_trace = generate_local_to_global_trace(&boundaries[0]);
+    let num_rows = forged_trace.num_rows();
+    assert_eq!(num_rows, 4, "trace must be padded to 4 rows");
+    forged_trace
+        .main_table
+        .set(3, local_to_global::cols::MU, FE::one());
+    let mut l2g_traces = vec![forged_trace];
+
+    // The phantom row fires on the GlobalMemory bus:
+    //   - init-receive: epoch=0 (zero-filled), addr=0 — no genesis anchor row sends this.
+    //   - fini-send: epoch=epoch_label=1, addr=0 — no program-end anchor receives this.
+    // The two tokens differ in the epoch field, so they do not self-cancel.
+    assert!(
+        !prove_and_verify_global_with_traces(&boundaries, &mut l2g_traces),
+        "MU=1 on a padding row must cause the GlobalMemory bus to reject"
+    );
+}
+
+/// (1c) MU=2 (non-boolean) on a real row unbalances the GlobalMemory bus.
+///
+/// Property guarded: MU is the LogUp multiplicity for ALL bus interactions.
+/// With MU=2 the fini-sender fires twice but the program-end anchor receives
+/// only once, and the init-receiver fires twice but the genesis anchor sends
+/// only once → both sides of the GlobalMemory bus are off by 1 → proof fails.
+///
+/// Uses `prove_and_verify_global_with_traces` (forked from `prove_global`)
+/// to inject the pre-mutated trace. Modelled on
+/// `test_prove_elfs_ecsm_forged_ecdas_mu_rejected` (prove_elfs_tests.rs:1230)
+/// for the "set MU to 2, assert reject" pattern.
+#[test]
+fn test_l2g_mu_nonboolean_rejects_global_bus() {
+    let initial_memory = HashMap::from([(10u64, 5u64)]);
+    let epochs = vec![vec![(10, 7, 3)]];
+    let boundaries = epoch_boundaries(&initial_memory, &epochs);
+
+    // Honest baseline on the GlobalMemory bus.
+    assert!(
+        prove_and_verify(&boundaries),
+        "baseline must verify before forgery"
+    );
+
+    // Forge: set MU=2 on row 0 of epoch 0's L2G trace.
+    let mut l2g_trace = generate_local_to_global_trace(&boundaries[0]);
+    l2g_trace
+        .main_table
+        .set(0, local_to_global::cols::MU, FE::from(2u64));
+    let mut l2g_traces = vec![l2g_trace];
+
+    // Multiplicity 2 on both the init-receiver and fini-sender; genesis and
+    // program-end anchors only send/receive multiplicity 1 → bus imbalance.
+    assert!(
+        !prove_and_verify_global_with_traces(&boundaries, &mut l2g_traces),
+        "MU=2 (non-boolean) must cause the GlobalMemory bus to reject"
+    );
+}
+
+// =========================================================================
+// Soundness regression tests: init_epoch ordering (IsB20)
+// =========================================================================
+
+/// (2) Forged init_epoch violating the ordering constraint is rejected.
+///
+/// Property guarded: `init_epoch < fini_epoch` is enforced via an IsB20
+/// lookup on `fini_epoch − 1 − init_epoch`. A forged row that claims
+/// `init_epoch >= fini_epoch` causes the difference to underflow in the
+/// field to a value far outside [0, 2^20); no matching IsB20 row exists in
+/// the BITWISE table, so the range-check bus cannot balance and the proof
+/// must fail.
+///
+/// The ordering check lives on `range_check_interactions`, which is wired to
+/// the BITWISE table inside the epoch proof. The epoch-local `l2g_memory_air`
+/// in this test file does NOT include `range_check_interactions` — it only
+/// covers the Memory bus. The full range-check path (with a live BITWISE
+/// table) is exercised inside the epoch prover in `continuation.rs`
+/// (`l2g_memory_air` there concatenates both, see line 155-159). Wiring the
+/// complete BITWISE sub-proof here would require replicating `prove_epoch`'s
+/// full table set, which is out of scope for a unit bus test.
+///
+/// This test asserts the arithmetic property that makes the attack fail.
+/// `test_ordering_rejects_future_reference` in
+/// `local_to_global.rs::tests` (line 831) already verifies that the field
+/// value `fini_epoch − 1 − init_epoch` wraps to a value ≥ 2^20 for both
+/// self-references and future-references, so no IsB20 row matches. The
+/// proof-level bus path is covered by
+/// `test_l2g_init_epoch_ordering_live_is_b20_rejects` below.
+///
+/// Variants that ARE expressible without the full bitwise table:
+///   - Self-reference (init_epoch == fini_epoch) and future-reference
+///     (init_epoch > fini_epoch) are both covered by the arithmetic check.
+///   - The GlobalMemory bus itself does NOT enforce the ordering; it only
+///     checks that tokens match across epochs. The IsB20 sender is wired
+///     exclusively on the epoch-local table (which carries the BITWISE provider).
+///
+/// The paired live-bus test wires an L2G range-check AIR to a minimal BITWISE
+/// receiver table and proves that a self-reference rejects through IsB20.
+#[test]
+fn test_l2g_init_epoch_ordering_field_arithmetic() {
+    // Verify the arithmetic property that underlies the IsB20 soundness argument
+    // without running a full proof. The ordering sender computes:
+    //   fini_epoch − 1 − init_epoch   (in the Goldilocks field)
+    // For an honest row this is a small non-negative integer in [0, 2^20).
+    // For a forged row it wraps to a huge field value outside [0, 2^20).
+
+    let order_field_value = |fini_label: u64, init_epoch: u64| -> u64 {
+        // Replicate the field arithmetic: FE::from(fini_label - 1) - FE::from(init_epoch).
+        // The Goldilocks prime is 2^64 - 2^32 + 1.
+        let result = FE::from(fini_label - 1) - FE::from(init_epoch);
+        *result.value()
+    };
+
+    // Honest: epoch 2 consuming genesis (epoch 0) fini → 2 - 1 - 0 = 1.
+    assert!(order_field_value(2, GENESIS_EPOCH) < (1 << 20));
+
+    // Honest: epoch 5 consuming epoch 2's fini → 5 - 1 - 2 = 2.
+    assert!(order_field_value(5, 2) < (1 << 20));
+
+    // Forged self-reference: init_epoch == fini_epoch → 5 - 1 - 5 = -1 in field.
+    let self_ref = order_field_value(5, 5);
+    assert!(
+        self_ref >= (1 << 20),
+        "self-reference must produce a value outside the IsB20 range (got {self_ref})"
+    );
+
+    // Forged future-reference: init_epoch > fini_epoch → 5 - 1 - 9 < 0 in field.
+    let future_ref = order_field_value(5, 9);
+    assert!(
+        future_ref >= (1 << 20),
+        "future-reference must produce a value outside the IsB20 range (got {future_ref})"
+    );
+}
+
+#[test]
+fn test_l2g_init_epoch_ordering_live_is_b20_rejects() {
+    // Epoch 1 consumes epoch 0's fini for cell 10. Honest ordering has
+    // init_epoch=1, fini_epoch=2, so 2 - 1 - 1 = 0 is a valid IsB20 lookup.
+    let initial_memory = HashMap::new();
+    let epochs = vec![vec![(10, 7, 3)], vec![(10, 8, 10)]];
+    let boundaries = epoch_boundaries(&initial_memory, &epochs);
+    let boundary = &boundaries[1];
+    let epoch_label = boundary[0].fini.epoch;
+    assert_eq!(epoch_label, 2);
+
+    let mut honest_trace = generate_local_to_global_trace(boundary);
+    let honest_ops = local_to_global::collect_bitwise_from_l2g(boundary);
+    assert!(
+        prove_verify_l2g_range_with_trace(&mut honest_trace, &honest_ops, epoch_label),
+        "honest L2G range checks must balance against BITWISE receivers"
+    );
+
+    // Forge a self-reference: init_epoch == fini_epoch. The halfword lookups are
+    // still satisfiable, so the receiver table below includes them. The missing
+    // piece is exactly IsB20[2 - 1 - 2], which underflows in the field and has no
+    // valid 20-bit receiver row.
+    let mut forged_trace = generate_local_to_global_trace(boundary);
+    forged_trace.main_table.set(
+        0,
+        local_to_global::cols::INIT_EPOCH_0,
+        FE::from(epoch_label),
+    );
+    forged_trace
+        .main_table
+        .set(0, local_to_global::cols::INIT_EPOCH_1, FE::zero());
+
+    let cell = boundary[0];
+    let forged_ops = vec![
+        BitwiseOperation::byte_op(
+            BitwiseOperationType::AreBytes,
+            (cell.init.value & 0xFF) as u8,
+            (cell.fini.value & 0xFF) as u8,
+        ),
+        BitwiseOperation::halfword(
+            BitwiseOperationType::IsHalf,
+            (epoch_label & 0xFF) as u8,
+            ((epoch_label >> 8) & 0xFF) as u8,
+        ),
+        BitwiseOperation::halfword(BitwiseOperationType::IsHalf, 0, 0),
+    ];
+    assert!(
+        !prove_verify_l2g_range_with_trace(&mut forged_trace, &forged_ops, epoch_label),
+        "self-referential init_epoch must fail through the live IsB20 bus"
+    );
+}
+
+// =========================================================================
+// Soundness regression tests: Design-Y orphan attack
+// =========================================================================
+
+/// (3) Design-Y orphan attack: MU=0 on a later epoch's L2G row truncates the
+/// cross-epoch chain → the GlobalMemory bus rejects.
+///
+/// Property guarded: setting MU=0 on an L2G row for epoch i+1 silences that
+/// epoch's fini-send on the GlobalMemory bus. If the global finalisation
+/// (program-end anchor) still expects the last fini to come from epoch i+1,
+/// the fini token is never sent → program-end anchor receives a token that
+/// nobody sent → bus imbalance.
+///
+/// Concretely: cell 10 is touched in both epoch 0 (label 1) and epoch 1
+/// (label 2). The forged trace sets MU=0 on epoch 1's L2G row for cell 10.
+/// Epoch 1's fini-send is silenced; the program-end anchor still tries to
+/// receive `(10, 8, 2, 10)` (the last honest fini) — but it was never sent.
+/// Separately, epoch 1's init-receive is also silenced, leaving epoch 0's
+/// fini token (which epoch 1 was supposed to consume) dangling. Both
+/// produce bus imbalances.
+///
+/// Modelled on `test_global_memory_bus_rejects_tampered_boundary` (which
+/// tampers a boundary value) and uses the new
+/// `prove_and_verify_global_with_traces` helper to inject the forged epoch-1
+/// trace. `prove_and_verify` (which generates its own traces) is used for the
+/// baseline check; the forged proof is built via the helper.
+#[test]
+fn test_l2g_design_y_orphan_mu_zero_rejects() {
+    // Cell 10 touched in epoch 0 (label 1, fini value=7, ts=3) and epoch 1
+    // (label 2, fini value=8, ts=10). Cell 20 touched in epoch 0 only.
+    let initial_memory = HashMap::from([(10u64, 5u64)]);
+    let epochs = vec![vec![(10, 7, 3), (20, 9, 4)], vec![(10, 8, 10)]];
+    let boundaries = epoch_boundaries(&initial_memory, &epochs);
+
+    // Honest baseline on the GlobalMemory bus.
+    assert!(
+        prove_and_verify(&boundaries),
+        "baseline must verify before forgery"
+    );
+
+    // Build honest traces for both epochs.
+    let epoch0_trace = generate_local_to_global_trace(&boundaries[0]);
+    let mut epoch1_trace = generate_local_to_global_trace(&boundaries[1]);
+
+    // Epoch 1 has exactly one real row (cell 10). Forge MU=0 on that row.
+    // This orphans cell 10's cross-epoch chain at epoch 1: the init-receive
+    // (consuming epoch 0's fini token for cell 10) and the fini-send (which
+    // the program-end anchor expects to receive) both fire with multiplicity 0.
+    assert_eq!(
+        boundaries[1].len(),
+        1,
+        "epoch 1 must have exactly one real row"
+    );
+    epoch1_trace
+        .main_table
+        .set(0, local_to_global::cols::MU, FE::zero());
+
+    let mut l2g_traces = vec![epoch0_trace, epoch1_trace];
+
+    // The GlobalMemory bus cannot balance:
+    //   - Epoch 0's fini token for cell 10 was sent (epoch 0's MU=1) but not
+    //     consumed by epoch 1 (epoch 1's init-receive is silenced → MU=0).
+    //   - The program-end anchor tries to receive epoch 1's fini for cell 10
+    //     (the last honest value), but that fini-send is also silenced.
+    assert!(
+        !prove_and_verify_global_with_traces(&boundaries, &mut l2g_traces),
+        "MU=0 on a later epoch's L2G row (Design-Y orphan) must cause the GlobalMemory bus to reject"
+    );
+}
+
+// =========================================================================
+// Soundness regression tests: private-input continuation
+// =========================================================================
+
+/// (4) Private-input continuation: `test_private_input_xpage` spans multiple
+/// epochs and verifies with non-empty private inputs.
+///
+/// Property guarded: the continuation prover correctly handles private-input
+/// pages (which are touched in the first epoch and potentially persist across
+/// epoch boundaries) and the resulting multi-epoch L2G chain verifies end-to-end.
+///
+/// The fixture reads 16 bytes of private input from 0xFF000000, then commits
+/// bytes 4..12 (8 bytes after the 4-byte length prefix). With `epoch_size_log2=2`
+/// (4 cycles) the 11-cycle program spans three epochs: epoch 0 reads the private-input
+/// page (touching 0xFF000000..), epoch 1 performs the commit syscall, epoch 2
+/// halts. The private-input page's L2G entry (epoch 0 fini → epoch 1+ init)
+/// is the cross-epoch link under test.
+///
+/// Modelled on `continuation::tests::test_prove_and_verify_continuation`
+/// (continuation.rs:896) and `prove_elfs_tests::test_prove_private_input_xpage`
+/// (prove_elfs_tests.rs:2649).
+#[test]
+fn test_continuation_private_input_spans_epochs() {
+    let elf_bytes = crate::test_utils::asm_elf_bytes("test_private_input_xpage");
+
+    // 16-byte private input: 4-byte length prefix (=16) + 8 bytes of payload
+    // that will be committed + 4 padding bytes (the fixture commits bytes 4..12).
+    let mut input: Vec<u8> = Vec::with_capacity(16);
+    // Length prefix: 16 as little-endian u32.
+    input.extend_from_slice(&16u32.to_le_bytes());
+    // 8-byte payload that will be committed.
+    input.extend_from_slice(&[0x11u8, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88]);
+    // 4 trailing padding bytes (not committed).
+    input.extend_from_slice(&[0x00u8, 0x00, 0x00, 0x00]);
+    assert_eq!(input.len(), 16);
+
+    let result = crate::continuation::prove_and_verify_continuation(
+        &elf_bytes,
+        &input,
+        2,
+        &ProofOptions::default_test_options(),
+    );
+
+    // The continuation must prove and verify without error.
+    let output = result.expect("prove_and_verify_continuation must not error");
+
+    // The fixture commits bytes 4..12 of private input (the 8-byte payload).
+    assert_eq!(
+        output.as_deref(),
+        Some(&input[4..12]),
+        "committed output must equal private input bytes 4..12"
+    );
+}
+
+#[test]
+fn test_local_memory_bus_balances() {
+    // For each touched byte, L2G's init-receive (ts=0) + fini-send cancel the
+    // MEMW chain's init-send + fini-receive: the epoch-local Memory bus balances.
+    let initial_memory = HashMap::from([(10u64, 5u64)]);
+    let epochs = vec![vec![(10, 7, 3), (20, 9, 4)]];
+    let boundaries = epoch_boundaries(&initial_memory, &epochs);
+    assert!(prove_verify_memory(&boundaries[0], &boundaries[0]));
+}
+
+#[test]
+fn test_local_memory_bus_rejects_tamper() {
+    // L2G claims the real fini value but the access chain ends on a different
+    // one — the Memory bus no longer balances.
+    let initial_memory = HashMap::from([(10u64, 5u64)]);
+    let epochs = vec![vec![(10, 7, 3)]];
+    let boundaries = epoch_boundaries(&initial_memory, &epochs);
+    assert!(prove_verify_memory(&boundaries[0], &boundaries[0]));
+
+    let mut tampered = boundaries[0].clone();
+    tampered[0].fini.value = 999;
+    assert!(!prove_verify_memory(&boundaries[0], &tampered));
+}
diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs
index 4d0ac4477..9b32e3b8c 100644
--- a/prover/src/tests/mod.rs
+++ b/prover/src/tests/mod.rs
@@ -43,6 +43,8 @@ pub mod keccak_rnd_tests;
 #[cfg(test)]
 pub mod load_tests;
 #[cfg(test)]
+pub mod local_to_global_bus_tests;
+#[cfg(test)]
 pub mod lt_bus_tests;
 #[cfg(test)]
 pub mod lt_tests;
diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs
index a52383341..10013b5ed 100644
--- a/prover/src/tests/prove_elfs_tests.rs
+++ b/prover/src/tests/prove_elfs_tests.rs
@@ -59,6 +59,9 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool {
         &traces.page_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
 
@@ -77,6 +80,7 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool {
         &airs.air_refs(),
         &multi_proof,
         &traces.public_output_bytes,
+        0,
         &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
@@ -110,6 +114,9 @@ fn prove_vm_minimal(elf_bytes: &[u8], private_inputs: &[u8], max_rows: &MaxRowsC
         &traces.page_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
     let runtime_page_ranges = traces.runtime_page_ranges();
@@ -150,6 +157,9 @@ fn verify_vm_minimal(vm_proof: &VmProof, elf_bytes: &[u8]) -> bool {
         &page_configs,
         &vm_proof.table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
     let air_refs = airs.air_refs();
@@ -158,6 +168,7 @@ fn verify_vm_minimal(vm_proof: &VmProof, elf_bytes: &[u8]) -> bool {
         &air_refs,
         &vm_proof.proof,
         &vm_proof.public_output,
+        0,
         &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
@@ -1337,6 +1348,9 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() {
         &traces.page_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
     let proof = multi_prove_ram(
@@ -1354,6 +1368,9 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() {
         &wrong_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
     let verifier_air_refs = verifier_airs.air_refs();
@@ -1362,6 +1379,7 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() {
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
+        0,
         &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
@@ -2086,6 +2104,9 @@ fn test_deep_stack_runtime_pages_roundtrip() {
         &traces.page_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
     let proof = multi_prove_ram(
@@ -2102,6 +2123,9 @@ fn test_deep_stack_runtime_pages_roundtrip() {
         &verifier_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
     let verifier_air_refs = verifier_airs.air_refs();
@@ -2110,6 +2134,7 @@ fn test_deep_stack_runtime_pages_roundtrip() {
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
+        0,
         &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
@@ -2152,6 +2177,9 @@ fn test_deep_stack_missing_pages_rejected() {
         &traces.page_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
     let proof = multi_prove_ram(
@@ -2168,6 +2196,9 @@ fn test_deep_stack_missing_pages_rejected() {
         &wrong_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
     let verifier_air_refs = verifier_airs.air_refs();
@@ -2176,6 +2207,7 @@ fn test_deep_stack_missing_pages_rejected() {
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
+        0,
         &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
@@ -2253,6 +2285,9 @@ fn test_heap_alloc_runtime_pages_roundtrip() {
         &traces.page_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
     let proof = multi_prove_ram(
@@ -2269,6 +2304,9 @@ fn test_heap_alloc_runtime_pages_roundtrip() {
         &verifier_configs,
         &table_counts,
         None,
+        true,
+        None,
+        None,
         None,
     );
     let verifier_air_refs = verifier_airs.air_refs();
@@ -2277,6 +2315,7 @@ fn test_heap_alloc_runtime_pages_roundtrip() {
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
+        0,
         &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
@@ -2427,7 +2466,18 @@ fn test_crafted_zero_count_proof_must_not_verify() {
         store: 0,
         cpu32: 0,
     };
-    let airs = VmAirs::new(&elf, &proof_options, true, &[], &zero_counts, None, None);
+    let airs = VmAirs::new(
+        &elf,
+        &proof_options,
+        true,
+        &[],
+        &zero_counts,
+        None,
+        true,
+        None,
+        None,
+        None,
+    );
 
     let verifier_air_refs = airs.air_refs();
     assert_eq!(verifier_air_refs.len(), crate::FIXED_TABLE_COUNT);
@@ -2855,3 +2905,560 @@ fn test_count_elements_nonzero() {
         "total_auxiliary_field_elements should be nonzero (got {aux})"
     );
 }
+
+/// Prove and verify the FIRST continuation epoch in isolation. Epoch 0 starts
+/// from the program's initial memory/registers (so its init is correct) and does
+/// not terminate, so it is proven with the HALT table excluded (`include_halt = false`).
+#[test]
+fn test_prove_first_epoch_without_halt() {
+    use crate::compute_expected_commit_bus_balance;
+    use crate::tables::trace_builder::build_initial_image;
+    use crate::test_utils::asm_elf_bytes;
+
+    let _ = env_logger::builder().is_test(true).try_init();
+    let elf_bytes = asm_elf_bytes("arith_8");
+    let elf = Elf::load(&elf_bytes).unwrap();
+
+    // arith_8 is ~10 cycles; a power-of-two epoch_size of 4 makes epoch 0 an
+    // intermediate epoch (4 cycles → no CPU padding rows) with the program
+    // continuing past it.
+    let epoch_size = 4;
+    let epochs = Executor::new(&elf, vec![])
+        .unwrap()
+        .run_epochs(epoch_size)
+        .unwrap();
+    assert!(epochs.len() >= 2);
+
+    // Epoch 0's starting memory/registers are the program-start image; it does
+    // not halt (is_final=false).
+    let image = build_initial_image(&elf, &[]);
+    let register_init = crate::tables::register::register_init_from_entry_point(elf.entry_point);
+    let mut traces = Traces::from_image_and_logs(
+        &elf,
+        &image,
+        &register_init,
+        &epochs[0].logs,
+        &MaxRowsConfig::default(),
+        &[],
+        false,
+        false,
+        #[cfg(feature = "disk-spill")]
+        stark::storage_mode::StorageMode::Ram,
+    )
+    .unwrap();
+
+    let proof_options = ProofOptions::default_test_options();
+    let table_counts = traces.table_counts();
+    let airs = VmAirs::new(
+        &elf,
+        &proof_options,
+        true,
+        &traces.page_configs,
+        &table_counts,
+        None,
+        false,
+        None,
+        None,
+        None,
+    );
+
+    let multi_proof = multi_prove_ram(
+        airs.air_trace_pairs(&mut traces),
+        &mut DefaultTranscript::<E>::new(&[]),
+    )
+    .expect("first epoch failed to prove");
+
+    let mut replay = DefaultTranscript::<E>::new(&[]);
+    let expected_bus_balance = compute_expected_commit_bus_balance(
+        &airs.air_refs(),
+        &multi_proof,
+        &traces.public_output_bytes,
+        0,
+        &mut replay,
+    )
+    .expect("fingerprint collision in test");
+
+    assert!(
+        Verifier::multi_verify(
+            &airs.air_refs(),
+            &multi_proof,
+            &mut DefaultTranscript::<E>::new(&[]),
+            &expected_bus_balance,
+        ),
+        "first epoch (HALT excluded) failed to verify"
+    );
+}
+
+/// Prove and verify a NON-first continuation epoch (epoch 1) in isolation. Its
+/// starting memory and registers come from epoch 0's boundary snapshot, and it
+/// does not terminate (HALT excluded).
+#[test]
+fn test_prove_second_epoch_from_snapshot() {
+    use crate::compute_expected_commit_bus_balance;
+    use crate::tables::register;
+    use crate::test_utils::asm_elf_bytes;
+
+    let _ = env_logger::builder().is_test(true).try_init();
+    let elf_bytes = asm_elf_bytes("arith_8");
+    let elf = Elf::load(&elf_bytes).unwrap();
+
+    // arith_8 is ~10 cycles; epoch_size 4 (power of two) yields epochs 4/4/2, so
+    // epoch 1 is intermediate (4 cycles → no CPU padding rows).
+    let epoch_size = 4;
+    let epochs = Executor::new(&elf, vec![])
+        .unwrap()
+        .run_epochs(epoch_size)
+        .unwrap();
+    assert!(epochs.len() >= 3, "need an intermediate epoch 1");
+
+    // Epoch 1 starts from epoch 0's ending memory + register snapshot.
+    let image: std::collections::HashMap<u64, u8> = epochs[0].end_memory.iter_bytes().collect();
+    let register_init =
+        register::register_init_from_snapshot(&epochs[0].end_registers, epochs[0].end_pc);
+
+    let mut traces = Traces::from_image_and_logs(
+        &elf,
+        &image,
+        &register_init,
+        &epochs[1].logs,
+        &MaxRowsConfig::default(),
+        &[],
+        false,
+        false,
+        #[cfg(feature = "disk-spill")]
+        stark::storage_mode::StorageMode::Ram,
+    )
+    .unwrap();
+
+    let proof_options = ProofOptions::default_test_options();
+    let table_counts = traces.table_counts();
+    // The REGISTER commitment is built from this epoch's boundary register init.
+    let airs = VmAirs::new(
+        &elf,
+        &proof_options,
+        true,
+        &traces.page_configs,
+        &table_counts,
+        None,
+        false,
+        Some(&register_init),
+        None,
+        None,
+    );
+
+    let multi_proof = multi_prove_ram(
+        airs.air_trace_pairs(&mut traces),
+        &mut DefaultTranscript::<E>::new(&[]),
+    )
+    .expect("second epoch failed to prove");
+
+    let mut replay = DefaultTranscript::<E>::new(&[]);
+    let expected_bus_balance = compute_expected_commit_bus_balance(
+        &airs.air_refs(),
+        &multi_proof,
+        &traces.public_output_bytes,
+        0,
+        &mut replay,
+    )
+    .expect("fingerprint collision in test");
+
+    assert!(
+        Verifier::multi_verify(
+            &airs.air_refs(),
+            &multi_proof,
+            &mut DefaultTranscript::<E>::new(&[]),
+            &expected_bus_balance,
+        ),
+        "second epoch (register init from snapshot) failed to verify"
+    );
+}
+
+/// An epoch proof can COMMIT the local-to-global table inertly — committed
+/// columns, but no GlobalMemory bus and no constraints in the epoch proof — and
+/// still verify, exposing the L2G commitment root that the final proof (Step 4)
+/// will bind to. The cross-epoch GlobalMemory matching is proven separately.
+#[test]
+fn test_epoch_proof_commits_l2g() {
+    use crate::compute_expected_commit_bus_balance;
+    use crate::tables::local_to_global;
+    use crate::tables::register;
+    use crate::tables::trace_builder::{build_initial_image, epoch_touched_cells};
+    use crate::test_utils::asm_elf_bytes;
+    use std::collections::HashMap;
+
+    let _ = env_logger::builder().is_test(true).try_init();
+    let elf_bytes = asm_elf_bytes("all_loadstore_32");
+    let elf = Elf::load(&elf_bytes).unwrap();
+
+    // Power-of-two epoch size: all_loadstore_32 is ~34 cycles, so epoch_size 8
+    // makes epoch 0 an intermediate epoch with no CPU padding rows.
+    let epoch_size = 8;
+    let epochs = Executor::new(&elf, vec![])
+        .unwrap()
+        .run_epochs(epoch_size)
+        .unwrap();
+    assert!(epochs.len() >= 2);
+
+    let image = build_initial_image(&elf, &[]);
+    let register_init = register::register_init_from_entry_point(elf.entry_point);
+    let mut traces = Traces::from_image_and_logs(
+        &elf,
+        &image,
+        &register_init,
+        &epochs[0].logs,
+        &MaxRowsConfig::default(),
+        &[],
+        false,
+        false,
+        #[cfg(feature = "disk-spill")]
+        stark::storage_mode::StorageMode::Ram,
+    )
+    .unwrap();
+
+    // Epoch 0's local-to-global trace, committed inertly below.
+    let register_init0 = register::register_init_from_entry_point(elf.entry_point);
+    let touched = epoch_touched_cells(&elf, &image, &register_init0, &epochs[0].logs).unwrap();
+    let initial_memory: HashMap<u64, u64> = image.iter().map(|(&a, &v)| (a, v as u64)).collect();
+    let boundaries = local_to_global::epoch_boundaries(&initial_memory, &[touched]);
+    let mut l2g_trace = local_to_global::generate_local_to_global_trace(&boundaries[0]);
+
+    let proof_options = ProofOptions::default_test_options();
+    let table_counts = traces.table_counts();
+    let airs = VmAirs::new(
+        &elf,
+        &proof_options,
+        true,
+        &traces.page_configs,
+        &table_counts,
+        None,
+        false,
+        None,
+        None,
+        None,
+    );
+
+    // Inert L2G AIR: commits the trace columns, but no bus and no constraints.
+    let transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>> = vec![];
+    let inert_l2g_air: AirWithBuses<F, E, stark::lookup::NullBoundaryConstraintBuilder, ()> =
+        AirWithBuses::new(
+            local_to_global::cols::NUM_COLUMNS,
+            AuxiliaryTraceBuildData {
+                interactions: vec![],
+            },
+            &proof_options,
+            1,
+            transition_constraints,
+        );
+
+    let mut pairs = airs.air_trace_pairs(&mut traces);
+    pairs.push((&inert_l2g_air, &mut l2g_trace, &()));
+
+    let multi_proof = multi_prove_ram(pairs, &mut DefaultTranscript::<E>::new(&[]))
+        .expect("epoch proof with inert L2G failed to prove");
+
+    let mut refs = airs.air_refs();
+    refs.push(&inert_l2g_air);
+
+    let mut replay = DefaultTranscript::<E>::new(&[]);
+    let expected_bus_balance = compute_expected_commit_bus_balance(
+        &refs,
+        &multi_proof,
+        &traces.public_output_bytes,
+        0,
+        &mut replay,
+    )
+    .expect("fingerprint collision in test");
+
+    assert!(
+        Verifier::multi_verify(
+            &refs,
+            &multi_proof,
+            &mut DefaultTranscript::<E>::new(&[]),
+            &expected_bus_balance,
+        ),
+        "epoch proof with inert L2G failed to verify"
+    );
+
+    // The L2G table (pushed last) is committed: its Merkle root is exposed and
+    // non-zero — this is the `R_i` the final proof will be bound to in Step 4.
+    let l2g_root = multi_proof
+        .proofs
+        .last()
+        .unwrap()
+        .lde_trace_main_merkle_root;
+    assert_ne!(
+        l2g_root, [0u8; 32],
+        "L2G commitment root should be non-zero"
+    );
+}
+
+/// End-to-end continuation pipeline over a real ELF: split execution into epochs,
+/// prove+verify each epoch (each committing its local-to-global table inertly and
+/// exposing a root R_i), prove the cross-epoch GlobalMemory bus balances over the
+/// real per-epoch boundaries, and finally bind the cross-epoch proof to the REAL
+/// per-epoch roots. The R_i collected from the independent epoch proofs equal the
+/// per-epoch L2G sub-table roots in the cross-epoch proof — that root equality is
+/// the shared-commitment linkage between the epoch proofs and the global memory
+/// argument.
+#[test]
+fn test_continuation_pipeline_end_to_end() {
+    use crate::compute_expected_commit_bus_balance;
+    use crate::tables::local_to_global;
+    use crate::tables::register;
+    use crate::tables::trace_builder::{build_initial_image, epoch_touched_cells};
+    use crate::test_utils::asm_elf_bytes;
+    use std::collections::HashMap;
+
+    let _ = env_logger::builder().is_test(true).try_init();
+    let elf_bytes = asm_elf_bytes("all_loadstore_32");
+    let elf = Elf::load(&elf_bytes).unwrap();
+
+    // Split execution into power-of-two epochs (all_loadstore_32 is ~34 cycles, so
+    // epoch_size 8 gives intermediate epochs with no CPU padding rows).
+    let epoch_size = 8;
+    let epochs = Executor::new(&elf, vec![])
+        .unwrap()
+        .run_epochs(epoch_size)
+        .unwrap();
+    assert!(epochs.len() >= 2);
+
+    let image0 = build_initial_image(&elf, &[]);
+    let initial_memory: HashMap<u64, u64> = image0.iter().map(|(&a, &v)| (a, v as u64)).collect();
+
+    // Pass 1: each epoch's starting state + the cells it touches. Epoch 0 starts
+    // from the program image; epoch i>0 from epoch i-1's boundary snapshot.
+    let mut images: Vec<HashMap<u64, u8>> = Vec::with_capacity(epochs.len());
+    let mut register_inits: Vec<Vec<u32>> = Vec::with_capacity(epochs.len());
+    let mut all_touched: Vec<Vec<(u64, u64, u64)>> = Vec::with_capacity(epochs.len());
+    for (i, epoch) in epochs.iter().enumerate() {
+        let (image_i, register_init_i) = if i == 0 {
+            (
+                image0.clone(),
+                register::register_init_from_entry_point(elf.entry_point),
+            )
+        } else {
+            let image_i: HashMap<u64, u8> = epochs[i - 1].end_memory.iter_bytes().collect();
+            let register_init_i = register::register_init_from_snapshot(
+                &epochs[i - 1].end_registers,
+                epochs[i - 1].end_pc,
+            );
+            (image_i, register_init_i)
+        };
+        let touched_i = epoch_touched_cells(&elf, &image_i, &register_init_i, &epoch.logs).unwrap();
+        images.push(image_i);
+        register_inits.push(register_init_i);
+        all_touched.push(touched_i);
+    }
+    let boundaries = local_to_global::epoch_boundaries(&initial_memory, &all_touched);
+
+    let proof_options = ProofOptions::default_test_options();
+
+    // Pass 2: prove+verify each epoch, committing boundaries[i] inertly, and
+    // collect the L2G commitment root each epoch proof exposes.
+    let mut epoch_roots = Vec::with_capacity(epochs.len());
+    for (i, epoch) in epochs.iter().enumerate() {
+        let is_final = i == epochs.len() - 1;
+        let mut traces = Traces::from_image_and_logs(
+            &elf,
+            &images[i],
+            &register_inits[i],
+            &epoch.logs,
+            &MaxRowsConfig::default(),
+            &[],
+            is_final,
+            false,
+            #[cfg(feature = "disk-spill")]
+            stark::storage_mode::StorageMode::Ram,
+        )
+        .unwrap();
+
+        let table_counts = traces.table_counts();
+        let register_init_arg = if i == 0 {
+            None
+        } else {
+            Some(register_inits[i].as_slice())
+        };
+        let airs = VmAirs::new(
+            &elf,
+            &proof_options,
+            true,
+            &traces.page_configs,
+            &table_counts,
+            None,
+            is_final,
+            register_init_arg,
+            None,
+            None,
+        );
+
+        let mut l2g_trace = local_to_global::generate_local_to_global_trace(&boundaries[i]);
+        let transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>> = vec![];
+        let inert_l2g_air: AirWithBuses<F, E, stark::lookup::NullBoundaryConstraintBuilder, ()> =
+            AirWithBuses::new(
+                local_to_global::cols::NUM_COLUMNS,
+                AuxiliaryTraceBuildData {
+                    interactions: vec![],
+                },
+                &proof_options,
+                1,
+                transition_constraints,
+            );
+
+        let mut pairs = airs.air_trace_pairs(&mut traces);
+        pairs.push((&inert_l2g_air, &mut l2g_trace, &()));
+        let multi_proof = multi_prove_ram(pairs, &mut DefaultTranscript::<E>::new(&[]))
+            .expect("epoch proof failed to prove");
+
+        let mut refs = airs.air_refs();
+        refs.push(&inert_l2g_air);
+        let mut replay = DefaultTranscript::<E>::new(&[]);
+        let expected_bus_balance = compute_expected_commit_bus_balance(
+            &refs,
+            &multi_proof,
+            &traces.public_output_bytes,
+            0,
+            &mut replay,
+        )
+        .expect("fingerprint collision in test");
+        assert!(
+            Verifier::multi_verify(
+                &refs,
+                &multi_proof,
+                &mut DefaultTranscript::<E>::new(&[]),
+                &expected_bus_balance,
+            ),
+            "epoch {i} failed to verify"
+        );
+
+        epoch_roots.push(
+            multi_proof
+                .proofs
+                .last()
+                .unwrap()
+                .lde_trace_main_merkle_root,
+        );
+    }
+
+    // The cross-epoch GlobalMemory bus balances over the real per-epoch boundaries.
+    assert!(
+        crate::tests::local_to_global_bus_tests::prove_and_verify(&boundaries),
+        "final GlobalMemory bus must balance over real epoch data"
+    );
+
+    // The cross-epoch proof is bound to the REAL per-epoch roots: the L2G root each
+    // epoch proof exposed equals the per-epoch L2G sub-table root in the final proof.
+    let final_proof = crate::tests::local_to_global_bus_tests::prove_global(&boundaries);
+    assert!(
+        crate::verify_l2g_commitment_binding(&epoch_roots, &final_proof),
+        "final proof must be bound to the real per-epoch L2G roots"
+    );
+}
+
+/// A continuation epoch built with `l2g_memory_bookend = true` proves and verifies:
+/// PAGE no longer bookends the touched RAM bytes (they self-cancel), and the
+/// local-to-global table provides their `Memory`-bus init/fini instead. The epoch
+/// `Memory` bus still nets to zero — L2G has replaced PAGE as the bookend.
+#[test]
+fn test_epoch_memory_bus_with_l2g_bookend() {
+    use crate::compute_expected_commit_bus_balance;
+    use crate::tables::local_to_global;
+    use crate::tables::register;
+    use crate::tables::trace_builder::build_initial_image;
+    use crate::test_utils::asm_elf_bytes;
+    use std::collections::HashMap;
+
+    let _ = env_logger::builder().is_test(true).try_init();
+    let elf_bytes = asm_elf_bytes("all_loadstore_32");
+    let elf = Elf::load(&elf_bytes).unwrap();
+
+    // Power-of-two epoch size: all_loadstore_32 is ~34 cycles, so epoch_size 8
+    // makes epoch 0 an intermediate epoch with no CPU padding rows.
+    let epoch_size = 8;
+    let epochs = Executor::new(&elf, vec![])
+        .unwrap()
+        .run_epochs(epoch_size)
+        .unwrap();
+    assert!(epochs.len() >= 2);
+
+    // Epoch 0 starts from the program image; build it with the L2G memory bookend.
+    let image = build_initial_image(&elf, &[]);
+    let register_init = register::register_init_from_entry_point(elf.entry_point);
+    let mut traces = Traces::from_image_and_logs(
+        &elf,
+        &image,
+        &register_init,
+        &epochs[0].logs,
+        &MaxRowsConfig::default(),
+        &[],
+        false,
+        true,
+        #[cfg(feature = "disk-spill")]
+        stark::storage_mode::StorageMode::Ram,
+    )
+    .unwrap();
+    let initial_memory: HashMap<u64, u64> = image.iter().map(|(&a, &v)| (a, v as u64)).collect();
+    let boundaries =
+        local_to_global::epoch_boundaries(&initial_memory, &[traces.touched_memory_cells.clone()]);
+    traces.local_to_global = local_to_global::generate_local_to_global_trace(&boundaries[0]);
+
+    let proof_options = ProofOptions::default_test_options();
+    let table_counts = traces.table_counts();
+    let airs = VmAirs::new(
+        &elf,
+        &proof_options,
+        true,
+        &traces.page_configs,
+        &table_counts,
+        None,
+        false,
+        None,
+        None,
+        None,
+    );
+
+    // L2G air on the epoch-local Memory bus (the bookend that replaces PAGE).
+    let transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>> = vec![];
+    let l2g_air: AirWithBuses<F, E, stark::lookup::NullBoundaryConstraintBuilder, ()> =
+        AirWithBuses::new(
+            local_to_global::cols::NUM_COLUMNS,
+            AuxiliaryTraceBuildData {
+                interactions: local_to_global::memory_bus_interactions(),
+            },
+            &proof_options,
+            1,
+            transition_constraints,
+        );
+
+    // Take the L2G trace out of `traces` so `air_trace_pairs` can borrow the rest.
+    let mut l2g_trace = std::mem::replace(
+        &mut traces.local_to_global,
+        local_to_global::generate_local_to_global_trace(&[]),
+    );
+
+    let mut pairs = airs.air_trace_pairs(&mut traces);
+    pairs.push((&l2g_air, &mut l2g_trace, &()));
+    let multi_proof = multi_prove_ram(pairs, &mut DefaultTranscript::<E>::new(&[]))
+        .expect("epoch with L2G memory bookend failed to prove");
+
+    let mut refs = airs.air_refs();
+    refs.push(&l2g_air);
+    let mut replay = DefaultTranscript::<E>::new(&[]);
+    let expected_bus_balance = compute_expected_commit_bus_balance(
+        &refs,
+        &multi_proof,
+        &traces.public_output_bytes,
+        0,
+        &mut replay,
+    )
+    .expect("fingerprint collision in test");
+
+    assert!(
+        Verifier::multi_verify(
+            &refs,
+            &multi_proof,
+            &mut DefaultTranscript::<E>::new(&[]),
+            &expected_bus_balance,
+        ),
+        "epoch Memory bus must balance with L2G bookend + PAGE excluding touched cells"
+    );
+}
diff --git a/prover/src/tests/register_tests.rs b/prover/src/tests/register_tests.rs
index 1baa55eda..433968ab5 100644
--- a/prover/src/tests/register_tests.rs
+++ b/prover/src/tests/register_tests.rs
@@ -1,5 +1,7 @@
 //! Tests for the REGISTER table.
 
+use stark::proof::options::ProofOptions;
+
 use crate::tables::register::*;
 use crate::tables::types::*;
 
@@ -17,7 +19,7 @@ fn test_register_base_address() {
 fn test_generate_register_trace_empty() {
     let entry_point = 0x1000u64;
     let final_state = FinalRegisterStateMap::new();
-    let trace = generate_register_trace(&final_state, entry_point);
+    let trace = generate_register_trace(&final_state, &register_init_from_entry_point(entry_point));
 
     // Should have power-of-2 rows >= 67 (x0-x31, x254, x255)
     assert!(trace.num_rows() >= NUM_REGISTER_ADDRESSES);
@@ -66,7 +68,7 @@ fn test_generate_register_trace_with_access() {
         },
     );
 
-    let trace = generate_register_trace(&final_state, entry_point);
+    let trace = generate_register_trace(&final_state, &register_init_from_entry_point(entry_point));
 
     // Row 10 (address 10) should have the final state
     assert_eq!(*trace.main_table.get(10, cols::OFFSET), FE::from(10u64));
@@ -83,3 +85,51 @@ fn test_bus_interactions() {
     let interactions = bus_interactions();
     assert_eq!(interactions.len(), 2); // C1, C2
 }
+
+#[test]
+fn test_fini_from_trace_reads_every_register() {
+    let mut final_state = FinalRegisterStateMap::new();
+    final_state.insert(
+        register_base_address(5), // addr 10
+        FinalRegisterWordState {
+            timestamp: 100,
+            value: 0x42,
+        },
+    );
+    let trace = generate_register_trace(&final_state, &register_init_from_entry_point(0x1000));
+
+    let fini = fini_from_trace(&trace);
+    // One entry per real register Word address, in register_word_address_list order
+    // (positions 0..63 are addresses 0..63, so addr a is at index a for a < 64).
+    assert_eq!(fini.len(), NUM_REGISTER_ADDRESSES);
+    // Accessed register reflects its written value; PC (x255, addr 510 = index 65)
+    // reflects entry point; x0 (addr 0 = index 0) ends at 0.
+    assert_eq!(fini[10], 0x42);
+    assert_eq!(fini[65], 0x1000);
+    assert_eq!(fini[0], 0);
+}
+
+#[test]
+fn test_precomputed_commitment_with_fini_binds_fini() {
+    let opts = ProofOptions::default_test_options();
+    let init = register_init_from_entry_point(0x1000);
+    // Fini vectors in register_word_address_list order (index = address for a < 64).
+    let mut fini_a = vec![0u32; NUM_REGISTER_ADDRESSES];
+    fini_a[10] = 0x42;
+    fini_a[12] = 7;
+    let mut fini_b = fini_a.clone();
+    fini_b[10] = 0x43; // a different final value at one address
+
+    let root_a = compute_precomputed_commitment_with_fini(&opts, &init, &fini_a);
+    let root_a2 = compute_precomputed_commitment_with_fini(&opts, &init, &fini_a);
+    let root_b = compute_precomputed_commitment_with_fini(&opts, &init, &fini_b);
+
+    // Deterministic, and sensitive to every fini value: a tampered R_{i+1} yields a
+    // different preprocessed root, so a trace whose FINI != the committed R_{i+1}
+    // fails the verifier's preprocessed-root check.
+    assert_eq!(root_a, root_a2);
+    assert_ne!(root_a, root_b);
+
+    // The 3-column (with-fini) commitment differs from the 2-column monolithic one.
+    assert_ne!(root_a, compute_precomputed_commitment(&opts, &init));
+}
diff --git a/prover/src/tests/statement_tests.rs b/prover/src/tests/statement_tests.rs
index 55ac5a15b..73944e262 100644
--- a/prover/src/tests/statement_tests.rs
+++ b/prover/src/tests/statement_tests.rs
@@ -3,7 +3,7 @@
 use crypto::fiat_shamir::default_transcript::DefaultTranscript;
 use crypto::fiat_shamir::is_transcript::IsTranscript;
 
-use crate::statement::absorb_statement;
+use crate::statement::{StatementKind, absorb_continuation_global_statement, absorb_statement};
 use crate::test_utils::E;
 use crate::{RuntimePageRange, TableCounts};
 
@@ -47,7 +47,15 @@ fn state_after_absorb(
     ranges: &[RuntimePageRange],
 ) -> [u8; 32] {
     let mut t = DefaultTranscript::<E>::new(&[]);
-    absorb_statement(&mut t, elf, out, counts, priv_pages, ranges);
+    absorb_statement(
+        &mut t,
+        StatementKind::Monolithic,
+        elf,
+        out,
+        counts,
+        priv_pages,
+        ranges,
+    );
     t.state()
 }
 
@@ -120,3 +128,51 @@ fn public_output_length_prefix_prevents_collision() {
         state_after_absorb(b"elf", b"\x41", &counts_b, 0, &[]),
     );
 }
+
+fn epoch_state(elf: &[u8], label: u64) -> [u8; 32] {
+    let mut t = DefaultTranscript::<E>::new(&[]);
+    absorb_statement(
+        &mut t,
+        StatementKind::ContinuationEpoch { epoch_label: label },
+        elf,
+        b"out",
+        &sample_counts(),
+        1,
+        &sample_ranges(),
+    );
+    t.state()
+}
+
+#[test]
+fn continuation_epoch_state_binds_label_and_program() {
+    let baseline = epoch_state(b"elf", 1);
+    // Deterministic.
+    assert_eq!(baseline, epoch_state(b"elf", 1));
+    // Pinned to the epoch's position: a different label diverges (replay across
+    // positions is rejected).
+    assert_ne!(baseline, epoch_state(b"elf", 2), "must bind epoch_label");
+    // Pinned to the program.
+    assert_ne!(baseline, epoch_state(b"other-elf", 1), "must bind the ELF");
+}
+
+#[test]
+fn continuation_epoch_differs_from_monolithic_statement() {
+    // A monolithic proof and a continuation epoch proof must never share a
+    // transcript seed, even with the same base statement.
+    let monolithic = state_after_absorb(b"elf", b"out", &sample_counts(), 1, &sample_ranges());
+    assert_ne!(monolithic, epoch_state(b"elf", 1));
+}
+
+fn global_state(elf: &[u8], num_epochs: usize) -> [u8; 32] {
+    let mut t = DefaultTranscript::<E>::new(&[]);
+    absorb_continuation_global_statement(&mut t, elf, num_epochs);
+    t.state()
+}
+
+#[test]
+fn continuation_global_state_binds_program_and_epoch_count() {
+    let baseline = global_state(b"elf", 3);
+    assert_eq!(baseline, global_state(b"elf", 3)); // deterministic
+    assert_ne!(baseline, global_state(b"elf", 4), "must bind epoch count");
+    assert_ne!(baseline, global_state(b"other-elf", 3), "must bind the ELF");
+}
diff --git a/prover/src/tests/trace_builder_tests.rs b/prover/src/tests/trace_builder_tests.rs
index b3c1e1514..b23da43bf 100644
--- a/prover/src/tests/trace_builder_tests.rs
+++ b/prover/src/tests/trace_builder_tests.rs
@@ -823,3 +823,278 @@ mod routing_tests {
         );
     }
 }
+
+/// `from_image_and_logs` is a faithful generalization of `from_elf_and_logs`:
+/// fed the ELF-derived image, it must produce identical traces.
+#[test]
+fn test_from_image_and_logs_matches_from_elf_and_logs() {
+    use crate::tables::MaxRowsConfig;
+    use crate::tables::trace_builder::build_initial_image;
+    use crate::test_utils::asm_elf_bytes;
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+
+    let elf_bytes = asm_elf_bytes("basic_program");
+    let program = Elf::load(&elf_bytes).unwrap();
+    let logs = Executor::new(&program, vec![]).unwrap().run().unwrap().logs;
+    let max_rows = MaxRowsConfig::default();
+
+    let from_elf = Traces::from_elf_and_logs(
+        &program,
+        &logs,
+        &max_rows,
+        &[],
+        #[cfg(feature = "disk-spill")]
+        stark::storage_mode::StorageMode::Ram,
+    )
+    .unwrap();
+
+    let image = build_initial_image(&program, &[]);
+    let register_init =
+        crate::tables::register::register_init_from_entry_point(program.entry_point);
+    let from_image = Traces::from_image_and_logs(
+        &program,
+        &image,
+        &register_init,
+        &logs,
+        &max_rows,
+        &[],
+        true,
+        false,
+        #[cfg(feature = "disk-spill")]
+        stark::storage_mode::StorageMode::Ram,
+    )
+    .unwrap();
+
+    assert_eq!(
+        from_elf.total_field_elements(),
+        from_image.total_field_elements()
+    );
+    assert_eq!(
+        format!("{:?}", from_elf.table_counts()),
+        format!("{:?}", from_image.table_counts())
+    );
+}
+
+/// A memory snapshot at an epoch boundary converts into a non-empty initial
+/// image (the input `from_image_and_logs` consumes for the next epoch).
+#[test]
+fn test_epoch_end_memory_converts_to_image() {
+    use crate::test_utils::asm_elf_bytes;
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+    use std::collections::HashMap;
+
+    let elf_bytes = asm_elf_bytes("basic_program");
+    let program = Elf::load(&elf_bytes).unwrap();
+
+    let total = Executor::new(&program, vec![])
+        .unwrap()
+        .run()
+        .unwrap()
+        .logs
+        .len();
+    let epoch_size = (total / 3).max(1);
+    let epochs = Executor::new(&program, vec![])
+        .unwrap()
+        .run_epochs(epoch_size)
+        .unwrap();
+    assert!(epochs.len() >= 2);
+
+    let image: HashMap<u64, u8> = epochs[0].end_memory.iter_bytes().collect();
+    assert!(!image.is_empty());
+}
+
+/// Every epoch builds traces: intermediate epochs (`is_final = false`) skip HALT
+/// and start from the previous epoch's memory; the last epoch terminates.
+#[test]
+fn test_build_traces_for_all_epochs() {
+    use crate::tables::MaxRowsConfig;
+    use crate::tables::trace_builder::build_initial_image;
+    use crate::test_utils::asm_elf_bytes;
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+    use std::collections::HashMap;
+
+    let elf_bytes = asm_elf_bytes("basic_program");
+    let program = Elf::load(&elf_bytes).unwrap();
+
+    let total = Executor::new(&program, vec![])
+        .unwrap()
+        .run()
+        .unwrap()
+        .logs
+        .len();
+    let epoch_size = (total / 3).max(1);
+    let epochs = Executor::new(&program, vec![])
+        .unwrap()
+        .run_epochs(epoch_size)
+        .unwrap();
+    assert!(epochs.len() >= 2);
+
+    let max_rows = MaxRowsConfig::default();
+    let last = epochs.len() - 1;
+
+    for (i, epoch) in epochs.iter().enumerate() {
+        // Epoch 0 starts from the program-start image; later epochs from the
+        // previous epoch's ending memory + register snapshot.
+        let (image, register_init): (HashMap<u64, u8>, Vec<u32>) = if i == 0 {
+            (
+                build_initial_image(&program, &[]),
+                crate::tables::register::register_init_from_entry_point(program.entry_point),
+            )
+        } else {
+            (
+                epochs[i - 1].end_memory.iter_bytes().collect(),
+                crate::tables::register::register_init_from_snapshot(
+                    &epochs[i - 1].end_registers,
+                    epochs[i - 1].end_pc,
+                ),
+            )
+        };
+
+        let traces = Traces::from_image_and_logs(
+            &program,
+            &image,
+            &register_init,
+            &epoch.logs,
+            &max_rows,
+            &[],
+            i == last,
+            false,
+            #[cfg(feature = "disk-spill")]
+            stark::storage_mode::StorageMode::Ram,
+        )
+        .unwrap_or_else(|e| panic!("epoch {i} (is_final={}) failed to build: {e:?}", i == last));
+
+        assert!(
+            traces.table_counts().cpu > 0,
+            "epoch {i} produced an empty CPU trace"
+        );
+    }
+}
+
+/// A non-final epoch carrying the program-terminating instruction is rejected
+/// (rather than silently producing an unverifiable proof).
+#[test]
+fn test_terminating_epoch_rejected_when_not_final() {
+    use crate::tables::MaxRowsConfig;
+    use crate::tables::register::register_init_from_snapshot;
+    use crate::test_utils::asm_elf_bytes;
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+    use std::collections::HashMap;
+
+    let elf_bytes = asm_elf_bytes("basic_program");
+    let program = Elf::load(&elf_bytes).unwrap();
+
+    let total = Executor::new(&program, vec![])
+        .unwrap()
+        .run()
+        .unwrap()
+        .logs
+        .len();
+    let epoch_size = (total / 3).max(1);
+    let epochs = Executor::new(&program, vec![])
+        .unwrap()
+        .run_epochs(epoch_size)
+        .unwrap();
+    assert!(epochs.len() >= 2);
+
+    // The last epoch holds the terminating instruction; building it as a
+    // non-final epoch (is_final = false) must error.
+    let last = epochs.len() - 1;
+    let image: HashMap<u64, u8> = epochs[last - 1].end_memory.iter_bytes().collect();
+    let register_init =
+        register_init_from_snapshot(&epochs[last - 1].end_registers, epochs[last - 1].end_pc);
+
+    let result = Traces::from_image_and_logs(
+        &program,
+        &image,
+        &register_init,
+        &epochs[last].logs,
+        &MaxRowsConfig::default(),
+        &[],
+        false,
+        false,
+        #[cfg(feature = "disk-spill")]
+        stark::storage_mode::StorageMode::Ram,
+    );
+
+    assert!(
+        matches!(result, Err(crate::Error::HaltInNonFinalEpoch)),
+        "expected HaltInNonFinalEpoch error for a non-final terminating epoch"
+    );
+}
+
+/// End to end: extract real per-epoch touched cells from execution, feed them
+/// through the local-to-global boundary logic, and render each epoch's trace.
+#[test]
+fn test_local_to_global_traces_from_real_execution() {
+    use crate::tables::local_to_global::{epoch_boundaries, generate_local_to_global_trace};
+    use crate::tables::trace_builder::{build_initial_image, epoch_touched_cells};
+    use crate::test_utils::asm_elf_bytes;
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+    use std::collections::HashMap;
+
+    // A program that exercises memory (loads/stores), so some cells are touched.
+    let elf_bytes = asm_elf_bytes("all_loadstore_32");
+    let program = Elf::load(&elf_bytes).unwrap();
+
+    let total = Executor::new(&program, vec![])
+        .unwrap()
+        .run()
+        .unwrap()
+        .logs
+        .len();
+    let epoch_size = (total / 3).max(1);
+    let epochs = Executor::new(&program, vec![])
+        .unwrap()
+        .run_epochs(epoch_size)
+        .unwrap();
+    assert!(epochs.len() >= 2);
+
+    let elf_image = build_initial_image(&program, &[]);
+    let total_memory = elf_image.len();
+
+    // Per-epoch touched cells from real execution (epoch 0 from the ELF image,
+    // later epochs from the previous epoch's ending memory).
+    let mut per_epoch_touches: Vec<Vec<(u64, u64, u64)>> = Vec::new();
+    for (i, epoch) in epochs.iter().enumerate() {
+        let image: HashMap<u64, u8> = if i == 0 {
+            elf_image.clone()
+        } else {
+            epochs[i - 1].end_memory.iter_bytes().collect()
+        };
+        let register_init = if i == 0 {
+            crate::tables::register::register_init_from_entry_point(program.entry_point)
+        } else {
+            crate::tables::register::register_init_from_snapshot(
+                &epochs[i - 1].end_registers,
+                epochs[i - 1].end_pc,
+            )
+        };
+        per_epoch_touches
+            .push(epoch_touched_cells(&program, &image, &register_init, &epoch.logs).unwrap());
+    }
+
+    // The program touches memory somewhere, and every per-epoch touched set is
+    // sparse (far smaller than the whole memory image).
+    let total_touched: usize = per_epoch_touches.iter().map(Vec::len).sum();
+    assert!(total_touched > 0);
+    for touched in &per_epoch_touches {
+        assert!(touched.len() < total_memory);
+    }
+
+    // Boundary claims + rendered L2G trace per epoch.
+    let initial_memory: HashMap<u64, u64> =
+        elf_image.iter().map(|(&a, &v)| (a, v as u64)).collect();
+    let boundaries = epoch_boundaries(&initial_memory, &per_epoch_touches);
+
+    for (i, boundary_set) in boundaries.iter().enumerate() {
+        let trace = generate_local_to_global_trace(boundary_set);
+        let expected_rows = per_epoch_touches[i].len().next_power_of_two().max(1);
+        assert_eq!(trace.num_rows(), expected_rows);
+    }
+}