diff --git a/Cargo.lock b/Cargo.lock
index da2929c9d..fb9b09070 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -253,6 +253,12 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
 
+[[package]]
+name = "base16ct"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd307490d624467aa6f74b0eabb77633d1f758a7b25f12bceb0b22e08d9726f6"
+
 [[package]]
 name = "base64"
 version = "0.22.1"
@@ -344,8 +350,8 @@ version = "0.8.0"
 source = "git+https://github.com/lambdaclass/bls12_381?branch=expose-affine-constructors#78cad0378b17fc3157b83f514be192bf46edf9a1"
 dependencies = [
  "digest",
- "ff",
- "group",
+ "ff 0.13.1",
+ "group 0.13.0",
  "pairing",
  "rand_core 0.6.4",
  "subtle",
@@ -543,6 +549,12 @@ dependencies = [
  "tikv-jemallocator",
 ]
 
+[[package]]
+name = "cmov"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c9ea0ac24bc397ab3c98583a3c9ba74fa56b09a4449bbe172b9b1ddb016027a"
+
 [[package]]
 name = "colorchoice"
 version = "1.0.4"
@@ -555,6 +567,12 @@ version = "0.9.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
 
+[[package]]
+name = "const-oid"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c"
+
 [[package]]
 name = "const_format"
 version = "0.2.35"
@@ -590,6 +608,12 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "cpubits"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15b85f9c39137c3a891689859392b1bd49812121d0d61c9caf00d46ed5ce06ae"
+
 [[package]]
 name = "cpufeatures"
 version = "0.2.17"
@@ -760,6 +784,21 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "crypto-bigint"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a52aa3fcda4e6302a9f48734f234d35d4721b96f8fe07d073f07ce9df4f0271"
+dependencies = [
+ "cpubits",
+ "ctutils",
+ "hybrid-array",
+ "num-traits",
+ "rand_core 0.10.1",
+ "subtle",
+ "zeroize",
+]
+
 [[package]]
 name = "crypto-common"
 version = "0.1.7"
@@ -770,6 +809,26 @@ dependencies = [
  "typenum",
 ]
 
+[[package]]
+name = "crypto-common"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453"
+dependencies = [
+ "hybrid-array",
+ "rand_core 0.10.1",
+]
+
+[[package]]
+name = "ctutils"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e"
+dependencies = [
+ "cmov",
+ "subtle",
+]
+
 [[package]]
 name = "cudarc"
 version = "0.19.4"
@@ -820,7 +879,17 @@ version = "0.7.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
 dependencies = [
- "const-oid",
+ "const-oid 0.9.6",
+ "zeroize",
+]
+
+[[package]]
+name = "der"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71fd89660b2dc699704064e59e9dba0147b903e85319429e131620d022be411b"
+dependencies = [
+ "const-oid 0.10.2",
  "zeroize",
 ]
 
@@ -863,8 +932,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
 dependencies = [
  "block-buffer",
- "const-oid",
- "crypto-common",
+ "const-oid 0.9.6",
+ "crypto-common 0.1.7",
  "subtle",
 ]
 
@@ -880,9 +949,9 @@ version = "0.16.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
 dependencies = [
- "der",
+ "der 0.7.10",
  "digest",
- "elliptic-curve",
+ "elliptic-curve 0.13.8",
  "rfc6979",
  "signature",
  "spki",
@@ -892,9 +961,8 @@ dependencies = [
 name = "ecsm"
 version = "0.1.0"
 dependencies = [
- "k256",
- "num-bigint",
- "num-traits",
+ "crypto-bigint 0.7.5",
+ "k256 0.14.0-rc.14",
 ]
 
 [[package]]
@@ -921,15 +989,33 @@ version = "0.13.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
 dependencies = [
- "base16ct",
- "crypto-bigint",
+ "base16ct 0.2.0",
+ "crypto-bigint 0.5.5",
  "digest",
- "ff",
+ "ff 0.13.1",
  "generic-array",
- "group",
+ "group 0.13.0",
  "pkcs8",
  "rand_core 0.6.4",
- "sec1",
+ "sec1 0.7.3",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "elliptic-curve"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3273f1195b6f6253ebda493d6742c8baa9b26a291674cd96d92a0f09e90e9b46"
+dependencies = [
+ "base16ct 1.0.0",
+ "crypto-bigint 0.7.5",
+ "crypto-common 0.2.2",
+ "ff 0.14.0",
+ "group 0.14.0",
+ "hybrid-array",
+ "rand_core 0.10.1",
+ "sec1 0.8.1",
  "subtle",
  "zeroize",
 ]
@@ -1060,9 +1146,9 @@ dependencies = [
  "ark-ff",
  "bls12_381",
  "ethereum-types",
- "ff",
+ "ff 0.13.1",
  "hex-literal",
- "k256",
+ "k256 0.13.4",
  "malachite",
  "num-bigint",
  "p256",
@@ -1101,7 +1187,7 @@ dependencies = [
  "ethereum-types",
  "ethrex-common",
  "ethrex-crypto",
- "k256",
+ "k256 0.13.4",
  "lambdaworks-crypto",
  "rkyv",
  "secp256k1",
@@ -1208,6 +1294,16 @@ dependencies = [
  "subtle",
 ]
 
+[[package]]
+name = "ff"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1f686ab92a9fb0eaf188f6c6c87b89490baa6fdb0db4544ba4dc47f7942489f"
+dependencies = [
+ "rand_core 0.10.1",
+ "subtle",
+]
+
 [[package]]
 name = "find-msvc-tools"
 version = "0.1.5"
@@ -1292,11 +1388,22 @@ version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
 dependencies = [
- "ff",
+ "ff 0.13.1",
  "rand_core 0.6.4",
  "subtle",
 ]
 
+[[package]]
+name = "group"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fd1a1c7a5206c5b7a3f5a0d7ccd3ff85d0c8f5133d62a02680255b0004af5f4"
+dependencies = [
+ "ff 0.14.0",
+ "rand_core 0.10.1",
+ "subtle",
+]
+
 [[package]]
 name = "half"
 version = "1.8.3"
@@ -1408,6 +1515,17 @@ dependencies = [
  "digest",
 ]
 
+[[package]]
+name = "hybrid-array"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da"
+dependencies = [
+ "subtle",
+ "typenum",
+ "zeroize",
+]
+
 [[package]]
 name = "iana-time-zone"
 version = "0.1.64"
@@ -1600,12 +1718,24 @@ checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
 dependencies = [
  "cfg-if",
  "ecdsa",
- "elliptic-curve",
+ "elliptic-curve 0.13.8",
  "once_cell",
  "sha2",
  "signature",
 ]
 
+[[package]]
+name = "k256"
+version = "0.14.0-rc.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "905d38bdbb43bb506efa0a428b3e969ff244549832a86b18591492f503adfe37"
+dependencies = [
+ "cpubits",
+ "elliptic-curve 0.14.0",
+ "primeorder 0.14.0-rc.14",
+ "wnaf",
+]
+
 [[package]]
 name = "keccak"
 version = "0.1.5"
@@ -1923,8 +2053,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b"
 dependencies = [
  "ecdsa",
- "elliptic-curve",
- "primeorder",
+ "elliptic-curve 0.13.8",
+ "primeorder 0.13.6",
  "sha2",
 ]
 
@@ -1934,7 +2064,7 @@ version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "81fec4625e73cf41ef4bb6846cafa6d44736525f442ba45e407c4a000a13996f"
 dependencies = [
- "group",
+ "group 0.13.0",
 ]
 
 [[package]]
@@ -1983,7 +2113,7 @@ version = "0.10.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
 dependencies = [
- "der",
+ "der 0.7.10",
  "spki",
 ]
 
@@ -2045,13 +2175,37 @@ dependencies = [
  "zerocopy",
 ]
 
+[[package]]
+name = "primefield"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c555a6e4eb7d4e158fcb028c835c3b8642206ddc279b5c6b202ef9a8bdb592f4"
+dependencies = [
+ "crypto-bigint 0.7.5",
+ "crypto-common 0.2.2",
+ "ff 0.14.0",
+ "rand_core 0.10.1",
+ "subtle",
+ "zeroize",
+]
+
 [[package]]
 name = "primeorder"
 version = "0.13.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6"
 dependencies = [
- "elliptic-curve",
+ "elliptic-curve 0.13.8",
+]
+
+[[package]]
+name = "primeorder"
+version = "0.14.0-rc.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e56e6d67fdf5744e9e245ae571450fe584b91f5af261d0e40163b618e53a1f6"
+dependencies = [
+ "elliptic-curve 0.14.0",
+ "primefield",
 ]
 
 [[package]]
@@ -2219,6 +2373,12 @@ dependencies = [
  "getrandom 0.3.4",
 ]
 
+[[package]]
+name = "rand_core"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
+
 [[package]]
 name = "rand_xorshift"
 version = "0.4.0"
@@ -2468,14 +2628,28 @@ version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
 dependencies = [
- "base16ct",
- "der",
+ "base16ct 0.2.0",
+ "der 0.7.10",
  "generic-array",
  "pkcs8",
  "subtle",
  "zeroize",
 ]
 
+[[package]]
+name = "sec1"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d56d437c2f19203ce5f7122e507831de96f3d2d4d3be5af44a0b0a09d8a80e4d"
+dependencies = [
+ "base16ct 1.0.0",
+ "ctutils",
+ "der 0.8.0",
+ "hybrid-array",
+ "subtle",
+ "zeroize",
+]
+
 [[package]]
 name = "secp256k1"
 version = "0.30.0"
@@ -2650,7 +2824,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
 dependencies = [
  "base64ct",
- "der",
+ "der 0.7.10",
 ]
 
 [[package]]
@@ -3029,9 +3203,9 @@ dependencies = [
 
 [[package]]
 name = "typenum"
-version = "1.19.0"
+version = "1.20.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
+checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20"
 
 [[package]]
 name = "uint"
@@ -3380,6 +3554,17 @@ version = "0.46.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
 
+[[package]]
+name = "wnaf"
+version = "0.14.0-rc.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f86421f2a70c9e6cab8d84c99fb62d8761d355bd1285443a7e7ccad15aa515f2"
+dependencies = [
+ "ff 0.14.0",
+ "group 0.14.0",
+ "hybrid-array",
+]
+
 [[package]]
 name = "wyz"
 version = "0.5.1"
diff --git a/crypto/ecsm/Cargo.toml b/crypto/ecsm/Cargo.toml
index 4d2800b2c..ebdca1962 100644
--- a/crypto/ecsm/Cargo.toml
+++ b/crypto/ecsm/Cargo.toml
@@ -6,9 +6,9 @@ edition = "2024"
 license.workspace = true
 
 [dependencies]
-num-bigint = "0.4.6"
-num-traits = "0.2.19"
+crypto-bigint = { version = "0.7.5", default-features = false }
 # Audited secp256k1 arithmetic (host-side witness generation only; never in the
 # constraint system). Used for executor scalar multiplication and for the projective
 # double-and-add replay + batch inversion that builds ECDAS step witnesses efficiently.
-k256 = { version = "0.13", default-features = false, features = ["arithmetic", "expose-field"] }
+k256 = { version = "0.14.0-rc.14", default-features = false, features = ["arithmetic"] }
+
diff --git a/crypto/ecsm/src/curve.rs b/crypto/ecsm/src/curve.rs
index 2f2acb0e1..535a5b6bd 100644
--- a/crypto/ecsm/src/curve.rs
+++ b/crypto/ecsm/src/curve.rs
@@ -6,13 +6,36 @@
 //! `k in [1, N)` (see `ecsm.typ` "Point at infinity" / ECDAS soundness argument), so the
 //! affine formulas below are always well defined.
 
-use num_bigint::BigUint;
+use crypto_bigint::U256;
+use crypto_bigint::modular::ConstMontyForm;
+
+// Compile-time Montgomery parameters for the secp256k1 base-field modulus `p`.
+crypto_bigint::const_monty_params!(
+    Secp256k1Field,
+    U256,
+    "fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f"
+);
+
+type Fp = ConstMontyForm<Secp256k1Field, 4>;
 
 /// An affine curve point. Never the point at infinity.
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct AffinePoint {
-    pub x: BigUint,
-    pub y: BigUint,
+    pub x: U256,
+    pub y: U256,
+}
+
+fn fe_from_u256(v: &U256) -> Fp {
+    ConstMontyForm::new(v)
+}
+
+fn u256_from_fe(f: &Fp) -> U256 {
+    f.retrieve()
+}
+
+fn fp_invert(f: Fp) -> Option<Fp> {
+    // safegcd inversion; `None` for a zero input (which has no inverse).
+    Option::from(f.invert())
 }
 
 /// Recovers the canonical (even) `y` for a given `x` such that `y^2 = x^3 + b mod p`.
@@ -23,13 +46,14 @@ pub struct AffinePoint {
 ///
 /// Returns `None` when `x` is not a valid curve x-coordinate (`x^3 + b` is not a quadratic
 /// residue, or `x` is not a canonical field element).
-pub fn recover_y_canonical(x: &BigUint) -> Option<BigUint> {
-    // SEC1 compressed encoding: the `0x02` prefix selects the even-`y` root, delegated to k256.
+pub fn recover_y_canonical(x: &U256) -> Option<U256> {
+    use k256::elliptic_curve::sec1::{FromSec1Point, Sec1Point};
+    let x_bytes: [u8; 32] = x.to_be_bytes().into();
     let mut enc = [0u8; 33];
     enc[0] = 0x02;
-    enc[1..33].copy_from_slice(&be32(x));
-    let ep = EncodedPoint::from_bytes(enc).ok()?;
-    let affine: K256Affine = Option::from(K256Affine::from_encoded_point(&ep))?;
+    enc[1..33].copy_from_slice(&x_bytes);
+    let ep = Sec1Point::<k256::Secp256k1>::from_bytes(enc).ok()?;
+    let affine: K256Affine = Option::from(K256Affine::from_sec1_point(&ep))?;
     Some(from_k256_affine(&affine).y)
 }
 
@@ -47,14 +71,14 @@ pub struct StepPts {
     pub r: AffinePoint,
     /// Slope of this step: add => (yG-yA)/(xG-xA), double => 3xA^2/(2yA).
     /// Precomputed here (batched) so the witness builder never inverts per step.
-    pub lambda: BigUint,
+    pub lambda: U256,
 }
 
 /// Bit length minus one = position of the most significant set bit (`len_k`).
 /// Requires `k >= 1`.
-pub fn msb_position(k: &BigUint) -> u32 {
-    debug_assert!(k > &BigUint::from(0u8));
-    (k.bits() as u32) - 1
+pub fn msb_position(k: &U256) -> u32 {
+    debug_assert!(*k != U256::ZERO);
+    k.bits_vartime() - 1
 }
 
 // =========================================================================
@@ -67,54 +91,41 @@ pub fn msb_position(k: &BigUint) -> u32 {
 // ~2*len_k Fermat inversions of the reference with two batched inversions.
 // =========================================================================
 
-use k256::elliptic_curve::ff::PrimeField as _;
 use k256::elliptic_curve::group::Curve as _;
-use k256::elliptic_curve::sec1::{FromEncodedPoint, ToEncodedPoint};
-use k256::{AffinePoint as K256Affine, EncodedPoint, FieldElement, ProjectivePoint, Scalar};
-
-/// 32 big-endian bytes of a value known to fit in 256 bits (left zero-padded).
-fn be32(v: &BigUint) -> [u8; 32] {
-    let b = v.to_bytes_be();
-    debug_assert!(b.len() <= 32, "value exceeds 256 bits");
-    let mut out = [0u8; 32];
-    out[32 - b.len()..].copy_from_slice(&b);
-    out
-}
-
-fn fe_from_biguint(v: &BigUint) -> FieldElement {
-    Option::from(FieldElement::from_bytes(&be32(v).into()))
-        .expect("ECSM: field element must be < p")
-}
-
-fn biguint_from_fe(f: &FieldElement) -> BigUint {
-    BigUint::from_bytes_be(&f.to_bytes())
-}
+use k256::elliptic_curve::sec1::{FromSec1Point, Sec1Point, ToSec1Point};
+use k256::{AffinePoint as K256Affine, ProjectivePoint, Scalar};
+use k256::elliptic_curve::PrimeField as _;
 
 fn to_k256_affine(a: &AffinePoint) -> K256Affine {
-    let ep = EncodedPoint::from_affine_coordinates(&be32(&a.x).into(), &be32(&a.y).into(), false);
-    Option::from(K256Affine::from_encoded_point(&ep)).expect("ECSM: point must be on the curve")
+    let x_bytes: [u8; 32] = a.x.to_be_bytes().into();
+    let y_bytes: [u8; 32] = a.y.to_be_bytes().into();
+    let ep = Sec1Point::<k256::Secp256k1>::from_affine_coordinates(
+        <&k256::elliptic_curve::FieldBytes<k256::Secp256k1>>::from(&x_bytes),
+        <&k256::elliptic_curve::FieldBytes<k256::Secp256k1>>::from(&y_bytes),
+        false,
+    );
+    Option::from(K256Affine::from_sec1_point(&ep)).expect("ECSM: point must be on the curve")
 }
 
 fn from_k256_affine(p: &K256Affine) -> AffinePoint {
-    let ep = p.to_encoded_point(false);
+    let ep = p.to_sec1_point(false);
     AffinePoint {
-        x: BigUint::from_bytes_be(ep.x().expect("ECSM: affine point has x")),
-        y: BigUint::from_bytes_be(ep.y().expect("ECSM: affine point has y")),
+        x: U256::from_be_slice(ep.x().expect("ECSM: affine point has x")),
+        y: U256::from_be_slice(ep.y().expect("ECSM: affine point has y")),
     }
 }
 
-/// Montgomery's batch inversion over `FieldElement`: one real inversion total.
-fn batch_invert(xs: &[FieldElement]) -> Vec<FieldElement> {
+/// Montgomery's batch inversion over `Fp`: one real inversion total.
+fn batch_invert(xs: &[Fp]) -> Vec<Fp> {
     let n = xs.len();
     let mut prefix = Vec::with_capacity(n);
-    let mut acc = FieldElement::ONE;
+    let mut acc = Fp::ONE;
     for x in xs {
         prefix.push(acc);
         acc *= *x;
     }
-    let mut inv =
-        Option::<FieldElement>::from(acc.invert()).expect("ECSM: batch denominator is nonzero");
-    let mut out = vec![FieldElement::ONE; n];
+    let mut inv = fp_invert(acc).expect("ECSM: batch denominator is nonzero");
+    let mut out = vec![Fp::ONE; n];
     for i in (0..n).rev() {
         out[i] = prefix[i] * inv;
         inv *= xs[i];
@@ -125,14 +136,14 @@ fn batch_invert(xs: &[FieldElement]) -> Vec<FieldElement> {
 /// The double-and-add schedule for `k`: one `(round, op, next_op)` per ECDAS row.
 /// Pure bit logic (data-independent of point values), identical control flow to
 /// the reference replay.
-fn schedule(k: &BigUint) -> Vec<(u8, u8, u8)> {
+fn schedule(k: &U256) -> Vec<(u8, u8, u8)> {
     let m = msb_position(k) as i64;
     let mut sched = Vec::new();
     let mut round: i64 = m - 1;
     let mut op: u8 = 0;
     while round >= 0 {
         let next_op = if op == 0 {
-            if k.bit(round as u64) { 1u8 } else { 0u8 }
+            if k.bit_vartime(round as u32) { 1u8 } else { 0u8 }
         } else {
             0u8
         };
@@ -150,8 +161,9 @@ fn schedule(k: &BigUint) -> Vec<(u8, u8, u8)> {
 /// Executor fast path: the x-coordinate of `k·g`, via k256's optimized scalar
 /// multiplication. Needs no step list or slopes, so it skips all witness work.
 /// `k` must be in `[1, N)` (guaranteed by `prepare`).
-pub fn scalar_mul_affine_x(k: &BigUint, g: &AffinePoint) -> BigUint {
-    let scalar = Option::<Scalar>::from(Scalar::from_repr(be32(k).into()))
+pub fn scalar_mul_affine_x(k: &U256, g: &AffinePoint) -> U256 {
+    let k_bytes: [u8; 32] = k.to_be_bytes().into();
+    let scalar = Option::<Scalar>::from(Scalar::from_repr(k_bytes.into()))
         .expect("ECSM: scalar k must be < N");
     let g_proj = ProjectivePoint::from(to_k256_affine(g));
     let r = (g_proj * scalar).to_affine();
@@ -162,7 +174,7 @@ pub fn scalar_mul_affine_x(k: &BigUint, g: &AffinePoint) -> BigUint {
-/// batched inversion. Produces the identical `StepPts` sequence as the BigUint
+/// batched inversion. Produces the identical `StepPts` sequence as the U256
 /// reference replay (validated by the parity test in `tests::curve_tests`), but with
 /// two batched inversions instead of one per double/add step.
-pub fn replay_double_and_add(k: &BigUint, g: &AffinePoint) -> (Vec<StepPts>, AffinePoint) {
+pub fn replay_double_and_add(k: &U256, g: &AffinePoint) -> (Vec<StepPts>, AffinePoint) {
     let sched = schedule(k);
     if sched.is_empty() {
         return (Vec::new(), g.clone()); // k == 1: result is g, no steps
@@ -193,14 +205,14 @@ pub fn replay_double_and_add(k: &BigUint, g: &AffinePoint) -> (Vec<StepPts>, Aff
     let r_aff: Vec<AffinePoint> = affine[n..].iter().map(from_k256_affine).collect();
 
     // 3. batch-invert all slope denominators (add: xG-xA, double: 2yA).
-    let gx_fe = fe_from_biguint(&g.x);
-    let gy_fe = fe_from_biguint(&g.y);
-    let denoms: Vec<FieldElement> = (0..n)
+    let gx_fe = fe_from_u256(&g.x);
+    let gy_fe = fe_from_u256(&g.y);
+    let denoms: Vec<Fp> = (0..n)
         .map(|i| {
             if sched[i].1 == 1 {
-                gx_fe - fe_from_biguint(&a_aff[i].x)
+                gx_fe - fe_from_u256(&a_aff[i].x)
             } else {
-                let ya = fe_from_biguint(&a_aff[i].y);
+                let ya = fe_from_u256(&a_aff[i].y);
                 ya + ya
             }
         })
@@ -211,10 +223,10 @@ pub fn replay_double_and_add(k: &BigUint, g: &AffinePoint) -> (Vec<StepPts>, Aff
     let steps: Vec<StepPts> = (0..n)
         .map(|i| {
             let num = if sched[i].1 == 1 {
-                gy_fe - fe_from_biguint(&a_aff[i].y)
+                gy_fe - fe_from_u256(&a_aff[i].y)
             } else {
                 let x2 = {
-                    let xa = fe_from_biguint(&a_aff[i].x);
+                    let xa = fe_from_u256(&a_aff[i].x);
                     xa * xa
                 };
                 x2 + x2 + x2 // 3 xA^2
@@ -226,7 +238,7 @@ pub fn replay_double_and_add(k: &BigUint, g: &AffinePoint) -> (Vec<StepPts>, Aff
                 op: sched[i].1,
                 next_op: sched[i].2,
                 r: r_aff[i].clone(),
-                lambda: biguint_from_fe(&(num * inv_denoms[i])),
+                lambda: u256_from_fe(&(num * inv_denoms[i])),
             }
         })
         .collect();
diff --git a/crypto/ecsm/src/lib.rs b/crypto/ecsm/src/lib.rs
index 3a0a44dff..0ed8b8c05 100644
--- a/crypto/ecsm/src/lib.rs
+++ b/crypto/ecsm/src/lib.rs
@@ -10,7 +10,7 @@
 //!
 //! Curve point operations are delegated to the RustCrypto `k256` crate; witness generation
 //! replays the schedule in `k256` projective coordinates and batch-inverts the slope
-//! denominators, while `num-bigint` carries the coordinate/limb representation the trace
+//! denominators in `crypto-bigint`, which also carries the coordinate/limb representation the trace
 //! needs. All of this runs once per `ECALL`, so it is not performance critical.
 //!
 //! Curve: secp256k1, `y^2 = x^3 + 7 mod p`, `p = 2^256 - 2^32 - 977`, order `N`.
@@ -21,7 +21,7 @@ pub mod witness;
 #[cfg(test)]
 mod tests;
 
-use num_bigint::BigUint;
+use crypto_bigint::U256;
 
 pub use curve::{AffinePoint, recover_y_canonical, replay_double_and_add};
 pub use witness::{EcdasStep, EcsmWitness, compute_witness};
@@ -48,14 +48,22 @@ pub const R_BYTES: [u8; 33] = [
     0x02,
 ];
 
-/// The prime field modulus `p` as a `BigUint`.
-pub fn p() -> BigUint {
-    BigUint::from_bytes_le(&P_BYTES)
+/// The prime field modulus `p` as a `U256`.
+pub const P: U256 =
+    U256::from_be_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F");
+
+/// The curve group order `N` as a `U256`.
+pub const N: U256 =
+    U256::from_be_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141");
+
+/// Returns the prime field modulus `p`; function form of the [`P`] constant.
+pub const fn p() -> U256 {
+    P
 }
 
-/// The curve order `N` as a `BigUint`.
-pub fn n() -> BigUint {
-    BigUint::from_bytes_le(&N_BYTES)
+/// Returns the curve group order `N`; function form of the [`N`] constant.
+pub const fn n() -> U256 {
+    N
 }
 
 /// Errors that prevent a sound ECSM witness from existing for the given inputs.
@@ -86,16 +94,6 @@ impl core::fmt::Display for EcsmError {
 
 impl std::error::Error for EcsmError {}
 
-/// Converts a `BigUint` to 32 little-endian bytes (zero-padded / truncated to 32).
-pub fn to_le_32(v: &BigUint) -> [u8; 32] {
-    debug_assert!(v.bits() <= 256, "to_le_32: value exceeds 256 bits");
-    let mut bytes = v.to_bytes_le();
-    bytes.resize(32, 0);
-    let mut out = [0u8; 32];
-    out.copy_from_slice(&bytes[..32]);
-    out
-}
-
 /// Validates the scalar and recovers the generator point from `(xG, k)`.
 ///
 /// Shared front-end for both entry points: checks `0 < k < N`, rebuilds `xG`, and recovers
@@ -103,15 +101,15 @@ pub fn to_le_32(v: &BigUint) -> [u8; 32] {
 pub(crate) fn prepare(
     k_le: &[u8; 32],
     xg_le: &[u8; 32],
-) -> Result<(BigUint, AffinePoint), EcsmError> {
-    let k = BigUint::from_bytes_le(k_le);
-    if k == BigUint::from(0u8) {
+) -> Result<(U256, AffinePoint), EcsmError> {
+    let k = U256::from_le_slice(k_le);
+    if k == U256::ZERO {
         return Err(EcsmError::ScalarIsZero);
     }
     if k >= n() {
         return Err(EcsmError::ScalarOutOfRange);
     }
-    let xg = BigUint::from_bytes_le(xg_le);
+    let xg = U256::from_le_slice(xg_le);
     if xg >= p() {
         return Err(EcsmError::CoordinateOutOfRange);
     }
@@ -124,5 +122,5 @@ pub(crate) fn prepare(
 /// to guest memory at `addr_xR`.
 pub fn scalar_mul_x(k_le: &[u8; 32], xg_le: &[u8; 32]) -> Result<[u8; 32], EcsmError> {
     let (k, g) = prepare(k_le, xg_le)?;
-    Ok(to_le_32(&curve::scalar_mul_affine_x(&k, &g)))
+    Ok(curve::scalar_mul_affine_x(&k, &g).to_le_bytes().into())
 }
diff --git a/crypto/ecsm/src/tests/curve_tests.rs b/crypto/ecsm/src/tests/curve_tests.rs
index 2065c658a..140a8f0ec 100644
--- a/crypto/ecsm/src/tests/curve_tests.rs
+++ b/crypto/ecsm/src/tests/curve_tests.rs
@@ -1,7 +1,7 @@
-//! Parity tests pinning the production k256 fast path to the BigUint reference
+//! Parity tests pinning the production k256 fast path to the U256 reference
 //! replay (relocated from `curve.rs::parity_tests`).
 
-use num_bigint::BigUint;
+use crypto_bigint::{NonZero, U256};
 
 use crate::curve::{AffinePoint, recover_y_canonical, replay_double_and_add, scalar_mul_affine_x};
 use crate::n;
@@ -9,26 +9,18 @@ use crate::tests::reference::replay_double_and_add_reference;
 
 /// secp256k1 generator (even y), via the canonical y recovery.
 fn generator() -> AffinePoint {
-    let gx = BigUint::parse_bytes(
-        b"79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798",
-        16,
-    )
-    .expect("valid generator x hex");
+    let gx = U256::from_be_hex("79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798");
     let gy = recover_y_canonical(&gx).expect("G on curve");
     AffinePoint { x: gx, y: gy }
 }
 
-fn be(hex: &[u8]) -> BigUint {
-    BigUint::parse_bytes(hex, 16).expect("valid hex literal")
-}
-
 /// The k256 fast path must produce byte-identical `StepPts` (points + λ) and the
-/// same final point as the BigUint reference, across small, structured, large and
+/// same final point as the U256 reference, across small, structured, large and
 /// near-order scalars. This pins the audited fast path to the spec-faithful reference.
 #[test]
 fn k256_replay_matches_reference() {
     let g = generator();
-    let mut scalars: Vec<BigUint> = (1u64..40).map(BigUint::from).collect();
+    let mut scalars: Vec<U256> = (1u64..40).map(U256::from).collect();
     for &kv in &[
         0xFFu64,
         0x101,
@@ -39,43 +31,44 @@ fn k256_replay_matches_reference() {
         123_456_789,
         u64::MAX,
     ] {
-        scalars.push(BigUint::from(kv));
+        scalars.push(U256::from(kv));
     }
     // large 256-bit scalars (must stay < N) and the order boundary
-    scalars.push(be(
-        b"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF",
+    scalars.push(U256::from_be_hex(
+        "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF",
     ));
-    scalars.push(be(
-        b"7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF5D576E7357A4501DDFE92F46681B20A0",
+    scalars.push(U256::from_be_hex(
+        "7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF5D576E7357A4501DDFE92F46681B20A0",
     ));
-    scalars.push(&n() / BigUint::from(2u8));
-    scalars.push(&n() - BigUint::from(1u8));
+    let two = NonZero::new(U256::from(2u32)).expect("2 != 0");
+    scalars.push(n().div_rem(&two).0);
+    scalars.push(n().wrapping_sub(&U256::ONE));
 
     for k in scalars {
         let (steps, result) = replay_double_and_add(&k, &g);
         let (steps_ref, result_ref) = replay_double_and_add_reference(&k, &g);
-        assert_eq!(result, result_ref, "final point mismatch for k = {k}");
-        assert_eq!(steps, steps_ref, "step list mismatch for k = {k}");
+        assert_eq!(result, result_ref, "final point mismatch for k = {k:?}");
+        assert_eq!(steps, steps_ref, "step list mismatch for k = {k:?}");
     }
 }
 
 /// The executor's fast path (`scalar_mul_affine_x`) and the prover's replay must agree
 /// on `x(k·G)`: the executor writes it to guest memory and the prover proves it, so any
-/// divergence would make a correct execution unprovable. They run through two distinct
-/// k256 entry points (native scalar-mul vs projective double-and-add), so pin them here.
+/// divergence would make a correct execution unprovable.
 #[test]
 fn executor_and_replay_agree_on_result_x() {
     let g = generator();
-    let mut scalars: Vec<BigUint> = (1u64..40).map(BigUint::from).collect();
+    let mut scalars: Vec<U256> = (1u64..40).map(U256::from).collect();
     for &kv in &[0xFFu64, 0xABCD, 1 << 20, 123_456_789, u64::MAX] {
-        scalars.push(BigUint::from(kv));
+        scalars.push(U256::from(kv));
     }
-    scalars.push(&n() / BigUint::from(2u8));
-    scalars.push(&n() - BigUint::from(1u8));
+    let two = NonZero::new(U256::from(2u32)).expect("2 != 0");
+    scalars.push(n().div_rem(&two).0);
+    scalars.push(n().wrapping_sub(&U256::ONE));
 
     for k in scalars {
         let (_steps, result) = replay_double_and_add(&k, &g);
         let exec_x = scalar_mul_affine_x(&k, &g);
-        assert_eq!(result.x, exec_x, "executor/replay x mismatch for k = {k}");
+        assert_eq!(result.x, exec_x, "executor/replay x mismatch for k = {k:?}");
     }
 }
diff --git a/crypto/ecsm/src/tests/lib_tests.rs b/crypto/ecsm/src/tests/lib_tests.rs
index 8819a00b6..5661b50a0 100644
--- a/crypto/ecsm/src/tests/lib_tests.rs
+++ b/crypto/ecsm/src/tests/lib_tests.rs
@@ -1,42 +1,38 @@
 //! Unit tests for the crate's public entry points (relocated from `lib.rs`).
 
-use num_bigint::BigUint;
+use crypto_bigint::{NonZero, U256, U512};
 
-use crate::{B, EcsmError, n, p, recover_y_canonical, scalar_mul_x, to_le_32};
-
-/// Parses a big-endian hex string into a `BigUint`.
-fn be_hex(s: &str) -> BigUint {
-    BigUint::parse_bytes(s.as_bytes(), 16).expect("valid hex literal")
-}
+use crate::{B, EcsmError, n, p, recover_y_canonical, scalar_mul_x};
 
 // secp256k1 generator G.
 const GX_HEX: &str = "79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798";
 const GY_HEX: &str = "483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8";
 
-fn gx() -> BigUint {
-    be_hex(GX_HEX)
+fn gx() -> U256 {
+    U256::from_be_hex(GX_HEX)
 }
 
 #[test]
 fn constants_match_known_secp256k1_values() {
     assert_eq!(
         p(),
-        be_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F")
+        U256::from_be_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F")
     );
     assert_eq!(
         n(),
-        be_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141")
+        U256::from_be_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141")
     );
     // p ≡ 3 mod 4 (a known secp256k1 property).
-    assert_eq!(&p() % 4u32, BigUint::from(3u8));
+    let four = NonZero::new(U256::from(4u32)).expect("4 != 0");
+    assert_eq!(p().div_rem(&four).1, U256::from(3u32));
 }
 
 #[test]
 fn generator_is_on_curve_and_y_is_canonical() {
     // Gy ends in 0xB8 (even), so the canonical (even) root is Gy itself.
     let y = recover_y_canonical(&gx()).expect("G is on the curve");
-    assert_eq!(y, be_hex(GY_HEX));
-    assert!(!y.bit(0), "canonical root must be even");
+    assert_eq!(y, U256::from_be_hex(GY_HEX));
+    assert!(!y.bit_vartime(0), "canonical root must be even");
 }
 
 #[test]
@@ -47,14 +43,25 @@ fn recover_y_handles_residues_and_non_residues() {
     let mut saw_none = false;
     let mut saw_some = false;
     for x in 1u32..40 {
-        let xb = BigUint::from(x);
+        let xb = U256::from(x);
         match recover_y_canonical(&xb) {
             Some(y) => {
                 saw_some = true;
-                assert!(!y.bit(0), "recovered y must be even");
-                // y^2 == x^3 + b mod p
-                let lhs = (&y * &y) % p();
-                let rhs = (&xb * &xb % p() * &xb + BigUint::from(B)) % p();
+                assert!(!y.bit_vartime(0), "recovered y must be even");
+                // y^2 == x^3 + b mod p  (using U512 for the products)
+                let (yy_lo, yy_hi) = y.widening_mul(&y);
+                let yy: U512 = yy_lo.concat(&yy_hi);
+                let mut p_le64 = [0u8; 64];
+                p_le64[..32].copy_from_slice(&p().to_le_bytes());
+                let p512 = NonZero::new(U512::from_le_slice(&p_le64)).expect("p != 0");
+                let lhs = yy.div_rem(&p512).1;
+                let (xx_lo, xx_hi) = xb.widening_mul(&xb);
+                let xx: U512 = xx_lo.concat(&xx_hi);
+                let x2_512 = xx.div_rem(&p512).1;
+                let x2 = U256::from_le_slice(&x2_512.to_le_bytes()[..32]);
+                let (x3_lo, x3_hi) = xb.widening_mul(&x2);
+                let x3: U512 = x3_lo.concat(&x3_hi);
+                let rhs = x3.wrapping_add(&U512::from(B)).div_rem(&p512).1;
                 assert_eq!(lhs, rhs);
             }
             None => saw_none = true,
@@ -68,51 +75,52 @@ fn recover_y_handles_residues_and_non_residues() {
 
 #[test]
 fn scalar_mul_one_is_identity() {
-    let k = to_le_32(&BigUint::from(1u8));
-    let xg = to_le_32(&gx());
+    let k: [u8; 32] = U256::ONE.to_le_bytes().into();
+    let xg: [u8; 32] = gx().to_le_bytes().into();
     assert_eq!(scalar_mul_x(&k, &xg).expect("1·G is valid"), xg);
 }
 
 #[test]
 fn scalar_mul_two_matches_known_2g() {
-    // x(2G) for secp256k1.
-    let expected = be_hex("C6047F9441ED7D6D3045406E95C07CD85C778E4B8CEF3CA7ABAC09B95C709EE5");
-    let k = to_le_32(&BigUint::from(2u8));
-    let xg = to_le_32(&gx());
+    let expected =
+        U256::from_be_hex("C6047F9441ED7D6D3045406E95C07CD85C778E4B8CEF3CA7ABAC09B95C709EE5");
+    let k: [u8; 32] = U256::from(2u32).to_le_bytes().into();
+    let xg: [u8; 32] = gx().to_le_bytes().into();
     assert_eq!(
         scalar_mul_x(&k, &xg).expect("2·G is valid"),
-        to_le_32(&expected)
+        <[u8; 32]>::from(expected.to_le_bytes())
     );
 }
 
 #[test]
 fn scalar_mul_three_matches_known_3g() {
-    let expected = be_hex("F9308A019258C31049344F85F89D5229B531C845836F99B08601F113BCE036F9");
-    let k = to_le_32(&BigUint::from(3u8));
-    let xg = to_le_32(&gx());
+    let expected =
+        U256::from_be_hex("F9308A019258C31049344F85F89D5229B531C845836F99B08601F113BCE036F9");
+    let k: [u8; 32] = U256::from(3u32).to_le_bytes().into();
+    let xg: [u8; 32] = gx().to_le_bytes().into();
     assert_eq!(
         scalar_mul_x(&k, &xg).expect("3·G is valid"),
-        to_le_32(&expected)
+        <[u8; 32]>::from(expected.to_le_bytes())
     );
 }
 
 #[test]
 fn scalar_mul_n_minus_one_shares_x_with_g() {
     // (N-1)·G = -G, which has the same x-coordinate as G.
-    let k = to_le_32(&(n() - BigUint::from(1u8)));
-    let xg = to_le_32(&gx());
+    let k: [u8; 32] = n().wrapping_sub(&U256::ONE).to_le_bytes().into();
+    let xg: [u8; 32] = gx().to_le_bytes().into();
     assert_eq!(scalar_mul_x(&k, &xg).expect("(N-1)·G is valid"), xg);
 }
 
 #[test]
 fn rejects_zero_and_out_of_range_scalars() {
-    let xg = to_le_32(&gx());
+    let xg: [u8; 32] = gx().to_le_bytes().into();
     assert_eq!(
-        scalar_mul_x(&to_le_32(&BigUint::from(0u8)), &xg),
+        scalar_mul_x(&U256::ZERO.to_le_bytes().into(), &xg),
         Err(EcsmError::ScalarIsZero)
     );
     assert_eq!(
-        scalar_mul_x(&to_le_32(&n()), &xg),
+        scalar_mul_x(&n().to_le_bytes().into(), &xg),
         Err(EcsmError::ScalarOutOfRange)
     );
 }
@@ -122,10 +130,10 @@ fn rejects_non_canonical_xg() {
     // xG = p and xG = p + 1 (the alias of x = 1) must be rejected, not
     // silently reduced: with k = 1 the input bytes would be echoed back as
     // xR, which the prover's xR < p range check cannot prove.
-    let k = to_le_32(&BigUint::from(1u8));
-    for delta in [0u8, 1] {
+    let k: [u8; 32] = U256::ONE.to_le_bytes().into();
+    for delta in [0u32, 1] {
         assert_eq!(
-            scalar_mul_x(&k, &to_le_32(&(p() + BigUint::from(delta)))),
+            scalar_mul_x(&k, &p().wrapping_add(&U256::from(delta)).to_le_bytes().into()),
             Err(EcsmError::CoordinateOutOfRange),
             "xG = p + {delta} must be rejected"
         );
@@ -133,7 +141,7 @@ fn rejects_non_canonical_xg() {
     // p − 1 is below the bound, so it must NOT hit the canonicity check
     // (it is not on the curve, which is a different error).
     assert_eq!(
-        scalar_mul_x(&k, &to_le_32(&(p() - BigUint::from(1u8)))),
+        scalar_mul_x(&k, &p().wrapping_sub(&U256::ONE).to_le_bytes().into()),
         Err(EcsmError::NotOnCurve)
     );
 }
diff --git a/crypto/ecsm/src/tests/reference.rs b/crypto/ecsm/src/tests/reference.rs
index 0621f9545..2d771958e 100644
--- a/crypto/ecsm/src/tests/reference.rs
+++ b/crypto/ecsm/src/tests/reference.rs
@@ -1,17 +1,17 @@
-//! Spec-faithful reference double-and-add over secp256k1 in affine `BigUint`
+//! Spec-faithful reference double-and-add over secp256k1 in affine `U256`
 //! arithmetic. Test-only: it cross-checks the production k256-backed
 //! [`replay_double_and_add`](crate::curve::replay_double_and_add) fast path,
 //! which the parity test pins to this reference.
 
-use num_bigint::BigUint;
+use crypto_bigint::U256;
 
 use crate::curve::{AffinePoint, StepPts, msb_position};
 use crate::tests::reference_field::Fp;
 
 /// `2·a` on the curve. Requires `a.y != 0` (always true on secp256k1).
 pub fn point_double(a: &AffinePoint) -> AffinePoint {
-    let x = Fp::new(a.x.clone());
-    let y = Fp::new(a.y.clone());
+    let x = Fp::new(a.x);
+    let y = Fp::new(a.y);
     // λ = 3x² / 2y
     let three_x2 = x.mul(&x).mul(&Fp::from_u64(3));
     let two_y = y.add(&y);
@@ -25,10 +25,10 @@ pub fn point_double(a: &AffinePoint) -> AffinePoint {
 
 /// `a + g` on the curve. Requires `a.x != g.x` (always true in the chip's add steps).
 pub fn point_add(a: &AffinePoint, g: &AffinePoint) -> AffinePoint {
-    let xa = Fp::new(a.x.clone());
-    let ya = Fp::new(a.y.clone());
-    let xg = Fp::new(g.x.clone());
-    let yg = Fp::new(g.y.clone());
+    let xa = Fp::new(a.x);
+    let ya = Fp::new(a.y);
+    let xg = Fp::new(g.x);
+    let yg = Fp::new(g.y);
     // λ = (yg - ya) / (xg - xa)
     let lambda = yg.sub(&ya).mul(&xg.sub(&xa).inv());
     // xr = λ² - xa - xg
@@ -38,14 +38,14 @@ pub fn point_add(a: &AffinePoint, g: &AffinePoint) -> AffinePoint {
     AffinePoint { x: xr.0, y: yr.0 }
 }
 
-/// Reference slope `lambda` for one step, computed in `BigUint` `F_p`.
+/// Reference slope `lambda` for one step, computed in `U256` `F_p`.
 /// Used by the reference replay.
-pub fn step_lambda(a: &AffinePoint, g: &AffinePoint, op: u8) -> BigUint {
-    let xa = Fp::new(a.x.clone());
-    let ya = Fp::new(a.y.clone());
+pub fn step_lambda(a: &AffinePoint, g: &AffinePoint, op: u8) -> U256 {
+    let xa = Fp::new(a.x);
+    let ya = Fp::new(a.y);
     if op == 1 {
-        let xg = Fp::new(g.x.clone());
-        let yg = Fp::new(g.y.clone());
+        let xg = Fp::new(g.x);
+        let yg = Fp::new(g.y);
         yg.sub(&ya).mul(&xg.sub(&xa).inv()).0
     } else {
         let three_x2 = xa.mul(&xa).mul(&Fp::from_u64(3));
@@ -64,7 +64,7 @@ pub fn step_lambda(a: &AffinePoint, g: &AffinePoint, op: u8) -> BigUint {
 /// the round. The MSB itself is represented by the initial `A = g` (consumed by ECSM via
 /// the `BIT[len_k]` interaction), so it is never processed as an add here.
 pub fn replay_double_and_add_reference(
-    k: &BigUint,
+    k: &U256,
     g: &AffinePoint,
 ) -> (Vec<StepPts>, AffinePoint) {
     let m = msb_position(k) as i64; // len_k
@@ -76,7 +76,7 @@ pub fn replay_double_and_add_reference(
     while round >= 0 {
         let (r, next_op) = if op == 0 {
             let r = point_double(&a);
-            let bit = if k.bit(round as u64) { 1u8 } else { 0u8 };
+            let bit = if k.bit_vartime(round as u32) { 1u8 } else { 0u8 };
             (r, bit)
         } else {
             let r = point_add(&a, g);
diff --git a/crypto/ecsm/src/tests/reference_field.rs b/crypto/ecsm/src/tests/reference_field.rs
index fb819f312..47606f376 100644
--- a/crypto/ecsm/src/tests/reference_field.rs
+++ b/crypto/ecsm/src/tests/reference_field.rs
@@ -1,45 +1,53 @@
 //! Arithmetic in the secp256k1 base field `F_p` with `p = 2^256 - 2^32 - 977`.
 //!
-//! Elements are stored as `BigUint` always reduced into `[0, p)`. This is test-only
+//! Elements are stored as `U256` always reduced into `[0, p)`. This is test-only
 //! reference arithmetic for cross-checking the k256-backed witness generator.
 
-use num_bigint::BigUint;
+use crypto_bigint::modular::ConstMontyForm;
+use crypto_bigint::{NonZero, U256};
 
 use crate::p;
 
+crypto_bigint::const_monty_params!(
+    Secp256k1Field,
+    U256,
+    "fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f"
+);
+
+type FpMonty = ConstMontyForm<Secp256k1Field, 4>;
+
 /// An element of the secp256k1 base field, kept reduced into `[0, p)`.
 #[derive(Clone, Debug, PartialEq, Eq)]
-pub(crate) struct Fp(pub(crate) BigUint);
+pub(crate) struct Fp(pub(crate) U256);
 
 impl Fp {
     /// Reduces an arbitrary value into the field.
-    pub(crate) fn new(v: BigUint) -> Self {
-        Fp(v % p())
+    pub(crate) fn new(v: U256) -> Self {
+        let nz = NonZero::new(p()).expect("p != 0");
+        let (_, r) = v.div_rem(&nz);
+        Fp(r)
     }
 
     pub(crate) fn from_u64(v: u64) -> Self {
-        Fp(BigUint::from(v) % p())
+        Fp::new(U256::from(v))
     }
 
-    /// `self + other mod p`. Both operands must already be reduced.
     pub(crate) fn add(&self, other: &Fp) -> Fp {
-        Fp((&self.0 + &other.0) % p())
+        Fp((FpMonty::new(&self.0) + FpMonty::new(&other.0)).retrieve())
     }
 
-    /// `self - other mod p`. Both operands must already be reduced.
     pub(crate) fn sub(&self, other: &Fp) -> Fp {
-        let t = &self.0 + p(); // in [p, 2p)
-        Fp((t - &other.0) % p())
+        Fp((FpMonty::new(&self.0) - FpMonty::new(&other.0)).retrieve())
     }
 
-    /// `self * other mod p`. Both operands must already be reduced.
     pub(crate) fn mul(&self, other: &Fp) -> Fp {
-        Fp((&self.0 * &other.0) % p())
+        Fp((FpMonty::new(&self.0) * FpMonty::new(&other.0)).retrieve())
     }
 
     /// Multiplicative inverse via Fermat's little theorem (`p` is prime): `self^(p-2)`.
     /// Returns zero for a zero input (which never occurs for valid curve arithmetic).
     pub(crate) fn inv(&self) -> Fp {
-        Fp(self.0.modpow(&(p() - BigUint::from(2u32)), &p()))
+        let exp = p().wrapping_sub(&U256::from(2u32));
+        Fp(FpMonty::new(&self.0).pow(&exp).retrieve())
     }
 }
diff --git a/crypto/ecsm/src/tests/witness_tests.rs b/crypto/ecsm/src/tests/witness_tests.rs
index f083a1536..ade12ee7f 100644
--- a/crypto/ecsm/src/tests/witness_tests.rs
+++ b/crypto/ecsm/src/tests/witness_tests.rs
@@ -1,17 +1,12 @@
 //! Unit tests for ECSM/ECDAS witness generation (relocated from `witness.rs`).
 
-use num_bigint::BigUint;
+use crypto_bigint::{NonZero, U256, U512, U1024};
 
 use crate::witness::compute_witness;
-use crate::{n, scalar_mul_x, to_le_32};
+use crate::{n, scalar_mul_x};
 
 fn gx_le() -> [u8; 32] {
-    let gx = BigUint::parse_bytes(
-        b"79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798",
-        16,
-    )
-    .expect("valid generator x hex");
-    to_le_32(&gx)
+    U256::from_be_hex("79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798").to_le_bytes().into()
 }
 
 /// Drives `compute_witness` (whose internal asserts validate every carry/quotient)
@@ -22,7 +17,7 @@ fn witness_is_self_consistent_for_many_scalars() {
     // small scalars plus bit patterns that exercise add/double scheduling
     let scalars: &[u64] = &[1, 2, 3, 4, 5, 7, 8, 0xFF, 0x101, 0xABCD, 0xFFFF, 123456789];
     for &kv in scalars {
-        let k = to_le_32(&BigUint::from(kv));
+        let k: [u8; 32] = U256::from(kv).to_le_bytes().into();
         let w = compute_witness(&k, &gx).expect("witness");
         // final point matches reference
         assert_eq!(
@@ -37,7 +32,7 @@ fn witness_is_self_consistent_for_many_scalars() {
 
 #[test]
 fn k_one_has_no_ecdas_steps() {
-    let w = compute_witness(&to_le_32(&BigUint::from(1u8)), &gx_le()).expect("witness");
+    let w = compute_witness(&U256::ONE.to_le_bytes().into(), &gx_le()).expect("witness");
     assert!(w.steps.is_empty());
     assert_eq!(w.x_r, w.x_g); // 1·G = G
     assert_eq!(w.len_k, 0);
@@ -46,7 +41,7 @@ fn k_one_has_no_ecdas_steps() {
 #[test]
 fn ecdas_step_schedule_matches_double_and_add() {
     // k = 5 = 0b101: double(G)->2G [bit1=0], double(2G)->4G [bit0=1], add(4G,G)->5G.
-    let w = compute_witness(&to_le_32(&BigUint::from(5u8)), &gx_le()).expect("witness");
+    let w = compute_witness(&U256::from(5u32).to_le_bytes().into(), &gx_le()).expect("witness");
     assert_eq!(w.len_k, 2);
     let ops: Vec<(u8, u8, u8)> = w.steps.iter().map(|s| (s.round, s.op, s.next_op)).collect();
     assert_eq!(ops, vec![(1, 0, 0), (0, 0, 1), (0, 1, 0)]);
@@ -55,7 +50,94 @@ fn ecdas_step_schedule_matches_double_and_add() {
 #[test]
 fn witness_works_near_curve_order() {
     let gx = gx_le();
-    let w = compute_witness(&to_le_32(&(n() - BigUint::from(1u8))), &gx).expect("witness");
+    let w = compute_witness(&n().wrapping_sub(&U256::ONE).to_le_bytes().into(), &gx).expect("witness");
     assert_eq!(w.x_r, gx); // (N-1)·G = -G shares x with G
     assert_eq!(w.len_k, 255);
 }
+
+/// Verifies the shifted_quotient identity: (pos - neg) is divisible by p,
+/// and the result q + 3p is positive and fits in 33 bytes.
+/// Uses the double case (2λyA - 3xA²) from k=5's first step as a concrete example.
+#[test]
+fn shifted_quotient_satisfies_division_identity() {
+    use crypto_bigint::{Int, NonZero, Uint};
+    use crate::{p, R_BYTES};
+    use crate::curve::{recover_y_canonical, replay_double_and_add};
+
+    let gx = U256::from_be_hex("79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798");
+    let g = crate::curve::AffinePoint { x: gx, y: recover_y_canonical(&gx).unwrap() };
+    let (steps, _) = replay_double_and_add(&U256::from(5u32), &g);
+    let s = &steps[0]; // first step: double (op=0)
+    assert_eq!(s.op, 0);
+
+    let mul512 = |a: &U256, b: &U256| -> U512 { let (lo, hi) = a.widening_mul(b); lo.concat(&hi) };
+    // Work in Uint<9> to avoid overflow on 2x/3x sums.
+    let pos: Uint<9> = { let t: Uint<9> = mul512(&s.lambda, &s.a.y).resize(); t.wrapping_add(&t) };
+    let neg: Uint<9> = { let t: Uint<9> = mul512(&s.a.x, &s.a.x).resize(); t.wrapping_add(&t).wrapping_add(&t) };
+
+    // p as Int<5>: p < 2^256 < 2^319 (sign bit clear), so positive as signed Int<5>.
+    let p_nz: NonZero<Int<5>> = NonZero::new(*p().resize::<5>().as_int()).unwrap();
+    // 3p as Uint<5> from R_BYTES.
+    let r_3p: Uint<5> = { let mut b = [0u8; 40]; b[..33].copy_from_slice(&R_BYTES); Uint::<5>::from_le_slice(&b) };
+
+    let num: crate::witness::I576 = pos.as_int().wrapping_sub(neg.as_int());
+    let (q_opt, r) = num.checked_div_rem(&p_nz);
+    assert_eq!(r, Int::<5>::ZERO, "2λyA - 3xA² must be divisible by p");
+    let q: crate::witness::I576 = q_opt.unwrap();
+    // Final result q + 3p must be positive and fit in 33 bytes.
+    let result: crate::witness::I576 = q.wrapping_add(r_3p.resize::<9>().as_int());
+    let (result_abs, result_is_neg) = result.abs_sign();
+    assert!(!bool::from(result_is_neg), "final quotient must be positive");
+    let result_bytes = result_abs.to_le_bytes();
+    assert!(result_bytes[33..].iter().all(|&b| b == 0), "quotient must fit in 33 bytes");
+}
+
+/// Cross-checks the shifted_quotient result for the lambda double case.
+///
+/// Verifies that `3p² + 2λ*yA - 3*xA²` is exactly divisible by `p` (zero remainder from
+/// U1024 division, hence `q * p` equals it) and that the quotient `q` fits in 33 bytes.
+#[test]
+fn shifted_quotient_double_matches_identity() {
+    use crate::P_BYTES;
+    use crate::curve::{recover_y_canonical, replay_double_and_add};
+
+    let gx = U256::from_be_hex("79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798");
+    let g = crate::curve::AffinePoint { x: gx, y: recover_y_canonical(&gx).unwrap() };
+    let (steps, _) = replay_double_and_add(&U256::from(5u32), &g);
+    let s = &steps[0]; // first step: double (op=0)
+    assert_eq!(s.op, 0);
+
+    let mut p_le128 = [0u8; 128];
+    p_le128[..32].copy_from_slice(&P_BYTES);
+    let p1024 = NonZero::new(U1024::from_le_slice(&p_le128)).unwrap();
+
+    let mul512 = |a: &U256, b: &U256| -> U512 { let (lo, hi) = a.widening_mul(b); lo.concat(&hi) };
+    let widen = |v: U512| -> U1024 { let mut b = [0u8; 128]; b[..64].copy_from_slice(&v.to_le_bytes()); U1024::from_le_slice(&b) };
+
+    let pos = { let t = mul512(&s.lambda, &s.a.y); t.wrapping_add(&t) }; // 2λ*yA
+    let neg = { let t = mul512(&s.a.x, &s.a.x); t.wrapping_add(&t).wrapping_add(&t) }; // 3xA²
+
+    let (p_sq_lo, p_sq_hi) = crate::p().widening_mul(&crate::p());
+    let p_sq: U1024 = {
+        let mut b = [0u8; 128];
+        let lo_bytes: [u8; 32] = p_sq_lo.to_le_bytes().into();
+        let hi_bytes: [u8; 32] = p_sq_hi.to_le_bytes().into();
+        let mut lo64 = [0u8; 64];
+        lo64[..32].copy_from_slice(&lo_bytes);
+        lo64[32..64].copy_from_slice(&hi_bytes);
+        b[..64].copy_from_slice(&lo64);
+        U1024::from_le_slice(&b)
+    };
+    let r_3p_sq = p_sq.wrapping_add(&p_sq).wrapping_add(&p_sq);
+
+    let total = r_3p_sq.wrapping_add(&widen(pos)).wrapping_sub(&widen(neg));
+    let (q, r) = total.div_rem(&p1024);
+    assert_eq!(r, U1024::ZERO, "3p² + 2λyA - 3xA² must be divisible by p");
+
+    // r == 0 (asserted above) already gives q * p == total; now bound the quotient's size.
+    let q_bytes = q.to_le_bytes();
+    assert!(q_bytes[64..].iter().all(|&b| b == 0), "quotient must fit in U512");
+    let q512 = U512::from_le_slice(&q_bytes[..64]);
+    let q512_bytes = q512.to_le_bytes();
+    assert!(q512_bytes[33..].iter().all(|&b| b == 0), "quotient must fit in 33 bytes");
+}
diff --git a/crypto/ecsm/src/witness.rs b/crypto/ecsm/src/witness.rs
index 9322cba7e..43499dfe4 100644
--- a/crypto/ecsm/src/witness.rs
+++ b/crypto/ecsm/src/witness.rs
@@ -16,11 +16,41 @@
 //! negative; the chip range-checks `c_i + offset` as a halfword. We reproduce the exact
 //! integer recurrence here; the prover converts the resulting integers to field elements.
 
-use num_bigint::{BigInt, BigUint};
-use num_traits::{Signed, Zero};
+use crypto_bigint::{Int, NonZero, U256, U512, Uint};
+
+// 9 limbs = 576 bits — ample room for pos or neg (each at most a tripled product of
+// 256-bit values, so < 3·2^512 < 2^514) and their signed difference (< 2^514 in magnitude).
+pub(crate) type I576 = Int<9>;
 
 use crate::curve::{StepPts, replay_double_and_add};
-use crate::{B, EcsmError, P_BYTES, R_BYTES, n, p, prepare, to_le_32};
+use crate::{B, EcsmError, N, P, P_BYTES, R_BYTES, prepare};
+
+/// `p` as a `NonZero<U512>` — divisor for the ECSM `x2` quotient (`xG² mod p`).
+const P_512: NonZero<U512> = NonZero::<U512>::new_unwrap(P.resize::<8>());
+
+/// `p` widened to a 320-bit `Uint<5>` — the `r_offset` for the ECSM `yG` quotient.
+const P_5: Uint<5> = P.resize::<5>();
+
+/// `p` as a `NonZero<Int<5>>` (320-bit signed) — divisor for every shifted quotient.
+/// `p < 2^256 < 2^319`, so it is positive as a signed `Int<5>`.
+const P_INT5: NonZero<Int<5>> = NonZero::<Int<5>>::new_unwrap(*P_5.as_int());
+
+/// `3p` as a 320-bit `Uint<5>` — the `r_offset` for every ECDAS step quotient.
+/// Compile-time constant; equals `R_BYTES` interpreted little-endian.
+const R_3P: Uint<5> = P_5.wrapping_add(&P_5).wrapping_add(&P_5);
+
+/// `p` zero-extended to 64 limb-bytes — the shared modulus operand in carry builders.
+const PP: [i32; 64] = ext64(&P_BYTES);
+
+/// `3p` (= `R_BYTES`) zero-extended to 64 limb-bytes — the `r` operand in step carries.
+const R_EXT: [i32; 64] = ext64(&R_BYTES);
+
+/// Curve coefficient `b` zero-extended to 64 limb-bytes.
+const B_EXT: [i32; 64] = ext64(&{
+    let mut a = [0u8; 32];
+    a[0] = B as u8;
+    a
+});
 
 /// Full ECSM-chip witness for one scalar multiplication (one ECSM row).
 #[derive(Debug, Clone)]
@@ -80,18 +110,25 @@ pub struct EcdasStep {
 // Limb helpers
 // =========================================================================
 
-/// Zero-extends a little-endian byte slice (≤ 64 bytes) to 64 `i128` limbs.
-fn ext64(bytes: &[u8]) -> [i128; 64] {
-    let mut a = [0i128; 64];
-    for (i, &b) in bytes.iter().enumerate() {
-        a[i] = b as i128;
+/// Zero-extends a little-endian byte slice (≤ 64 bytes) to 64 `i32` limbs.
+///
+/// `i32` is ample: every per-limb term is a sum of ≤ 64 byte products with small
+/// integer coefficients, so its magnitude stays below `~2^25` — comfortably within
+/// `i32`'s `2^31` range. Keeping these 64-element arrays 4-wide rather than 16-wide
+/// (`i128`) cuts the working set ~4× so `build_step`'s ~10 live limb arrays stay in cache.
+const fn ext64(bytes: &[u8]) -> [i32; 64] {
+    let mut a = [0i32; 64];
+    let mut i = 0;
+    while i < bytes.len() {
+        a[i] = bytes[i] as i32;
+        i += 1;
     }
     a
 }
 
-/// Convolution `Σ_{j=0}^{i} a[j]·b[i-j]`.
-fn conv(a: &[i128; 64], b: &[i128; 64], i: usize) -> i128 {
-    let mut s = 0i128;
+/// Convolution `Σ_{j=0}^{i} a[j]·b[i-j]`. Bounded by `64·255² < 2^22`, so it fits `i32`.
+fn conv(a: &[i32; 64], b: &[i32; 64], i: usize) -> i32 {
+    let mut s = 0i32;
     for j in 0..=i {
         s += a[j] * b[i - j];
     }
@@ -103,11 +140,11 @@ fn conv(a: &[i128; 64], b: &[i128; 64], i: usize) -> i128 {
 ///
 /// These asserts catch any transcription error in the `terms` builders: for valid inputs
 /// the relation `LHS − RHS = 0` holds exactly, so every partial sum is divisible by 256.
-fn limb_carries(relation: &str, terms: &[i128; 64]) -> [i64; 64] {
+fn limb_carries(relation: &str, terms: &[i32; 64]) -> [i64; 64] {
     let mut c = [0i64; 64];
-    let mut carry: i128 = 0;
+    let mut carry: i64 = 0;
     for i in 0..64 {
-        let s = carry + terms[i];
+        let s = carry + terms[i] as i64;
         assert!(
             (s & 0xFF) == 0,
             "ECSM witness {relation}: limb {i} not divisible by 256"
@@ -115,7 +152,7 @@ fn limb_carries(relation: &str, terms: &[i128; 64]) -> [i64; 64] {
         // `s` is a multiple of 256 (asserted), so the arithmetic shift equals the
         // truncating division `s / 256` even when `s` is negative.
         carry = s >> 8;
-        c[i] = carry as i64;
+        c[i] = carry;
     }
     assert!(
         c[63] == 0,
@@ -129,8 +166,8 @@ fn limb_carries(relation: &str, terms: &[i128; 64]) -> [i64; 64] {
 // =========================================================================
 
 /// ECSM `x2` relation: `xG^2 − x2 − q0·p = 0`.
-fn carries_x2(xg: &[i128; 64], x2: &[i128; 64], q0: &[i128; 64], pp: &[i128; 64]) -> [i64; 64] {
-    let mut terms = [0i128; 64];
+fn carries_x2(xg: &[i32; 64], x2: &[i32; 64], q0: &[i32; 64], pp: &[i32; 64]) -> [i64; 64] {
+    let mut terms = [0i32; 64];
     for i in 0..64 {
         terms[i] = conv(xg, xg, i) - x2[i] - conv(q0, pp, i);
     }
@@ -139,14 +176,14 @@ fn carries_x2(xg: &[i128; 64], x2: &[i128; 64], q0: &[i128; 64], pp: &[i128; 64]
 
 /// ECSM `yG` relation: `yG^2 + p^2 − xG·x2 − b − q1·p = 0`.
 fn carries_yg(
-    yg: &[i128; 64],
-    pp: &[i128; 64],
-    x2: &[i128; 64],
-    xg: &[i128; 64],
-    q1: &[i128; 64],
-    b: &[i128; 64],
+    yg: &[i32; 64],
+    pp: &[i32; 64],
+    x2: &[i32; 64],
+    xg: &[i32; 64],
+    q1: &[i32; 64],
+    b: &[i32; 64],
 ) -> [i64; 64] {
-    let mut terms = [0i128; 64];
+    let mut terms = [0i32; 64];
     for i in 0..64 {
         terms[i] = conv(yg, yg, i) + conv(pp, pp, i) - conv(x2, xg, i) - conv(q1, pp, i) - b[i];
     }
@@ -158,16 +195,16 @@ fn carries_yg(
 #[allow(clippy::too_many_arguments)]
 fn carries_lambda(
     op: u8,
-    lam: &[i128; 64],
-    xg: &[i128; 64],
-    xa: &[i128; 64],
-    ya: &[i128; 64],
-    yg: &[i128; 64],
-    r: &[i128; 64],
-    pp: &[i128; 64],
-    q0: &[i128; 64],
+    lam: &[i32; 64],
+    xg: &[i32; 64],
+    xa: &[i32; 64],
+    ya: &[i32; 64],
+    yg: &[i32; 64],
+    r: &[i32; 64],
+    pp: &[i32; 64],
+    q0: &[i32; 64],
 ) -> [i64; 64] {
-    let mut terms = [0i128; 64];
+    let mut terms = [0i32; 64];
     for i in 0..64 {
         let branch = if op == 1 {
             // op · (Σ_j λ_j (xG_{i-j} − xA_{i-j}) + (yA_i − yG_i))
@@ -178,7 +215,7 @@ fn carries_lambda(
             s
         } else {
             // (1−op) · Σ_j (2 λ_j yA_{i-j} − 3 xA_j xA_{i-j})
-            let mut s = 0i128;
+            let mut s = 0i32;
             for j in 0..=i {
                 s += 2 * lam[j] * ya[i - j] - 3 * xa[j] * xa[i - j];
             }
@@ -194,15 +231,15 @@ fn carries_lambda(
 #[allow(clippy::too_many_arguments)]
 fn carries_xr(
     op: u8,
-    lam: &[i128; 64],
-    xa: &[i128; 64],
-    xg: &[i128; 64],
-    xr: &[i128; 64],
-    r: &[i128; 64],
-    pp: &[i128; 64],
-    q1: &[i128; 64],
+    lam: &[i32; 64],
+    xa: &[i32; 64],
+    xg: &[i32; 64],
+    xr: &[i32; 64],
+    r: &[i32; 64],
+    pp: &[i32; 64],
+    q1: &[i32; 64],
 ) -> [i64; 64] {
-    let mut terms = [0i128; 64];
+    let mut terms = [0i32; 64];
     for i in 0..64 {
         let op_term = if op == 0 { xa[i] - xg[i] } else { 0 };
         terms[i] =
@@ -214,18 +251,18 @@ fn carries_xr(
 /// ECDAS `yR` relation: `λ(xA − xR) − yA − yR + (r − q2)p = 0`.
 #[allow(clippy::too_many_arguments)]
 fn carries_yr(
-    lam: &[i128; 64],
-    xa: &[i128; 64],
-    xr: &[i128; 64],
-    ya: &[i128; 64],
-    yr: &[i128; 64],
-    r: &[i128; 64],
-    pp: &[i128; 64],
-    q2: &[i128; 64],
+    lam: &[i32; 64],
+    xa: &[i32; 64],
+    xr: &[i32; 64],
+    ya: &[i32; 64],
+    yr: &[i32; 64],
+    r: &[i32; 64],
+    pp: &[i32; 64],
+    q2: &[i32; 64],
 ) -> [i64; 64] {
-    let mut terms = [0i128; 64];
+    let mut terms = [0i32; 64];
     for i in 0..64 {
-        let mut conv_lam = 0i128;
+        let mut conv_lam = 0i32;
         for j in 0..=i {
             conv_lam += lam[j] * (xa[i - j] - xr[i - j]);
         }
@@ -235,37 +272,45 @@ fn carries_yr(
 }
 
 // =========================================================================
-// BigInt helpers
+// Shifted quotient
 // =========================================================================
 
-/// Little-endian 33 bytes of a non-negative value that fits in 264 bits.
-fn to_le_33(relation: &str, v: &BigUint) -> [u8; 33] {
-    let mut bytes = v.to_bytes_le();
+/// Computes `r_offset + (pos - neg) / p` where `pos - neg` is divisible by `p`,
+/// returning the result as 33 little-endian bytes (the quotient's witness layout).
+///
+/// `pos` and `neg` are built from products of 256-bit values (each product < p² < 2^512,
+/// possibly doubled or tripled) widened to `Uint<9>`, so `|pos - neg| < 2^514` and signed
+/// 576-bit (`Int<9>`) arithmetic suffices. Divides by `p` held as a signed 320-bit
+/// `Int<5>`, then adds `r_offset` (= p or 3p); the sum must be non-negative and fit 33 bytes.
+fn shifted_quotient(
+    relation: &str,
+    pos: Uint<9>,
+    neg: Uint<9>,
+    p_nz: &NonZero<Int<5>>,
+    r_offset: Uint<5>,  // p or 3p; both fit in 320 bits
+) -> [u8; 33] {
+    let num: I576 = pos.as_int().wrapping_sub(neg.as_int());
+    // Witness generation is variable-time throughout (see the `bit_vartime` schedule),
+    // so use the faster variable-time division.
+    let (q_opt, r) = num.checked_div_rem_vartime(p_nz);
+    let q: I576 = q_opt.expect("divisor is nonzero");
+    assert!(r == Int::<5>::ZERO, "ECSM witness {relation}: numerator not divisible by p");
+    // q may be negative (|q| < 3p); add r_offset (p or 3p) widened to I576 to shift it non-negative.
+    let offset: I576 = *r_offset.resize::<9>().as_int();
+    let result: I576 = q.wrapping_add(&offset);
+    // Result must be non-negative and fit in 33 bytes (< 2^264); both are asserted below.
+    let (abs, is_neg) = result.abs_sign();
+    assert!(!bool::from(is_neg), "ECSM witness {relation}: quotient unexpectedly negative");
+    let bytes = abs.to_le_bytes(); // [u8; 72]
     assert!(
-        bytes.len() <= 33,
+        bytes[33..].iter().all(|&b| b == 0),
         "ECSM witness {relation}: quotient exceeds 33 bytes"
     );
-    bytes.resize(33, 0);
     let mut out = [0u8; 33];
     out.copy_from_slice(&bytes[..33]);
     out
 }
 
-/// `r + numerator / p`, where `numerator` must be divisible by `p`. Asserts divisibility
-/// and that the result is non-negative (guaranteed by the spec quotient ranges).
-fn shifted_quotient(relation: &str, numerator: &BigInt, p_big: &BigInt, r_big: &BigInt) -> BigUint {
-    assert!(
-        (numerator % p_big).is_zero(),
-        "ECSM witness {relation}: numerator not divisible by p"
-    );
-    let q = r_big + numerator / p_big;
-    assert!(
-        !q.is_negative(),
-        "ECSM witness {relation}: quotient unexpectedly negative"
-    );
-    q.to_biguint().expect("non-negative")
-}
-
 // =========================================================================
 // Witness construction
 // =========================================================================
@@ -275,57 +320,49 @@ fn shifted_quotient(relation: &str, numerator: &BigInt, p_big: &BigInt, r_big: &
 pub fn compute_witness(k_le: &[u8; 32], xg_le: &[u8; 32]) -> Result<EcsmWitness, EcsmError> {
     let (k, g) = prepare(k_le, xg_le)?;
 
-    let p_big = BigInt::from(p());
-    let r_big = BigInt::from(BigUint::from_bytes_le(&R_BYTES)); // r = 3p
-
-    // Common zero-extended constants.
-    let pp = ext64(&P_BYTES);
-    let r_ext = ext64(&R_BYTES);
-    let b_bytes = {
-        let mut a = [0u8; 32];
-        a[0] = B as u8;
-        a
-    };
-    let b_ext = ext64(&b_bytes);
-
     // --- ECSM: x2 = xG^2 mod p, quotient q0 ---
-    let xg_sq = &g.x * &g.x;
-    let x2_big = &xg_sq % p();
-    let q0_big = (&xg_sq - &x2_big) / p(); // exact
-    let xg_b = to_le_32(&g.x);
-    let yg_b = to_le_32(&g.y);
-    let x2_b = to_le_32(&x2_big);
-    let q0_b = to_le_32(&q0_big);
-    let c0 = carries_x2(&ext64(&xg_b), &ext64(&x2_b), &ext64(&q0_b), &pp);
+    // xg_sq = xG * xG as U512 (widening multiply).
+    let (xg_sq_lo, xg_sq_hi) = g.x.widening_mul(&g.x);
+    let xg_sq = xg_sq_lo.concat(&xg_sq_hi);
+    let (q0_512, x2_512) = xg_sq.div_rem(&P_512);
+    let x2 = U256::from_le_slice(&x2_512.to_le_bytes()[..32]);
+    let q0 = U256::from_le_slice(&q0_512.to_le_bytes()[..32]);
+    let xg_b: [u8; 32] = g.x.to_le_bytes().into();
+    let yg_b: [u8; 32] = g.y.to_le_bytes().into();
+    let x2_b: [u8; 32] = x2.to_le_bytes().into();
+    let q0_b: [u8; 32] = q0.to_le_bytes().into();
+    let c0 = carries_x2(&ext64(&xg_b), &ext64(&x2_b), &ext64(&q0_b), &PP);
 
     // --- ECSM: yG relation, quotient q1 = (yG^2 − xG·x2 − b)/p + p ---
-    let num_yg = BigInt::from(&g.y * &g.y) - BigInt::from(&g.x * &x2_big) - BigInt::from(B);
-    let q1_big = shifted_quotient("yG", &num_yg, &p_big, &p_big);
-    let q1_b = to_le_33("yG", &q1_big);
+    // pos = yG^2, neg = xG·x2 + b. r_offset = p.
+    let (yg_sq_lo, yg_sq_hi) = g.y.widening_mul(&g.y);
+    let yg_sq: Uint<9> = yg_sq_lo.concat(&yg_sq_hi).resize();
+    let (xg_x2_lo, xg_x2_hi) = g.x.widening_mul(&x2);
+    let xg_x2: Uint<9> = xg_x2_lo.concat(&xg_x2_hi).resize();
+    let neg_yg: Uint<9> = xg_x2.wrapping_add(&Uint::<9>::from(B));
+    let q1_b = shifted_quotient("yG", yg_sq, neg_yg, &P_INT5, P_5);
     let c1 = carries_yg(
         &ext64(&yg_b),
-        &pp,
+        &PP,
         &ext64(&x2_b),
         &ext64(&xg_b),
         &ext64(&q1_b),
-        &b_ext,
+        &B_EXT,
     );
 
     // --- scalar range data ---
     let len_k = crate::curve::msb_position(&k) as u8;
-    let two_256 = BigUint::from(1u8) << 256u32;
-    let k_sub_n = to_le_32(&((&two_256 + &k) - n())); // k < N
+    // k_sub_n = (k - N) mod 2^256; since k < N this wraps: 2^256 + k - N.
+    let k_sub_n: [u8; 32] = k.wrapping_sub(&N).to_le_bytes().into();
 
     // --- double/add replay ---
     let (steps_pts, result) = replay_double_and_add(&k, &g);
-    let x_r = to_le_32(&result.x);
-    let y_r = to_le_32(&result.y);
-    let x_r_sub_p = to_le_32(&((&two_256 + &result.x) - p()));
+    let x_r: [u8; 32] = result.x.to_le_bytes().into();
+    let y_r: [u8; 32] = result.y.to_le_bytes().into();
+    // x_r_sub_p = (xR - p) mod 2^256; since xR < p this wraps: 2^256 + xR - p.
+    let x_r_sub_p: [u8; 32] = result.x.wrapping_sub(&P).to_le_bytes().into();
 
-    let steps = steps_pts
-        .iter()
-        .map(|s| build_step(s, &p_big, &r_big, &r_ext, &pp))
-        .collect();
+    let steps = steps_pts.iter().map(build_step).collect();
 
     Ok(EcsmWitness {
         x_g: xg_b,
@@ -346,21 +383,16 @@ pub fn compute_witness(k_le: &[u8; 32], xg_le: &[u8; 32]) -> Result<EcsmWitness,
 }
 
 /// Builds one ECDAS step witness (λ, quotients, carries) from a point-level step.
-fn build_step(
-    s: &StepPts,
-    p_big: &BigInt,
-    r_big: &BigInt,
-    r_ext: &[i128; 64],
-    pp: &[i128; 64],
-) -> EcdasStep {
+/// All modulus operands (`P_INT5`, `R_3P`, `R_EXT`, `PP`) are compile-time constants.
+fn build_step(s: &StepPts) -> EcdasStep {
     // λ is precomputed (batched) during the double-and-add replay.
-    let lam_b = to_le_32(&s.lambda);
-    let xa_b = to_le_32(&s.a.x);
-    let ya_b = to_le_32(&s.a.y);
-    let xg_b = to_le_32(&s.g.x);
-    let yg_b = to_le_32(&s.g.y);
-    let xr_b = to_le_32(&s.r.x);
-    let yr_b = to_le_32(&s.r.y);
+    let lam_b: [u8; 32] = s.lambda.to_le_bytes().into();
+    let xa_b: [u8; 32] = s.a.x.to_le_bytes().into();
+    let ya_b: [u8; 32] = s.a.y.to_le_bytes().into();
+    let xg_b: [u8; 32] = s.g.x.to_le_bytes().into();
+    let yg_b: [u8; 32] = s.g.y.to_le_bytes().into();
+    let xr_b: [u8; 32] = s.r.x.to_le_bytes().into();
+    let yr_b: [u8; 32] = s.r.y.to_le_bytes().into();
 
     let (lam_ext, xa_ext, ya_ext, xg_ext, yg_ext, xr_ext, yr_ext) = (
         ext64(&lam_b),
@@ -372,35 +404,46 @@ fn build_step(
         ext64(&yr_b),
     );
 
-    let lam_i = BigInt::from(s.lambda.clone());
-    let xa_i = BigInt::from(s.a.x.clone());
-    let ya_i = BigInt::from(s.a.y.clone());
-    let xg_i = BigInt::from(s.g.x.clone());
-    let yg_i = BigInt::from(s.g.y.clone());
-    let xr_i = BigInt::from(s.r.x.clone());
-    let yr_i = BigInt::from(s.r.y.clone());
-
-    // q0: λ relation numerator.
-    let num0 = if s.op == 1 {
-        (&xg_i - &xa_i) * &lam_i - &yg_i + &ya_i
+    // Multiply two U256 values, result as U512 (no overflow: product < p² < 2^512).
+    let mul512 = |a: &U256, b: &U256| -> U512 {
+        let (lo, hi) = a.widening_mul(b);
+        lo.concat(&hi)
+    };
+    // Widen a U256 or U512 to Uint<9> = 576 bits, avoiding overflow on 2x/3x sums.
+    let w9_u256 = |v: &U256| -> Uint<9> { v.resize::<9>() };
+    let w9_u512 = |v: U512| -> Uint<9> { v.resize::<9>() };
+
+    // q0: λ relation.
+    //   add: pos = λ*xG + yA,  neg = λ*xA + yG       (each < p²+p < 2^513)
+    //   dbl: pos = 2*λ*yA,     neg = 3*xA²            (pos < 2p² < 2^513, neg < 3p² < 2^514)
+    // Work in Uint<9> (576 bits) so 2x/3x multiplications don't overflow.
+    let (pos0, neg0) = if s.op == 1 {
+        let lam_xg = w9_u512(mul512(&s.lambda, &s.g.x));
+        let lam_xa = w9_u512(mul512(&s.lambda, &s.a.x));
+        (lam_xg.wrapping_add(&w9_u256(&s.a.y)), lam_xa.wrapping_add(&w9_u256(&s.g.y)))
     } else {
-        2 * &lam_i * &ya_i - 3 * &xa_i * &xa_i
+        let lam_ya = w9_u512(mul512(&s.lambda, &s.a.y));
+        let xa_sq  = w9_u512(mul512(&s.a.x, &s.a.x));
+        (lam_ya.wrapping_add(&lam_ya), xa_sq.wrapping_add(&xa_sq).wrapping_add(&xa_sq))
     };
-    let q0_big = shifted_quotient("lambda", &num0, p_big, r_big);
-    let q0_b = to_le_33("lambda", &q0_big);
-
-    // q1: xR relation numerator  λ² − xA − xG − xR + (1−op)(xG − xA).
-    let mut num1 = &lam_i * &lam_i - &xa_i - &xg_i - &xr_i;
-    if s.op == 0 {
-        num1 += &xg_i - &xa_i;
-    }
-    let q1_big = shifted_quotient("xR", &num1, p_big, r_big);
-    let q1_b = to_le_33("xR", &q1_big);
+    let q0_b = shifted_quotient("lambda", pos0, neg0, &P_INT5, R_3P);
+
+    // q1: xR relation.
+    //   add: pos = λ²,  neg = xA + xG + xR             (neg < 3p, no overflow)
+    //   dbl: pos = λ²,  neg = 2*xA + xR
+    let lam_sq = w9_u512(mul512(&s.lambda, &s.lambda));
+    let (pos1, neg1) = if s.op == 1 {
+        (lam_sq, w9_u256(&s.a.x).wrapping_add(&w9_u256(&s.g.x)).wrapping_add(&w9_u256(&s.r.x)))
+    } else {
+        (lam_sq, w9_u256(&s.a.x).wrapping_add(&w9_u256(&s.a.x)).wrapping_add(&w9_u256(&s.r.x)))
+    };
+    let q1_b = shifted_quotient("xR", pos1, neg1, &P_INT5, R_3P);
 
-    // q2: yR relation numerator  λ(xA − xR) − yA − yR.
-    let num2 = &lam_i * (&xa_i - &xr_i) - &ya_i - &yr_i;
-    let q2_big = shifted_quotient("yR", &num2, p_big, r_big);
-    let q2_b = to_le_33("yR", &q2_big);
+    // q2: yR relation — pos = λ*xA,  neg = λ*xR + yA + yR
+    let lam_xa2 = w9_u512(mul512(&s.lambda, &s.a.x));
+    let lam_xr  = w9_u512(mul512(&s.lambda, &s.r.x));
+    let neg2 = lam_xr.wrapping_add(&w9_u256(&s.a.y)).wrapping_add(&w9_u256(&s.r.y));
+    let q2_b = shifted_quotient("yR", lam_xa2, neg2, &P_INT5, R_3P);
 
     let c0 = carries_lambda(
         s.op,
@@ -409,8 +452,8 @@ fn build_step(
         &xa_ext,
         &ya_ext,
         &yg_ext,
-        r_ext,
-        pp,
+        &R_EXT,
+        &PP,
         &ext64(&q0_b),
     );
     let c1 = carries_xr(
@@ -419,8 +462,8 @@ fn build_step(
         &xa_ext,
         &xg_ext,
         &xr_ext,
-        r_ext,
-        pp,
+        &R_EXT,
+        &PP,
         &ext64(&q1_b),
     );
     let c2 = carries_yr(
@@ -429,8 +472,8 @@ fn build_step(
         &xr_ext,
         &ya_ext,
         &yr_ext,
-        r_ext,
-        pp,
+        &R_EXT,
+        &PP,
         &ext64(&q2_b),
     );
 
diff --git a/prover/src/tests/ecdas_tests.rs b/prover/src/tests/ecdas_tests.rs
index 38a413ab0..196f1fcba 100644
--- a/prover/src/tests/ecdas_tests.rs
+++ b/prover/src/tests/ecdas_tests.rs
@@ -56,11 +56,17 @@ fn row_view(
 #[test]
 fn r_bytes_is_three_p() {
     // 3·p as 33 little-endian bytes, cross-checked against the ecsm field modulus.
-    let p = ecsm::p();
-    let three_p = &p * 3u32;
-    let mut bytes = three_p.to_bytes_le();
-    bytes.resize(33, 0);
-    assert_eq!(&bytes[..], &R_BYTES[..]);
+    // R_BYTES encodes 3p as 33 LE bytes; compute 3*P_BYTES byte-by-byte with a u16 carry.
+    let p = ecsm::P_BYTES;
+    let mut three_p = [0u8; 33];
+    let mut carry: u16 = 0;
+    for i in 0..32 {
+        let s = p[i] as u16 * 3 + carry;
+        three_p[i] = s as u8;
+        carry = s >> 8;
+    }
+    three_p[32] = carry as u8;
+    assert_eq!(&three_p[..], &R_BYTES[..]);
 }
 
 /// Every ECDAS constraint evaluates to zero on a generated trace across many scalars