Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
3714569
add ram bus
hero78119 Oct 8, 2025
0da1c22
wip add rambus impl
hero78119 Oct 8, 2025
33b47ec
add shardcontext
hero78119 Oct 10, 2025
d0d2471
separate init/final/ramchip
hero78119 Oct 13, 2025
a0c7b91
local ram circuit
hero78119 Oct 13, 2025
4963848
add shard info to public io
hero78119 Oct 13, 2025
c542201
wip config as trait
hero78119 Oct 14, 2025
b84f74e
separate circuit into init/final
hero78119 Oct 14, 2025
03092e9
complete local finalized mem chip logic
hero78119 Oct 14, 2025
d32c71f
aligned step cycle and prev_cycle to local version
hero78119 Oct 15, 2025
f347310
with mem bus chip build pass
hero78119 Oct 15, 2025
4d5a421
cleanup
hero78119 Oct 15, 2025
3c21158
add table circuit cpu sumcheck
hero78119 Oct 15, 2025
ea6f8ed
one shard prover pass
hero78119 Oct 16, 2025
82403f2
fix most of local final table issue in e2e
hero78119 Oct 17, 2025
aeea15d
chores: cosmetics
hero78119 Oct 17, 2025
40887fc
gkr iop support table circuit
hero78119 Oct 19, 2025
08783ca
wip convert local final ram circuit to gkr-iop circuit
hero78119 Oct 19, 2025
6ac69fc
chores: rename config
hero78119 Oct 19, 2025
48d5f93
fix few bugs in e2e
hero78119 Oct 19, 2025
306e0df
debug log
hero78119 Oct 20, 2025
8e9c2d2
chores: mock_proving non-static ram type
hero78119 Oct 20, 2025
dc15ff4
chores: mock_proving non-static ram type
hero78119 Oct 20, 2025
c2fee65
e2e test pass
hero78119 Oct 20, 2025
74ca4f1
cosmetics and fix lint
hero78119 Oct 20, 2025
c177609
merged with upstream
hero78119 Oct 20, 2025
e819dc7
chores: fix test and ci
hero78119 Oct 20, 2025
2632e5b
log cleanup
hero78119 Oct 20, 2025
ee51964
fix goldilocks circuit
hero78119 Oct 20, 2025
0ffa915
refactor and clippy
hero78119 Oct 20, 2025
1739d4a
chores fix missing padding of DynVolatileRamTableConfig
hero78119 Oct 20, 2025
0163be6
optimise tracer performance
hero78119 Oct 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/lints.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,5 @@ jobs:
run: taplo --version || cargo install taplo-cli
- name: Run taplo
run: taplo fmt --check --diff
- name: Ensure Cargo.lock not modified by build
run: git diff --exit-code Cargo.lock
23 changes: 15 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,17 @@ rand_chacha = { version = "0.3", features = ["serde1"] }
rand_core = "0.6"
rayon = "1.10"
rkyv = { version = "0.8", features = ["pointer_width_32"] }
rustc-hash = "2.0.0"
secp = "0.4.1"
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1.0"
smallvec = { version = "1.13.2", features = [
"const_generics",
"const_new",
"serde",
"union",
"write",
] }
strum = "0.26"
strum_macros = "0.26"
substrate-bn = { version = "0.6.0" }
Expand Down
10 changes: 10 additions & 0 deletions ceno_cli/src/commands/common_args/ceno.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ pub struct CenoOptions {
#[arg(long)]
pub out_vk: Option<PathBuf>,

/// shard id
#[arg(long, default_value = "0")]
shard_id: u32,

/// number of total shards.
#[arg(long, default_value = "1")]
max_num_shards: u32,

/// Profiling granularity.
/// Setting any value restricts logs to profiling information
#[arg(long)]
Expand Down Expand Up @@ -337,6 +345,7 @@ fn run_elf_inner<
std::fs::read(elf_path).context(format!("failed to read {}", elf_path.display()))?;
let program = Program::load_elf(&elf_bytes, u32::MAX).context("failed to load elf")?;
print_cargo_message("Loaded", format_args!("{}", elf_path.display()));
let shards = Shards::new(options.shard_id as usize, options.max_num_shards as usize);

let public_io = options
.read_public_io()
Expand Down Expand Up @@ -385,6 +394,7 @@ fn run_elf_inner<
create_prover(backend.clone()),
program,
platform,
shards,
&hints,
&public_io,
options.max_steps,
Expand Down
3 changes: 3 additions & 0 deletions ceno_emul/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ itertools.workspace = true
multilinear_extensions.workspace = true
num-derive.workspace = true
num-traits.workspace = true
rayon.workspace = true
rrs_lib = { package = "rrs-succinct", version = "0.1.0" }
rustc-hash.workspace = true
secp.workspace = true
serde.workspace = true
smallvec.workspace = true
strum.workspace = true
strum_macros.workspace = true
substrate-bn.workspace = true
Expand Down
89 changes: 89 additions & 0 deletions ceno_emul/src/chunked_vec.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::ops::{Index, IndexMut};

/// A growable vector whose storage is split into fixed-size chunks.
///
/// Elements are addressed by a flat index: element `i` lives in chunk
/// `i / chunk_size` at offset `i % chunk_size`. Chunks are allocated on demand
/// by `get_or_create`, and each newly allocated chunk is filled with
/// `T::default()`.
#[derive(Debug, Clone)]
pub struct ChunkedVec<T> {
    /// Backing storage; every allocated chunk holds exactly `chunk_size` slots.
    chunks: Vec<Vec<T>>,
    /// Number of slots per chunk. Must be non-zero, since it is used as a divisor.
    chunk_size: usize,
    /// One past the highest index ever requested through `get_or_create`.
    len: usize,
}

impl<T> Default for ChunkedVec<T> {
    /// Creates an empty `ChunkedVec` with a non-zero default chunk size.
    ///
    /// A derived `Default` would set `chunk_size` to `0`, bypassing the
    /// `chunk_size > 0` check in `new` and making the first `get_or_create`
    /// call divide by zero. No chunk is allocated until the first write.
    fn default() -> Self {
        // Same chunk size the tracer in this crate passes to `ChunkedVec::new`.
        const DEFAULT_CHUNK_SIZE: usize = 1 << 20;
        Self {
            chunks: Vec::new(),
            chunk_size: DEFAULT_CHUNK_SIZE,
            len: 0,
        }
    }
}

impl<T> ChunkedVec<T> {
    /// Creates an empty `ChunkedVec` whose chunks hold `chunk_size` slots each.
    ///
    /// No chunk is allocated until the first call to `get_or_create`.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is zero.
    pub fn new(chunk_size: usize) -> Self {
        assert!(chunk_size > 0, "chunk_size must be > 0");
        Self {
            chunks: Vec::new(),
            chunk_size,
            len: 0,
        }
    }

    /// Returns the logical length: one past the highest index ever requested
    /// through `get_or_create`.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` if no index has been created yet.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns a shared reference to the element at `index`, or `None` when
    /// `index` is at or beyond `len()`.
    ///
    /// Slots below `len()` that were never written still hold the
    /// `T::default()` value they were filled with on allocation.
    pub fn get(&self, index: usize) -> Option<&T> {
        if index >= self.len {
            return None;
        }
        self.chunks
            .get(index / self.chunk_size)?
            .get(index % self.chunk_size)
    }
}

impl<T: Default + Send> ChunkedVec<T> {
    /// Returns a mutable reference to the element at `index`, growing the
    /// vector as needed.
    ///
    /// Every chunk up to and including the one containing `index` is allocated
    /// if missing, and `len()` is raised to `index + 1` when `index` is beyond
    /// the current length.
    pub fn get_or_create(&mut self, index: usize) -> &mut T {
        let chunk_idx = index / self.chunk_size;
        let within_idx = index % self.chunk_size;

        // Allocate the missing chunks one after another; only the default-fill
        // inside each chunk runs in parallel on rayon.
        if chunk_idx >= self.chunks.len() {
            let chunk_size = self.chunk_size;
            self.chunks.resize_with(chunk_idx + 1, || {
                (0..chunk_size)
                    .into_par_iter()
                    .map(|_| T::default())
                    .collect::<Vec<T>>()
            });
        }

        // Track the furthest index handed out so `get` can bounds-check against it.
        self.len = self.len.max(index + 1);

        &mut self.chunks[chunk_idx][within_idx]
    }
}

impl<T: Default + Send> Index<usize> for ChunkedVec<T> {
    type Output = T;

    /// Read-only indexing: `vec[i]`.
    ///
    /// Unlike mutable indexing, this never grows the vector.
    ///
    /// # Panics
    ///
    /// Panics with "index out of bounds" when `index >= len()`.
    fn index(&self, index: usize) -> &Self::Output {
        let slot: Option<&T> = self.get(index);
        slot.expect("index out of bounds")
    }
}

impl<T: Default + Send> IndexMut<usize> for ChunkedVec<T> {
    /// Mutable indexing: `vec[i] = value` or `vec[i].method()`.
    ///
    /// Delegates to `get_or_create`, so an out-of-range `index` grows the
    /// vector instead of panicking.
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        let slot: &mut T = self.get_or_create(index);
        slot
    }
}
4 changes: 3 additions & 1 deletion ceno_emul/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ mod platform;
pub use platform::{CENO_PLATFORM, Platform};

mod tracer;
pub use tracer::{Change, MemOp, ReadOp, StepRecord, Tracer, WriteOp};
pub use tracer::{Change, MemOp, NextAccessPair, ReadOp, StepRecord, Tracer, WriteOp};

mod vm_state;
pub use vm_state::VMState;
Expand Down Expand Up @@ -44,4 +44,6 @@ pub mod utils;

pub mod test_utils;

mod chunked_vec;
pub use chunked_vec::ChunkedVec as NextCycleAccess;
pub mod host_utils;
32 changes: 22 additions & 10 deletions ceno_emul/src/tracer.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use std::{
collections::{BTreeMap, HashMap},
fmt, mem,
};
use rustc_hash::FxHashMap;
use smallvec::SmallVec;
use std::{collections::BTreeMap, fmt, mem};

use ceno_rt::WORD_SIZE;

use crate::{
CENO_PLATFORM, InsnKind, Instruction, PC_STEP_SIZE, Platform,
addr::{ByteAddr, Cycle, RegIdx, Word, WordAddr},
chunked_vec::ChunkedVec,
encode_rv32,
syscalls::{SyscallEffects, SyscallWitness},
};
Expand Down Expand Up @@ -39,6 +39,8 @@ pub struct StepRecord {
syscall: Option<SyscallWitness>,
}

/// The `(address, cycle)` pairs stored in one slot of the tracer's next-access table.
///
/// `Tracer::track_access` pushes `(addr, current_cycle)` into the slot indexed by the
/// cycle of the previous access to `addr`. The backing `SmallVec` has an inline
/// capacity of one pair.
pub type NextAccessPair = SmallVec<[(WordAddr, Cycle); 1]>;

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct MemOp<T> {
/// Virtual Memory Address.
Expand Down Expand Up @@ -305,7 +307,8 @@ pub struct Tracer {
// record each section max access address
// (start_addr -> (start_addr, end_addr, min_access_addr, max_access_addr))
mmio_min_max_access: Option<BTreeMap<WordAddr, (WordAddr, WordAddr, WordAddr, WordAddr)>>,
latest_accesses: HashMap<WordAddr, Cycle>,
latest_accesses: FxHashMap<WordAddr, Cycle>,
next_accesses: ChunkedVec<NextAccessPair>,
}

impl Default for Tracer {
Expand Down Expand Up @@ -362,7 +365,8 @@ impl Tracer {
cycle: Self::SUBCYCLES_PER_INSN,
..StepRecord::default()
},
latest_accesses: HashMap::new(),
latest_accesses: FxHashMap::default(),
next_accesses: ChunkedVec::new(1 << 20),
}
}

Expand Down Expand Up @@ -471,16 +475,24 @@ impl Tracer {
/// - Record the current instruction as the origin of the latest access.
/// - Accesses within the same instruction are distinguished by `subcycle ∈ [0, 3]`.
pub fn track_access(&mut self, addr: WordAddr, subcycle: Cycle) -> Cycle {
self.latest_accesses
.insert(addr, self.record.cycle + subcycle)
.unwrap_or(0)
let cur_cycle = self.record.cycle + subcycle;
let prev_cycle = self.latest_accesses.insert(addr, cur_cycle).unwrap_or(0);
self.next_accesses
.get_or_create(prev_cycle as usize)
.push((addr, cur_cycle));
prev_cycle
}

/// Return all the addresses that were accessed and the cycle when they were last accessed.
pub fn final_accesses(&self) -> &HashMap<WordAddr, Cycle> {
pub fn final_accesses(&self) -> &FxHashMap<WordAddr, Cycle> {
&self.latest_accesses
}

/// Consume the tracer and return its next-access table.
///
/// Slot `c` of the table holds the `(address, cycle)` pairs pushed by `track_access`
/// for accesses whose previous access to the same address happened at cycle `c`.
/// Addresses with no previous access are recorded in slot `0`.
pub fn next_accesses(self) -> ChunkedVec<NextAccessPair> {
self.next_accesses
}

/// Return the cycle of the pending instruction (after the last completed step).
pub fn cycle(&self) -> Cycle {
self.record.cycle
Expand Down
4 changes: 4 additions & 0 deletions ceno_emul/src/vm_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ impl VMState {
&self.tracer
}

/// Consume this `VMState` and return the tracer it owns.
pub fn take_tracer(self) -> Tracer {
self.tracer
}

/// Borrow the platform stored in this `VMState`.
pub fn platform(&self) -> &Platform {
&self.platform
}
Expand Down
10 changes: 4 additions & 6 deletions ceno_emul/tests/test_vm_trace.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
#![allow(clippy::unusual_byte_groupings)]
use anyhow::Result;
use std::{
collections::{BTreeMap, HashMap},
sync::Arc,
};
use rustc_hash::FxHashMap;
use std::{collections::BTreeMap, sync::Arc};

use ceno_emul::{
CENO_PLATFORM, Cycle, EmuContext, InsnKind, Instruction, Platform, Program, StepRecord, Tracer,
Expand Down Expand Up @@ -111,8 +109,8 @@ fn expected_ops_fibonacci_20() -> Vec<InsnKind> {
}

/// Reconstruct the last access of each register.
fn expected_final_accesses_fibonacci_20() -> HashMap<WordAddr, Cycle> {
let mut accesses = HashMap::new();
fn expected_final_accesses_fibonacci_20() -> FxHashMap<WordAddr, Cycle> {
let mut accesses = FxHashMap::default();
let x = |i| WordAddr::from(Platform::register_vma(i));
const C: Cycle = Tracer::SUBCYCLES_PER_INSN;

Expand Down
1 change: 1 addition & 0 deletions ceno_zkvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ witness.workspace = true
itertools.workspace = true
ndarray.workspace = true
prettytable-rs.workspace = true
rustc-hash.workspace = true
strum.workspace = true
strum_macros.workspace = true
tracing.workspace = true
Expand Down
Loading