From e8b2d89cb9eb4d9eb21681741715f154afcfc6de Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Sat, 6 Sep 2025 20:21:19 +0800 Subject: [PATCH 1/3] core/rawdb, triedb/pathdb: introduce trienode history --- core/rawdb/accessors_history.go | 95 +++- core/rawdb/accessors_state.go | 73 +++ core/rawdb/ancient_scheme.go | 50 +- core/rawdb/schema.go | 36 ++ triedb/pathdb/database.go | 4 +- triedb/pathdb/history.go | 57 +- triedb/pathdb/history_index.go | 12 + triedb/pathdb/history_indexer.go | 76 ++- triedb/pathdb/history_state.go | 6 +- triedb/pathdb/history_state_test.go | 20 +- triedb/pathdb/history_trienode.go | 680 ++++++++++++++++++++++++ triedb/pathdb/history_trienode_test.go | 688 +++++++++++++++++++++++++ triedb/pathdb/metrics.go | 21 +- 13 files changed, 1754 insertions(+), 64 deletions(-) create mode 100644 triedb/pathdb/history_trienode.go create mode 100644 triedb/pathdb/history_trienode_test.go diff --git a/core/rawdb/accessors_history.go b/core/rawdb/accessors_history.go index cf1073f387a8..95a8907edc01 100644 --- a/core/rawdb/accessors_history.go +++ b/core/rawdb/accessors_history.go @@ -46,6 +46,27 @@ func DeleteStateHistoryIndexMetadata(db ethdb.KeyValueWriter) { } } +// ReadTrienodeHistoryIndexMetadata retrieves the metadata of trienode history index. +func ReadTrienodeHistoryIndexMetadata(db ethdb.KeyValueReader) []byte { + data, _ := db.Get(headTrienodeHistoryIndexKey) + return data +} + +// WriteTrienodeHistoryIndexMetadata stores the metadata of trienode history index +// into database. +func WriteTrienodeHistoryIndexMetadata(db ethdb.KeyValueWriter, blob []byte) { + if err := db.Put(headTrienodeHistoryIndexKey, blob); err != nil { + log.Crit("Failed to store the metadata of trienode history index", "err", err) + } +} + +// DeleteTrienodeHistoryIndexMetadata removes the metadata of trienode history index. 
+func DeleteTrienodeHistoryIndexMetadata(db ethdb.KeyValueWriter) { + if err := db.Delete(headTrienodeHistoryIndexKey); err != nil { + log.Crit("Failed to delete the metadata of trienode history index", "err", err) + } +} + // ReadAccountHistoryIndex retrieves the account history index with the provided // account address. func ReadAccountHistoryIndex(db ethdb.KeyValueReader, addressHash common.Hash) []byte { @@ -95,6 +116,30 @@ func DeleteStorageHistoryIndex(db ethdb.KeyValueWriter, addressHash common.Hash, } } +// ReadTrienodeHistoryIndex retrieves the trienode history index with the provided +// account address and storage key hash. +func ReadTrienodeHistoryIndex(db ethdb.KeyValueReader, addressHash common.Hash, path []byte) []byte { + data, err := db.Get(trienodeHistoryIndexKey(addressHash, path)) + if err != nil || len(data) == 0 { + return nil + } + return data +} + +// WriteTrienodeHistoryIndex writes the provided trienode history index into database. +func WriteTrienodeHistoryIndex(db ethdb.KeyValueWriter, addressHash common.Hash, path []byte, data []byte) { + if err := db.Put(trienodeHistoryIndexKey(addressHash, path), data); err != nil { + log.Crit("Failed to store trienode history index", "err", err) + } +} + +// DeleteTrienodeHistoryIndex deletes the specified trienode index from the database. +func DeleteTrienodeHistoryIndex(db ethdb.KeyValueWriter, addressHash common.Hash, path []byte) { + if err := db.Delete(trienodeHistoryIndexKey(addressHash, path)); err != nil { + log.Crit("Failed to delete trienode history index", "err", err) + } +} + // ReadAccountHistoryIndexBlock retrieves the index block with the provided // account address along with the block id. func ReadAccountHistoryIndexBlock(db ethdb.KeyValueReader, addressHash common.Hash, blockID uint32) []byte { @@ -143,6 +188,30 @@ func DeleteStorageHistoryIndexBlock(db ethdb.KeyValueWriter, addressHash common. 
} } +// ReadTrienodeHistoryIndexBlock retrieves the index block with the provided state +// identifier along with the block id. +func ReadTrienodeHistoryIndexBlock(db ethdb.KeyValueReader, addressHash common.Hash, path []byte, blockID uint32) []byte { + data, err := db.Get(trienodeHistoryIndexBlockKey(addressHash, path, blockID)) + if err != nil || len(data) == 0 { + return nil + } + return data +} + +// WriteTrienodeHistoryIndexBlock writes the provided index block into database. +func WriteTrienodeHistoryIndexBlock(db ethdb.KeyValueWriter, addressHash common.Hash, path []byte, id uint32, data []byte) { + if err := db.Put(trienodeHistoryIndexBlockKey(addressHash, path, id), data); err != nil { + log.Crit("Failed to store trienode index block", "err", err) + } +} + +// DeleteTrienodeHistoryIndexBlock deletes the specified index block from the database. +func DeleteTrienodeHistoryIndexBlock(db ethdb.KeyValueWriter, addressHash common.Hash, path []byte, id uint32) { + if err := db.Delete(trienodeHistoryIndexBlockKey(addressHash, path, id)); err != nil { + log.Crit("Failed to delete trienode index block", "err", err) + } +} + // increaseKey increase the input key by one bit. Return nil if the entire // addition operation overflows. func increaseKey(key []byte) []byte { @@ -155,14 +224,26 @@ func increaseKey(key []byte) []byte { return nil } -// DeleteStateHistoryIndex completely removes all history indexing data, including +// DeleteStateHistoryIndexes completely removes all history indexing data, including // indexes for accounts and storages. -// -// Note, this method assumes the storage space with prefix `StateHistoryIndexPrefix` -// is exclusively occupied by the history indexing data! 
-func DeleteStateHistoryIndex(db ethdb.KeyValueRangeDeleter) { - start := StateHistoryIndexPrefix - limit := increaseKey(bytes.Clone(StateHistoryIndexPrefix)) +func DeleteStateHistoryIndexes(db ethdb.KeyValueRangeDeleter) { + DeleteHistoryByRange(db, StateHistoryAccountMetadataPrefix) + DeleteHistoryByRange(db, StateHistoryStorageMetadataPrefix) + DeleteHistoryByRange(db, StateHistoryAccountBlockPrefix) + DeleteHistoryByRange(db, StateHistoryStorageBlockPrefix) +} + +// DeleteTrienodeHistoryIndexes completely removes all trienode history indexing data. +func DeleteTrienodeHistoryIndexes(db ethdb.KeyValueRangeDeleter) { + DeleteHistoryByRange(db, TrienodeHistoryMetadataPrefix) + DeleteHistoryByRange(db, TrienodeHistoryBlockPrefix) +} + +// DeleteHistoryByRange completely removes all database entries with the specific prefix. +// Note, this method assumes the space with the given prefix is exclusively occupied! +func DeleteHistoryByRange(db ethdb.KeyValueRangeDeleter, prefix []byte) { + start := prefix + limit := increaseKey(bytes.Clone(prefix)) // Try to remove the data in the range by a loop, as the leveldb // doesn't support the native range deletion. diff --git a/core/rawdb/accessors_state.go b/core/rawdb/accessors_state.go index 46aa5fd070ce..298ad04f40b3 100644 --- a/core/rawdb/accessors_state.go +++ b/core/rawdb/accessors_state.go @@ -299,3 +299,76 @@ func WriteStateHistory(db ethdb.AncientWriter, id uint64, meta []byte, accountIn }) return err } + +// ReadTrienodeHistory retrieves the trienode history corresponding to the specified id. +// Compute the position of trienode history in freezer by minus one since the id of first +// trienode history starts from one(zero for initial state). 
+func ReadTrienodeHistory(db ethdb.AncientReaderOp, id uint64) ([]byte, []byte, []byte, error) { + header, err := db.Ancient(trienodeHistoryHeaderTable, id-1) + if err != nil { + return nil, nil, nil, err + } + keySection, err := db.Ancient(trienodeHistoryKeySectionTable, id-1) + if err != nil { + return nil, nil, nil, err + } + valueSection, err := db.Ancient(trienodeHistoryValueSectionTable, id-1) + if err != nil { + return nil, nil, nil, err + } + return header, keySection, valueSection, nil +} + +// ReadTrienodeHistoryHeader retrieves the header section of trienode history. +func ReadTrienodeHistoryHeader(db ethdb.AncientReaderOp, id uint64) ([]byte, error) { + return db.Ancient(trienodeHistoryHeaderTable, id-1) +} + +// ReadTrienodeHistoryKeySection retrieves the key section of trienode history. +func ReadTrienodeHistoryKeySection(db ethdb.AncientReaderOp, id uint64) ([]byte, error) { + return db.Ancient(trienodeHistoryKeySectionTable, id-1) +} + +// ReadTrienodeHistoryValueSection retrieves the value section of trienode history. +func ReadTrienodeHistoryValueSection(db ethdb.AncientReaderOp, id uint64) ([]byte, error) { + return db.Ancient(trienodeHistoryValueSectionTable, id-1) +} + +// ReadTrienodeHistoryList retrieves a list of trienode history corresponding +// to the specified range. +// Compute the position of trienode history in freezer by minus one since the id +// of first trienode history starts from one(zero for initial state). 
+func ReadTrienodeHistoryList(db ethdb.AncientReaderOp, start uint64, count uint64) ([][]byte, [][]byte, [][]byte, error) { + header, err := db.AncientRange(trienodeHistoryHeaderTable, start-1, count, 0) + if err != nil { + return nil, nil, nil, err + } + keySection, err := db.AncientRange(trienodeHistoryKeySectionTable, start-1, count, 0) + if err != nil { + return nil, nil, nil, err + } + valueSection, err := db.AncientRange(trienodeHistoryValueSectionTable, start-1, count, 0) + if err != nil { + return nil, nil, nil, err + } + if len(header) != len(keySection) || len(header) != len(valueSection) { + return nil, nil, nil, errors.New("trienode history is corrupted") + } + return header, keySection, valueSection, nil +} + +// WriteTrienodeHistory writes the provided trienode history to database. +// Compute the position of trienode history in freezer by minus one since +// the id of first state history starts from one(zero for initial state). +func WriteTrienodeHistory(db ethdb.AncientWriter, id uint64, header []byte, keySection []byte, valueSection []byte) error { + _, err := db.ModifyAncients(func(op ethdb.AncientWriteOp) error { + if err := op.AppendRaw(trienodeHistoryHeaderTable, id-1, header); err != nil { + return err + } + if err := op.AppendRaw(trienodeHistoryKeySectionTable, id-1, keySection); err != nil { + return err + } + return op.AppendRaw(trienodeHistoryValueSectionTable, id-1, valueSection) + }) + return err +} diff --git a/core/rawdb/ancient_scheme.go b/core/rawdb/ancient_scheme.go index 1ffebed3e7cb..afec7848c882 100644 --- a/core/rawdb/ancient_scheme.go +++ b/core/rawdb/ancient_scheme.go @@ -75,15 +75,38 @@ var stateFreezerTableConfigs = map[string]freezerTableConfig{ stateHistoryStorageData: {noSnappy: false, prunable: true}, } +const ( + trienodeHistoryHeaderTable = "trienode.header" + trienodeHistoryKeySectionTable = "trienode.key" + trienodeHistoryValueSectionTable = "trienode.value" +) + +// trienodeFreezerTableConfigs configures the 
settings for tables in the trienode freezer. +var trienodeFreezerTableConfigs = map[string]freezerTableConfig{ + trienodeHistoryHeaderTable: {noSnappy: false, prunable: true}, + + // Disable snappy compression to allow efficient partial read. + trienodeHistoryKeySectionTable: {noSnappy: true, prunable: true}, + + // Disable snappy compression to allow efficient partial read. + trienodeHistoryValueSectionTable: {noSnappy: true, prunable: true}, +} + // The list of identifiers of ancient stores. var ( - ChainFreezerName = "chain" // the folder name of chain segment ancient store. - MerkleStateFreezerName = "state" // the folder name of state history ancient store. - VerkleStateFreezerName = "state_verkle" // the folder name of state history ancient store. + ChainFreezerName = "chain" // the folder name of chain segment ancient store. + MerkleStateFreezerName = "state" // the folder name of state history ancient store. + VerkleStateFreezerName = "state_verkle" // the folder name of state history ancient store. + MerkleTrienodeFreezerName = "trienode" // the folder name of trienode history ancient store. + VerkleTrienodeFreezerName = "trienode_verkle" // the folder name of trienode history ancient store. ) // freezers the collections of all builtin freezers. -var freezers = []string{ChainFreezerName, MerkleStateFreezerName, VerkleStateFreezerName} +var freezers = []string{ + ChainFreezerName, + MerkleStateFreezerName, VerkleStateFreezerName, + MerkleTrienodeFreezerName, VerkleTrienodeFreezerName, +} // NewStateFreezer initializes the ancient store for state history. // @@ -103,3 +126,22 @@ func NewStateFreezer(ancientDir string, verkle bool, readOnly bool) (ethdb.Reset } return newResettableFreezer(name, "eth/db/state", readOnly, stateHistoryTableSize, stateFreezerTableConfigs) } + +// NewTrienodeFreezer initializes the ancient store for trienode history. +// +// - if the empty directory is given, initializes the pure in-memory +// trienode freezer (e.g. dev mode). 
+// - if non-empty directory is given, initializes the regular file-based +// trienode freezer. +func NewTrienodeFreezer(ancientDir string, verkle bool, readOnly bool) (ethdb.ResettableAncientStore, error) { + if ancientDir == "" { + return NewMemoryFreezer(readOnly, trienodeFreezerTableConfigs), nil + } + var name string + if verkle { + name = filepath.Join(ancientDir, VerkleTrienodeFreezerName) + } else { + name = filepath.Join(ancientDir, MerkleTrienodeFreezerName) + } + return newResettableFreezer(name, "eth/db/trienode", readOnly, stateHistoryTableSize, trienodeFreezerTableConfigs) +} diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 9a17e1c17384..ed7922e5639b 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -80,6 +80,10 @@ var ( // been indexed. headStateHistoryIndexKey = []byte("LastStateHistoryIndex") + // headTrienodeHistoryIndexKey tracks the ID of the latest trienode history that has + // been indexed. + headTrienodeHistoryIndexKey = []byte("LastTrienodeHistoryIndex") + // txIndexTailKey tracks the oldest block whose transactions have been indexed. 
txIndexTailKey = []byte("TransactionIndexTail") @@ -125,8 +129,10 @@ var ( StateHistoryIndexPrefix = []byte("m") // The global prefix of state history index data StateHistoryAccountMetadataPrefix = []byte("ma") // StateHistoryAccountMetadataPrefix + account address hash => account metadata StateHistoryStorageMetadataPrefix = []byte("ms") // StateHistoryStorageMetadataPrefix + account address hash + storage slot hash => slot metadata + TrienodeHistoryMetadataPrefix = []byte("mt") // TrienodeHistoryMetadataPrefix + account address hash + trienode path => trienode metadata StateHistoryAccountBlockPrefix = []byte("mba") // StateHistoryAccountBlockPrefix + account address hash + blockID => account block StateHistoryStorageBlockPrefix = []byte("mbs") // StateHistoryStorageBlockPrefix + account address hash + storage slot hash + blockID => slot block + TrienodeHistoryBlockPrefix = []byte("mbt") // TrienodeHistoryBlockPrefix + account address hash + trienode path + blockID => trienode block // VerklePrefix is the database prefix for Verkle trie data, which includes: // (a) Trie nodes @@ -395,6 +401,19 @@ func storageHistoryIndexKey(addressHash common.Hash, storageHash common.Hash) [] return out } +// trienodeHistoryIndexKey = TrienodeHistoryMetadataPrefix + addressHash + trienode path +func trienodeHistoryIndexKey(addressHash common.Hash, path []byte) []byte { + totalLen := len(TrienodeHistoryMetadataPrefix) + common.HashLength + len(path) + out := make([]byte, totalLen) + + off := 0 + off += copy(out[off:], TrienodeHistoryMetadataPrefix) + off += copy(out[off:], addressHash.Bytes()) + copy(out[off:], path) + + return out +} + // accountHistoryIndexBlockKey = StateHistoryAccountBlockPrefix + addressHash + blockID func accountHistoryIndexBlockKey(addressHash common.Hash, blockID uint32) []byte { var buf4 [4]byte @@ -428,6 +447,23 @@ func storageHistoryIndexBlockKey(addressHash common.Hash, storageHash common.Has return out } +// trienodeHistoryIndexBlockKey = 
TrienodeHistoryBlockPrefix + addressHash + trienode path + blockID +func trienodeHistoryIndexBlockKey(addressHash common.Hash, path []byte, blockID uint32) []byte { + var buf4 [4]byte + binary.BigEndian.PutUint32(buf4[:], blockID) + + totalLen := len(TrienodeHistoryBlockPrefix) + common.HashLength + len(path) + 4 + out := make([]byte, totalLen) + + off := 0 + off += copy(out[off:], TrienodeHistoryBlockPrefix) + off += copy(out[off:], addressHash.Bytes()) + off += copy(out[off:], path) + copy(out[off:], buf4[:]) + + return out +} + // transitionStateKey = transitionStatusKey + hash func transitionStateKey(hash common.Hash) []byte { return append(VerkleTransitionStatePrefix, hash.Bytes()...) diff --git a/triedb/pathdb/database.go b/triedb/pathdb/database.go index 546d2e0301f9..9fc65de27724 100644 --- a/triedb/pathdb/database.go +++ b/triedb/pathdb/database.go @@ -232,7 +232,7 @@ func (db *Database) repairHistory() error { // Purge all state history indexing data first batch := db.diskdb.NewBatch() rawdb.DeleteStateHistoryIndexMetadata(batch) - rawdb.DeleteStateHistoryIndex(batch) + rawdb.DeleteStateHistoryIndexes(batch) if err := batch.Write(); err != nil { log.Crit("Failed to purge state history index", "err", err) } @@ -426,7 +426,7 @@ func (db *Database) Enable(root common.Hash) error { // Purge all state history indexing data first batch.Reset() rawdb.DeleteStateHistoryIndexMetadata(batch) - rawdb.DeleteStateHistoryIndex(batch) + rawdb.DeleteStateHistoryIndexes(batch) if err := batch.Write(); err != nil { return err } diff --git a/triedb/pathdb/history.go b/triedb/pathdb/history.go index 81b843d9f12e..d78999f21835 100644 --- a/triedb/pathdb/history.go +++ b/triedb/pathdb/history.go @@ -32,6 +32,9 @@ type historyType uint8 const ( // typeStateHistory indicates history data related to account or storage changes. typeStateHistory historyType = 0 + + // typeTrienodeHistory indicates history data related to trie node changes. 
+ typeTrienodeHistory historyType = 1 ) // String returns the string format representation. @@ -39,6 +42,8 @@ func (h historyType) String() string { switch h { case typeStateHistory: return "state" + case typeTrienodeHistory: + return "trienode" default: return fmt.Sprintf("unknown type: %d", h) } @@ -48,8 +53,9 @@ func (h historyType) String() string { type elementType uint8 const ( - typeAccount elementType = 0 // represents the account data - typeStorage elementType = 1 // represents the storage slot data + typeAccount elementType = 0 // represents the account data + typeStorage elementType = 1 // represents the storage slot data + typeTrienode elementType = 2 // represents the trie node data ) // String returns the string format representation. @@ -59,6 +65,8 @@ func (e elementType) String() string { return "account" case typeStorage: return "storage" + case typeTrienode: + return "trienode" default: return fmt.Sprintf("unknown element type: %d", e) } @@ -69,11 +77,14 @@ func toHistoryType(typ elementType) historyType { if typ == typeAccount || typ == typeStorage { return typeStateHistory } + if typ == typeTrienode { + return typeTrienodeHistory + } panic(fmt.Sprintf("unknown element type %v", typ)) } // stateIdent represents the identifier of a state element, which can be -// an account or a storage slot. +// an account, a storage slot or a trienode. type stateIdent struct { typ elementType @@ -91,6 +102,12 @@ type stateIdent struct { // // This field is null if the identifier refers to an account or a trie node. storageHash common.Hash + + // The trie node path within the trie. + // + // This field is null if the identifier refers to an account or a storage slot. + // String type is chosen to make stateIdent comparable. + path string } // String returns the string format state identifier. 
@@ -98,7 +115,10 @@ func (ident stateIdent) String() string { if ident.typ == typeAccount { return ident.addressHash.Hex() } - return ident.addressHash.Hex() + ident.storageHash.Hex() + if ident.typ == typeStorage { + return ident.addressHash.Hex() + ident.storageHash.Hex() + } + return ident.addressHash.Hex() + ident.path } // newAccountIdent constructs a state identifier for an account. @@ -120,8 +140,18 @@ func newStorageIdent(addressHash common.Hash, storageHash common.Hash) stateIden } } -// stateIdentQuery is the extension of stateIdent by adding the account address -// and raw storage key. +// newTrienodeIdent constructs a state identifier for a trie node. +// The address denotes the address hash of the associated account; +// the path denotes the path of the node within the trie; +func newTrienodeIdent(addressHash common.Hash, path string) stateIdent { + return stateIdent{ + typ: typeTrienode, + addressHash: addressHash, + path: path, + } +} + +// stateIdentQuery is the extension of stateIdent by adding the raw storage key. type stateIdentQuery struct { stateIdent @@ -150,8 +180,19 @@ func newStorageIdentQuery(address common.Address, addressHash common.Hash, stora } } -// history defines the interface of historical data, implemented by stateHistory -// and trienodeHistory (in the near future). +// newTrienodeIdentQuery constructs a state identifier for a trie node. +// the addressHash denotes the address hash of the associated account; +// the path denotes the path of the node within the trie; +// +// nolint:unused +func newTrienodeIdentQuery(addrHash common.Hash, path []byte) stateIdentQuery { + return stateIdentQuery{ + stateIdent: newTrienodeIdent(addrHash, string(path)), + } +} + +// history defines the interface of historical data, shared by stateHistory +// and trienodeHistory. type history interface { // typ returns the historical data type held in the history. 
typ() historyType diff --git a/triedb/pathdb/history_index.go b/triedb/pathdb/history_index.go index 47cee9820dd2..5b4c91d7e61e 100644 --- a/triedb/pathdb/history_index.go +++ b/triedb/pathdb/history_index.go @@ -376,6 +376,8 @@ func readStateIndex(ident stateIdent, db ethdb.KeyValueReader) []byte { return rawdb.ReadAccountHistoryIndex(db, ident.addressHash) case typeStorage: return rawdb.ReadStorageHistoryIndex(db, ident.addressHash, ident.storageHash) + case typeTrienode: + return rawdb.ReadTrienodeHistoryIndex(db, ident.addressHash, []byte(ident.path)) default: panic(fmt.Errorf("unknown type: %v", ident.typ)) } @@ -389,6 +391,8 @@ func writeStateIndex(ident stateIdent, db ethdb.KeyValueWriter, data []byte) { rawdb.WriteAccountHistoryIndex(db, ident.addressHash, data) case typeStorage: rawdb.WriteStorageHistoryIndex(db, ident.addressHash, ident.storageHash, data) + case typeTrienode: + rawdb.WriteTrienodeHistoryIndex(db, ident.addressHash, []byte(ident.path), data) default: panic(fmt.Errorf("unknown type: %v", ident.typ)) } @@ -402,6 +406,8 @@ func deleteStateIndex(ident stateIdent, db ethdb.KeyValueWriter) { rawdb.DeleteAccountHistoryIndex(db, ident.addressHash) case typeStorage: rawdb.DeleteStorageHistoryIndex(db, ident.addressHash, ident.storageHash) + case typeTrienode: + rawdb.DeleteTrienodeHistoryIndex(db, ident.addressHash, []byte(ident.path)) default: panic(fmt.Errorf("unknown type: %v", ident.typ)) } @@ -415,6 +421,8 @@ func readStateIndexBlock(ident stateIdent, db ethdb.KeyValueReader, id uint32) [ return rawdb.ReadAccountHistoryIndexBlock(db, ident.addressHash, id) case typeStorage: return rawdb.ReadStorageHistoryIndexBlock(db, ident.addressHash, ident.storageHash, id) + case typeTrienode: + return rawdb.ReadTrienodeHistoryIndexBlock(db, ident.addressHash, []byte(ident.path), id) default: panic(fmt.Errorf("unknown type: %v", ident.typ)) } @@ -428,6 +436,8 @@ func writeStateIndexBlock(ident stateIdent, db ethdb.KeyValueWriter, id uint32, 
rawdb.WriteAccountHistoryIndexBlock(db, ident.addressHash, id, data) case typeStorage: rawdb.WriteStorageHistoryIndexBlock(db, ident.addressHash, ident.storageHash, id, data) + case typeTrienode: + rawdb.WriteTrienodeHistoryIndexBlock(db, ident.addressHash, []byte(ident.path), id, data) default: panic(fmt.Errorf("unknown type: %v", ident.typ)) } @@ -441,6 +451,8 @@ func deleteStateIndexBlock(ident stateIdent, db ethdb.KeyValueWriter, id uint32) rawdb.DeleteAccountHistoryIndexBlock(db, ident.addressHash, id) case typeStorage: rawdb.DeleteStorageHistoryIndexBlock(db, ident.addressHash, ident.storageHash, id) + case typeTrienode: + rawdb.DeleteTrienodeHistoryIndexBlock(db, ident.addressHash, []byte(ident.path), id) default: panic(fmt.Errorf("unknown type: %v", ident.typ)) } diff --git a/triedb/pathdb/history_indexer.go b/triedb/pathdb/history_indexer.go index d6185859291f..368ff78d415e 100644 --- a/triedb/pathdb/history_indexer.go +++ b/triedb/pathdb/history_indexer.go @@ -36,8 +36,10 @@ const ( // The batch size for reading state histories historyReadBatch = 1000 - stateIndexV0 = uint8(0) // initial version of state index structure - stateIndexVersion = stateIndexV0 // the current state index version + stateHistoryIndexV0 = uint8(0) // initial version of state index structure + stateHistoryIndexVersion = stateHistoryIndexV0 // the current state index version + trienodeHistoryIndexV0 = uint8(0) // initial version of trienode index structure + trienodeHistoryIndexVersion = trienodeHistoryIndexV0 // the current trienode index version ) // indexVersion returns the latest index version for the given history type. 
@@ -45,7 +47,9 @@ const ( func indexVersion(typ historyType) uint8 { switch typ { case typeStateHistory: - return stateIndexVersion + return stateHistoryIndexVersion + case typeTrienodeHistory: + return trienodeHistoryIndexVersion default: panic(fmt.Errorf("unknown history type: %d", typ)) } @@ -63,6 +67,8 @@ func loadIndexMetadata(db ethdb.KeyValueReader, typ historyType) *indexMetadata switch typ { case typeStateHistory: blob = rawdb.ReadStateHistoryIndexMetadata(db) + case typeTrienodeHistory: + blob = rawdb.ReadTrienodeHistoryIndexMetadata(db) default: panic(fmt.Errorf("unknown history type %d", typ)) } @@ -90,6 +96,8 @@ func storeIndexMetadata(db ethdb.KeyValueWriter, typ historyType, last uint64) { switch typ { case typeStateHistory: rawdb.WriteStateHistoryIndexMetadata(db, blob) + case typeTrienodeHistory: + rawdb.WriteTrienodeHistoryIndexMetadata(db, blob) default: panic(fmt.Errorf("unknown history type %d", typ)) } @@ -101,6 +109,8 @@ func deleteIndexMetadata(db ethdb.KeyValueWriter, typ historyType) { switch typ { case typeStateHistory: rawdb.DeleteStateHistoryIndexMetadata(db) + case typeTrienodeHistory: + rawdb.DeleteTrienodeHistoryIndexMetadata(db) default: panic(fmt.Errorf("unknown history type %d", typ)) } @@ -215,7 +225,11 @@ func (b *batchIndexer) finish(force bool) error { func indexSingle(historyID uint64, db ethdb.KeyValueStore, freezer ethdb.AncientReader, typ historyType) error { start := time.Now() defer func() { - indexHistoryTimer.UpdateSince(start) + if typ == typeStateHistory { + stateIndexHistoryTimer.UpdateSince(start) + } else if typ == typeTrienodeHistory { + trienodeIndexHistoryTimer.UpdateSince(start) + } }() metadata := loadIndexMetadata(db, typ) @@ -234,7 +248,7 @@ func indexSingle(historyID uint64, db ethdb.KeyValueStore, freezer ethdb.Ancient if typ == typeStateHistory { h, err = readStateHistory(freezer, historyID) } else { - // h, err = readTrienodeHistory(freezer, historyID) + h, err = readTrienodeHistory(freezer, historyID) 
} if err != nil { return err @@ -253,7 +267,11 @@ func indexSingle(historyID uint64, db ethdb.KeyValueStore, freezer ethdb.Ancient func unindexSingle(historyID uint64, db ethdb.KeyValueStore, freezer ethdb.AncientReader, typ historyType) error { start := time.Now() defer func() { - unindexHistoryTimer.UpdateSince(start) + if typ == typeStateHistory { + stateUnindexHistoryTimer.UpdateSince(start) + } else if typ == typeTrienodeHistory { + trienodeUnindexHistoryTimer.UpdateSince(start) + } }() metadata := loadIndexMetadata(db, typ) @@ -272,7 +290,7 @@ func unindexSingle(historyID uint64, db ethdb.KeyValueStore, freezer ethdb.Ancie if typ == typeStateHistory { h, err = readStateHistory(freezer, historyID) } else { - // h, err = readTrienodeHistory(freezer, historyID) + h, err = readTrienodeHistory(freezer, historyID) } if err != nil { return err @@ -546,13 +564,13 @@ func (i *indexIniter) index(done chan struct{}, interrupt *atomic.Int32, lastID return } } else { - // histories, err = readTrienodeHistories(i.freezer, current, count) - // if err != nil { - // // The history read might fall if the history is truncated from - // // head due to revert operation. - // i.log.Error("Failed to read history for indexing", "current", current, "count", count, "err", err) - // return - // } + histories, err = readTrienodeHistories(i.freezer, current, count) + if err != nil { + // The history read might fall if the history is truncated from + // head due to revert operation. 
+ i.log.Error("Failed to read history for indexing", "current", current, "count", count, "err", err) + return + } } for _, h := range histories { if err := batch.process(h, current); err != nil { @@ -570,7 +588,7 @@ func (i *indexIniter) index(done chan struct{}, interrupt *atomic.Int32, lastID done = current - beginID ) eta := common.CalculateETA(done, left, time.Since(start)) - i.log.Info("Indexing state history", "processed", done, "left", left, "elapsed", common.PrettyDuration(time.Since(start)), "eta", common.PrettyDuration(eta)) + i.log.Info("Indexing history", "processed", done, "left", left, "elapsed", common.PrettyDuration(time.Since(start)), "eta", common.PrettyDuration(eta)) } } i.indexed.Store(current - 1) // update indexing progress @@ -657,6 +675,8 @@ func checkVersion(disk ethdb.KeyValueStore, typ historyType) { var blob []byte if typ == typeStateHistory { blob = rawdb.ReadStateHistoryIndexMetadata(disk) + } else if typ == typeTrienodeHistory { + blob = rawdb.ReadTrienodeHistoryIndexMetadata(disk) } else { panic(fmt.Errorf("unknown history type: %v", typ)) } @@ -666,24 +686,32 @@ func checkVersion(disk ethdb.KeyValueStore, typ historyType) { return } // Short circuit if the metadata is found and the version is matched + ver := stateHistoryIndexVersion + if typ == typeTrienodeHistory { + ver = trienodeHistoryIndexVersion + } var m indexMetadata err := rlp.DecodeBytes(blob, &m) - if err == nil && m.Version == stateIndexVersion { + if err == nil && m.Version == ver { return } // Version is not matched, prune the existing data and re-index from scratch + batch := disk.NewBatch() + if typ == typeStateHistory { + rawdb.DeleteStateHistoryIndexMetadata(batch) + rawdb.DeleteStateHistoryIndexes(batch) + } else { + rawdb.DeleteTrienodeHistoryIndexMetadata(batch) + rawdb.DeleteTrienodeHistoryIndexes(batch) + } + if err := batch.Write(); err != nil { + log.Crit("Failed to purge history index", "type", typ, "err", err) + } version := "unknown" if err == nil { 
version = fmt.Sprintf("%d", m.Version) } - - batch := disk.NewBatch() - rawdb.DeleteStateHistoryIndexMetadata(batch) - rawdb.DeleteStateHistoryIndex(batch) - if err := batch.Write(); err != nil { - log.Crit("Failed to purge state history index", "err", err) - } - log.Info("Cleaned up obsolete state history index", "version", version, "want", stateIndexVersion) + log.Info("Cleaned up obsolete history index", "type", typ, "version", version, "want", version) } // newHistoryIndexer constructs the history indexer and launches the background diff --git a/triedb/pathdb/history_state.go b/triedb/pathdb/history_state.go index 9d1e4dfb0999..bc21915dbaa8 100644 --- a/triedb/pathdb/history_state.go +++ b/triedb/pathdb/history_state.go @@ -605,9 +605,9 @@ func writeStateHistory(writer ethdb.AncientWriter, dl *diffLayer) error { if err := rawdb.WriteStateHistory(writer, dl.stateID(), history.meta.encode(), accountIndex, storageIndex, accountData, storageData); err != nil { return err } - historyDataBytesMeter.Mark(int64(dataSize)) - historyIndexBytesMeter.Mark(int64(indexSize)) - historyBuildTimeMeter.UpdateSince(start) + stateHistoryDataBytesMeter.Mark(int64(dataSize)) + stateHistoryIndexBytesMeter.Mark(int64(indexSize)) + stateHistoryBuildTimeMeter.UpdateSince(start) log.Debug("Stored state history", "id", dl.stateID(), "block", dl.block, "data", dataSize, "index", indexSize, "elapsed", common.PrettyDuration(time.Since(start))) return nil diff --git a/triedb/pathdb/history_state_test.go b/triedb/pathdb/history_state_test.go index 5718081566c6..4046fb96400a 100644 --- a/triedb/pathdb/history_state_test.go +++ b/triedb/pathdb/history_state_test.go @@ -98,13 +98,13 @@ func testEncodeDecodeStateHistory(t *testing.T, rawStorageKey bool) { if !compareSet(dec.accounts, obj.accounts) { t.Fatal("account data is mismatched") } - if !compareStorages(dec.storages, obj.storages) { + if !compareMapSet(dec.storages, obj.storages) { t.Fatal("storage data is mismatched") } if 
!compareList(dec.accountList, obj.accountList) { t.Fatal("account list is mismatched") } - if !compareStorageList(dec.storageList, obj.storageList) { + if !compareMapList(dec.storageList, obj.storageList) { t.Fatal("storage list is mismatched") } } @@ -292,32 +292,32 @@ func compareList[k comparable](a, b []k) bool { return true } -func compareStorages(a, b map[common.Address]map[common.Hash][]byte) bool { +func compareMapSet[K1 comparable, K2 comparable](a, b map[K1]map[K2][]byte) bool { if len(a) != len(b) { return false } - for h, subA := range a { - subB, ok := b[h] + for key, subsetA := range a { + subsetB, ok := b[key] if !ok { return false } - if !compareSet(subA, subB) { + if !compareSet(subsetA, subsetB) { return false } } return true } -func compareStorageList(a, b map[common.Address][]common.Hash) bool { +func compareMapList[K comparable, V comparable](a, b map[K][]V) bool { if len(a) != len(b) { return false } - for h, la := range a { - lb, ok := b[h] + for key, listA := range a { + listB, ok := b[key] if !ok { return false } - if !compareList(la, lb) { + if !compareList(listA, listB) { return false } } diff --git a/triedb/pathdb/history_trienode.go b/triedb/pathdb/history_trienode.go new file mode 100644 index 000000000000..defee8d3cdda --- /dev/null +++ b/triedb/pathdb/history_trienode.go @@ -0,0 +1,680 @@ +// Copyright 2025 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "encoding/binary" + "fmt" + "iter" + "maps" + "math" + "slices" + "sort" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +// Each trie node history entry consists of three parts (stored in three freezer +// tables according): +// +// # Header +// The header records metadata, including: +// - the history version +// - a lexicographically sorted list of trie IDs +// - the corresponding offsets into the key and value sections for each trie data chunk +// +// # Key section +// The key section stores trie node keys (paths) in a compressed format. +// It also contains relative offsets into the value section for resolving +// the corresponding trie node data. Note that these offsets are relative +// to the data chunk for the trie; the chunk offset must be added to obtain +// the absolute position. +// +// # Value section +// The value section is a concatenated byte stream of all trie node data. +// Each trie node can be retrieved using the offset and length specified +// by its index entry. +// +// The header and key sections are sufficient for locating a trie node, +// while a partial read of the value section is enough to retrieve its data. + +// Header section: +// +// +----------+------------------+---------------------+---------------------+-------+------------------+---------------------+---------------------| +// | ver (1B) | TrieID(32 bytes) | key offset(4 bytes) | val offset(4 bytes) | ... 
| TrieID(32 bytes) | key offset(4 bytes) | val offset(4 bytes) | +// +----------+------------------+---------------------+---------------------+-------+------------------+---------------------+---------------------| +// +// +// Key section: +// +// + restart point + restart point (depends on restart interval) +// / / +// +---------------+---------------+---------------+---------------+---------+ +// | node entry 1 | node entry 2 | ... | node entry n | trailer | +// +---------------+---------------+---------------+---------------+---------+ +// \ / +// +---- restart block ------+ +// +// node entry: +// +// +---- key len ----+ +// / \ +// +-------+---------+-----------+---------+-----------------------+-----------------+ +// | shared (varint) | not shared (varint) | value length (varlen) | key (varlen) | +// +-----------------+---------------------+-----------------------+-----------------+ +// +// trailer: +// +// +---- 4-bytes ----+ +---- 4-bytes ----+ +// / \ / \ +// +----------------------+------------------------+-----+--------------------------+ +// | restart_1 key offset | restart_1 value offset | ... | restart number (4-bytes) | +// +----------------------+------------------------+-----+--------------------------+ +// +// Note: Both the key offset and the value offset are relative to the start of +// the trie data chunk. To obtain the absolute offset, add the offset of the +// trie data chunk itself. +// +// Value section: +// +// +--------------+--------------+-------+---------------+ +// | node data 1 | node data 2 | ... | node data n | +// +--------------+--------------+-------+---------------+ +// +// NOTE: All fixed-length integer are big-endian. 
+ +const ( + trienodeHistoryV0 = uint8(0) // initial version of node history structure + trienodeHistoryVersion = trienodeHistoryV0 // the default node history version + trienodeVersionSize = 1 // the size of version tag in the history + trienodeTrieHeaderSize = 8 + common.HashLength // the size of a single trie header in history + trienodeDataBlockRestartLen = 16 // The restart interval length of trie node block +) + +// trienodeHistory represents a set of trie node changes resulting from a state +// transition across the main account trie and all associated storage tries. +type trienodeHistory struct { + owners []common.Hash // List of trie identifier sorted lexicographically + nodeList map[common.Hash][]string // Set of node paths sorted lexicographically + nodes map[common.Hash]map[string][]byte // Set of original value of trie nodes before state transition +} + +// newTrienodeHistory constructs a trienode history with the provided trie nodes. +func newTrienodeHistory(nodes map[common.Hash]map[string][]byte) *trienodeHistory { + nodeList := make(map[common.Hash][]string) + for owner, subset := range nodes { + keys := sort.StringSlice(slices.Collect(maps.Keys(subset))) + keys.Sort() + nodeList[owner] = keys + } + return &trienodeHistory{ + owners: slices.SortedFunc(maps.Keys(nodes), common.Hash.Cmp), + nodeList: nodeList, + nodes: nodes, + } +} + +// sharedLen returns the length of the common prefix shared by a and b. +func sharedLen(a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + for i := 0; i < n; i++ { + if a[i] != b[i] { + return i + } + } + return n +} + +// typ implements the history interface, returning the historical data type held. +func (h *trienodeHistory) typ() historyType { + return typeTrienodeHistory +} + +// forEach implements the history interface, returning an iterator to traverse the +// state entries in the history. 
+func (h *trienodeHistory) forEach() iter.Seq[stateIdent] { + return func(yield func(stateIdent) bool) { + for _, owner := range h.owners { + for _, path := range h.nodeList[owner] { + if !yield(newTrienodeIdent(owner, path)) { + return + } + } + } + } +} + +// encode serializes the contained trie nodes into bytes. +func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) { + var ( + buf = make([]byte, 64) + headerSection bytes.Buffer + keySection bytes.Buffer + valueSection bytes.Buffer + ) + binary.Write(&headerSection, binary.BigEndian, trienodeHistoryVersion) // 1 byte + + for _, owner := range h.owners { + // Fill the header section with offsets at key and value section + headerSection.Write(owner.Bytes()) // 32 bytes + binary.Write(&headerSection, binary.BigEndian, uint32(keySection.Len())) // 4 bytes + + // The offset to the value section is theoretically unnecessary, since the + // individual value offset is already tracked in the key section. However, + // we still keep it here for two reasons: + // - It's cheap to store (only 4 bytes for each trie). + // - It can be useful for decoding the trie data when key is not required (e.g., in hash mode). 
+ binary.Write(&headerSection, binary.BigEndian, uint32(valueSection.Len())) // 4 bytes + + // Fill the key section with node index + var ( + prevKey []byte + restarts []uint32 + prefixLen int + + internalKeyOffset uint32 // key offset for the trie internally + internalValOffset uint32 // value offset for the trie internally + ) + for i, path := range h.nodeList[owner] { + key := []byte(path) + if i%trienodeDataBlockRestartLen == 0 { + restarts = append(restarts, internalKeyOffset) + restarts = append(restarts, internalValOffset) + prefixLen = 0 + } else { + prefixLen = sharedLen(prevKey, key) + } + value := h.nodes[owner][path] + + // key section + n := binary.PutUvarint(buf[0:], uint64(prefixLen)) // key length shared (varint) + n += binary.PutUvarint(buf[n:], uint64(len(key)-prefixLen)) // key length not shared (varint) + n += binary.PutUvarint(buf[n:], uint64(len(value))) // value length (varint) + + if _, err := keySection.Write(buf[:n]); err != nil { + return nil, nil, nil, err + } + // unshared key + if _, err := keySection.Write(key[prefixLen:]); err != nil { + return nil, nil, nil, err + } + n += len(key) - prefixLen + prevKey = key + + // value section + if _, err := valueSection.Write(value); err != nil { + return nil, nil, nil, err + } + internalKeyOffset += uint32(n) + internalValOffset += uint32(len(value)) + } + + // Encode trailer + var trailer []byte + for _, number := range append(restarts, uint32(len(restarts))/2) { + binary.BigEndian.PutUint32(buf[:4], number) + trailer = append(trailer, buf[:4]...) + } + if _, err := keySection.Write(trailer); err != nil { + return nil, nil, nil, err + } + } + return headerSection.Bytes(), keySection.Bytes(), valueSection.Bytes(), nil +} + +// decodeHeader resolves the metadata from the header section. An error +// should be returned if the header section is corrupted. 
+func decodeHeader(data []byte) ([]common.Hash, []uint32, []uint32, error) { + if len(data) < trienodeVersionSize { + return nil, nil, nil, fmt.Errorf("trienode history is too small, index size: %d", len(data)) + } + version := data[0] + if version != trienodeHistoryVersion { + return nil, nil, nil, fmt.Errorf("unregonized trienode history version: %d", version) + } + size := len(data) - trienodeVersionSize + if size%trienodeTrieHeaderSize != 0 { + return nil, nil, nil, fmt.Errorf("truncated trienode history data, size %d", len(data)) + } + count := size / trienodeTrieHeaderSize + + var ( + owners = make([]common.Hash, 0, count) + keyOffsets = make([]uint32, 0, count) + valOffsets = make([]uint32, 0, count) + ) + for i := 0; i < count; i++ { + n := trienodeVersionSize + trienodeTrieHeaderSize*i + owner := common.BytesToHash(data[n : n+common.HashLength]) + if i != 0 && bytes.Compare(owner.Bytes(), owners[i-1].Bytes()) <= 0 { + return nil, nil, nil, fmt.Errorf("trienode owners are out of order, prev: %v, cur: %v", owners[i-1], owner) + } + owners = append(owners, owner) + + // Decode the offset to the key section + keyOffset := binary.BigEndian.Uint32(data[n+common.HashLength : n+common.HashLength+4]) + if i != 0 && keyOffset <= keyOffsets[i-1] { + return nil, nil, nil, fmt.Errorf("key offset is out of order, prev: %v, cur: %v", keyOffsets[i-1], keyOffset) + } + keyOffsets = append(keyOffsets, keyOffset) + + // Decode the offset into the value section. Note that identical value offsets + // are valid if the node values in the last trie chunk are all zero (e.g., after + // a trie deletion). 
+ valOffset := binary.BigEndian.Uint32(data[n+common.HashLength+4 : n+common.HashLength+8]) + if i != 0 && valOffset < valOffsets[i-1] { + return nil, nil, nil, fmt.Errorf("value offset is out of order, prev: %v, cur: %v", valOffsets[i-1], valOffset) + } + valOffsets = append(valOffsets, valOffset) + } + return owners, keyOffsets, valOffsets, nil +} + +func decodeSingle(keySection []byte, onValue func([]byte, int, int) error) ([]string, error) { + var ( + prevKey []byte + items int + keyOffsets []uint32 + valOffsets []uint32 + + keyOff int // the key offset within the single trie data + valOff int // the value offset within the single trie data + + keys []string + ) + // Decode restarts + if len(keySection) < 4 { + return nil, fmt.Errorf("key section too short, size: %d", len(keySection)) + } + nRestarts := binary.BigEndian.Uint32(keySection[len(keySection)-4:]) + + if len(keySection) < int(8*nRestarts)+4 { + return nil, fmt.Errorf("key section too short, restarts: %d, size: %d", nRestarts, len(keySection)) + } + for i := 0; i < int(nRestarts); i++ { + o := len(keySection) - 4 - (int(nRestarts)-i)*8 + keyOffset := binary.BigEndian.Uint32(keySection[o : o+4]) + if i != 0 && keyOffset <= keyOffsets[i-1] { + return nil, fmt.Errorf("key offset is out of order, prev: %v, cur: %v", keyOffsets[i-1], keyOffset) + } + keyOffsets = append(keyOffsets, keyOffset) + + // Same value offset is allowed just in case all the trie nodes in the last + // section have zero-size value. 
+ valOffset := binary.BigEndian.Uint32(keySection[o+4 : o+8]) + if i != 0 && valOffset < valOffsets[i-1] { + return nil, fmt.Errorf("value offset is out of order, prev: %v, cur: %v", valOffsets[i-1], valOffset) + } + valOffsets = append(valOffsets, valOffset) + } + keyLimit := len(keySection) - 4 - int(nRestarts)*8 + + // Decode data + for keyOff < keyLimit { + // Validate the key and value offsets within the single trie data chunk + if items%trienodeDataBlockRestartLen == 0 { + if keyOff != int(keyOffsets[items/trienodeDataBlockRestartLen]) { + return nil, fmt.Errorf("key offset is not matched, recorded: %d, want: %d", keyOffsets[items/trienodeDataBlockRestartLen], keyOff) + } + if valOff != int(valOffsets[items/trienodeDataBlockRestartLen]) { + return nil, fmt.Errorf("value offset is not matched, recorded: %d, want: %d", valOffsets[items/trienodeDataBlockRestartLen], valOff) + } + } + // Resolve the entry from key section + nShared, nn := binary.Uvarint(keySection[keyOff:]) // key length shared (varint) + keyOff += nn + nUnshared, nn := binary.Uvarint(keySection[keyOff:]) // key length not shared (varint) + keyOff += nn + nValue, nn := binary.Uvarint(keySection[keyOff:]) // value length (varint) + keyOff += nn + + // Resolve unshared key + if keyOff+int(nUnshared) > len(keySection) { + return nil, fmt.Errorf("key length too long, unshared key length: %d, off: %d, section size: %d", nUnshared, keyOff, len(keySection)) + } + unsharedKey := keySection[keyOff : keyOff+int(nUnshared)] + keyOff += int(nUnshared) + + // Assemble the full key + var key []byte + if items%trienodeDataBlockRestartLen == 0 { + if nShared != 0 { + return nil, fmt.Errorf("unexpected non-zero shared key prefix: %d", nShared) + } + key = unsharedKey + } else { + if int(nShared) > len(prevKey) { + return nil, fmt.Errorf("unexpected shared key prefix: %d, prefix key length: %d", nShared, len(prevKey)) + } + key = append([]byte{}, prevKey[:nShared]...) + key = append(key, unsharedKey...) 
+ } + if items != 0 && bytes.Compare(prevKey, key) >= 0 { + return nil, fmt.Errorf("trienode paths are out of order, prev: %v, cur: %v", prevKey, key) + } + prevKey = key + + // Resolve value + if onValue != nil { + if err := onValue(key, valOff, valOff+int(nValue)); err != nil { + return nil, err + } + } + valOff += int(nValue) + + items++ + keys = append(keys, string(key)) + } + if keyOff != keyLimit { + return nil, fmt.Errorf("excessive key data after decoding, offset: %d, size: %d", keyOff, keyLimit) + } + return keys, nil +} + +func decodeSingleWithValue(keySection []byte, valueSection []byte) ([]string, map[string][]byte, error) { + var ( + offset int + nodes = make(map[string][]byte) + ) + paths, err := decodeSingle(keySection, func(key []byte, start int, limit int) error { + if start != offset { + return fmt.Errorf("gapped value section offset: %d, want: %d", start, offset) + } + // start == limit is allowed for zero-value trie node (e.g., non-existent node) + if start > limit { + return fmt.Errorf("invalid value offsets, start: %d, limit: %d", start, limit) + } + if start > len(valueSection) || limit > len(valueSection) { + return fmt.Errorf("value section out of range: start: %d, limit: %d, size: %d", start, limit, len(valueSection)) + } + nodes[string(key)] = valueSection[start:limit] + + offset = limit + return nil + }) + if err != nil { + return nil, nil, err + } + if offset != len(valueSection) { + return nil, nil, fmt.Errorf("excessive value data after decoding, offset: %d, size: %d", offset, len(valueSection)) + } + return paths, nodes, nil +} + +// decode deserializes the contained trie nodes from the provided bytes. 
+func (h *trienodeHistory) decode(header []byte, keySection []byte, valueSection []byte) error { + owners, keyOffsets, valueOffsets, err := decodeHeader(header) + if err != nil { + return err + } + h.owners = owners + h.nodeList = make(map[common.Hash][]string) + h.nodes = make(map[common.Hash]map[string][]byte) + + for i := 0; i < len(owners); i++ { + // Resolve the boundary of key section + keyStart := keyOffsets[i] + keyLimit := len(keySection) + if i != len(owners)-1 { + keyLimit = int(keyOffsets[i+1]) + } + if int(keyStart) > len(keySection) || keyLimit > len(keySection) { + return fmt.Errorf("invalid key offsets: keyStart: %d, keyLimit: %d, size: %d", keyStart, keyLimit, len(keySection)) + } + + // Resolve the boundary of value section + valStart := valueOffsets[i] + valLimit := len(valueSection) + if i != len(owners)-1 { + valLimit = int(valueOffsets[i+1]) + } + if int(valStart) > len(valueSection) || valLimit > len(valueSection) { + return fmt.Errorf("invalid value offsets: valueStart: %d, valueLimit: %d, size: %d", valStart, valLimit, len(valueSection)) + } + + // Decode the key and values for this specific trie + paths, nodes, err := decodeSingleWithValue(keySection[keyStart:keyLimit], valueSection[valStart:valLimit]) + if err != nil { + return err + } + h.nodeList[owners[i]] = paths + h.nodes[owners[i]] = nodes + } + return nil +} + +type iRange struct { + start uint32 + limit uint32 +} + +// singleTrienodeHistoryReader provides read access to a single trie within the +// trienode history. It stores an offset to the trie's position in the history, +// along with a set of per-node offsets that can be resolved on demand. 
+type singleTrienodeHistoryReader struct { + id uint64 + reader ethdb.AncientReader + valueRange iRange // value range within the total value section + valueInternalOffsets map[string]iRange // value offset within the single trie data +} + +func newSingleTrienodeHistoryReader(id uint64, reader ethdb.AncientReader, keyRange iRange, valueRange iRange) (*singleTrienodeHistoryReader, error) { + // TODO(rjl493456442) partial freezer read should be supported + keyData, err := rawdb.ReadTrienodeHistoryKeySection(reader, id) + if err != nil { + return nil, err + } + keyStart := int(keyRange.start) + keyLimit := int(keyRange.limit) + if keyLimit == math.MaxUint32 { + keyLimit = len(keyData) + } + if len(keyData) < keyStart || len(keyData) < keyLimit { + return nil, fmt.Errorf("key section too short, start: %d, limit: %d, size: %d", keyStart, keyLimit, len(keyData)) + } + + valueOffsets := make(map[string]iRange) + _, err = decodeSingle(keyData[keyStart:keyLimit], func(key []byte, start int, limit int) error { + valueOffsets[string(key)] = iRange{ + start: uint32(start), + limit: uint32(limit), + } + return nil + }) + if err != nil { + return nil, err + } + return &singleTrienodeHistoryReader{ + id: id, + reader: reader, + valueRange: valueRange, + valueInternalOffsets: valueOffsets, + }, nil +} + +// read retrieves the trie node data with the provided node path. 
+func (sr *singleTrienodeHistoryReader) read(path string) ([]byte, error) { + offset, exists := sr.valueInternalOffsets[path] + if !exists { + return nil, fmt.Errorf("trienode %v not found", []byte(path)) + } + // TODO(rjl493456442) partial freezer read should be supported + valueData, err := rawdb.ReadTrienodeHistoryValueSection(sr.reader, sr.id) + if err != nil { + return nil, err + } + if len(valueData) < int(sr.valueRange.start) { + return nil, fmt.Errorf("value section too short, start: %d, size: %d", sr.valueRange.start, len(valueData)) + } + entryStart := sr.valueRange.start + offset.start + entryLimit := sr.valueRange.start + offset.limit + if len(valueData) < int(entryStart) || len(valueData) < int(entryLimit) { + return nil, fmt.Errorf("value section too short, start: %d, limit: %d, size: %d", entryStart, entryLimit, len(valueData)) + } + return valueData[int(entryStart):int(entryLimit)], nil +} + +// trienodeHistoryReader provides read access to node data in the trie node history. +// It resolves data from the underlying ancient store only when needed, minimizing +// I/O overhead. +type trienodeHistoryReader struct { + id uint64 // ID of the associated trienode history + reader ethdb.AncientReader // Database reader of ancient store + keyRanges map[common.Hash]iRange // Key ranges identifying trie chunks + valRanges map[common.Hash]iRange // Value ranges identifying trie chunks + iReaders map[common.Hash]*singleTrienodeHistoryReader // readers for each individual trie chunk +} + +// newTrienodeHistoryReader constructs the reader for specific trienode history. 
+func newTrienodeHistoryReader(id uint64, reader ethdb.AncientReader) (*trienodeHistoryReader, error) { + r := &trienodeHistoryReader{ + id: id, + reader: reader, + keyRanges: make(map[common.Hash]iRange), + valRanges: make(map[common.Hash]iRange), + iReaders: make(map[common.Hash]*singleTrienodeHistoryReader), + } + if err := r.decodeHeader(); err != nil { + return nil, err + } + return r, nil +} + +// decodeHeader decodes the metadata of trienode history from the header section. +func (r *trienodeHistoryReader) decodeHeader() error { + header, err := rawdb.ReadTrienodeHistoryHeader(r.reader, r.id) + if err != nil { + return err + } + owners, keyOffsets, valOffsets, err := decodeHeader(header) + if err != nil { + return err + } + for i, owner := range owners { + // Decode the key range for this trie chunk + var keyLimit uint32 + if i == len(owners)-1 { + keyLimit = math.MaxUint32 + } else { + keyLimit = keyOffsets[i+1] + } + r.keyRanges[owner] = iRange{ + start: keyOffsets[i], + limit: keyLimit, + } + + // Decode the value range for this trie chunk + var valLimit uint32 + if i == len(owners)-1 { + valLimit = math.MaxUint32 + } else { + valLimit = valOffsets[i+1] + } + r.valRanges[owner] = iRange{ + start: valOffsets[i], + limit: valLimit, + } + } + return nil +} + +// read retrieves the trie node data with the provided TrieID and node path. 
+func (r *trienodeHistoryReader) read(owner common.Hash, path string) ([]byte, error) {
+	ir, ok := r.iReaders[owner]
+	if !ok {
+		keyRange, exists := r.keyRanges[owner]
+		if !exists {
+			return nil, fmt.Errorf("trie %x is unknown", owner)
+		}
+		valRange, exists := r.valRanges[owner]
+		if !exists {
+			return nil, fmt.Errorf("trie %x is unknown", owner)
+		}
+		var err error
+		ir, err = newSingleTrienodeHistoryReader(r.id, r.reader, keyRange, valRange)
+		if err != nil {
+			return nil, err
+		}
+		r.iReaders[owner] = ir
+	}
+	return ir.read(path)
+}
+
+// writeTrienodeHistory persists the trienode history associated with the given diff layer.
+// nolint:unused
+func writeTrienodeHistory(writer ethdb.AncientWriter, dl *diffLayer) error {
+	start := time.Now()
+	h := newTrienodeHistory(dl.nodes.nodeOrigin)
+	header, keySection, valueSection, err := h.encode()
+	if err != nil {
+		return err
+	}
+	// Write history data into three freezer tables respectively.
+	if err := rawdb.WriteTrienodeHistory(writer, dl.stateID(), header, keySection, valueSection); err != nil {
+		return err
+	}
+	trienodeHistoryDataBytesMeter.Mark(int64(len(valueSection)))
+	trienodeHistoryIndexBytesMeter.Mark(int64(len(header) + len(keySection)))
+	trienodeHistoryBuildTimeMeter.UpdateSince(start)
+
+	log.Debug(
+		"Stored trienode history", "id", dl.stateID(), "block", dl.block,
+		"header", common.StorageSize(len(header)),
+		"keySection", common.StorageSize(len(keySection)),
+		"valueSection", common.StorageSize(len(valueSection)),
+		"elapsed", common.PrettyDuration(time.Since(start)),
+	)
+	return nil
+}
+
+// readTrienodeHistory resolves a single trienode history object with specific id.
+func readTrienodeHistory(reader ethdb.AncientReader, id uint64) (*trienodeHistory, error) {
+	header, keySection, valueSection, err := rawdb.ReadTrienodeHistory(reader, id)
+	if err != nil {
+		return nil, err
+	}
+	var h trienodeHistory
+	if err := h.decode(header, keySection, valueSection); err != nil {
+		return nil, err
+	}
+	return &h, nil
+}
+
+// readTrienodeHistories resolves a list of trienode histories with the specific range.
+func readTrienodeHistories(reader ethdb.AncientReader, start uint64, count uint64) ([]history, error) {
+	headers, keySections, valueSections, err := rawdb.ReadTrienodeHistoryList(reader, start, count)
+	if err != nil {
+		return nil, err
+	}
+	var res []history
+	for i, header := range headers {
+		var h trienodeHistory
+		if err := h.decode(header, keySections[i], valueSections[i]); err != nil {
+			return nil, err
+		}
+		res = append(res, &h)
+	}
+	return res, nil
+}
diff --git a/triedb/pathdb/history_trienode_test.go b/triedb/pathdb/history_trienode_test.go
new file mode 100644
index 000000000000..69d13fdbb44c
--- /dev/null
+++ b/triedb/pathdb/history_trienode_test.go
@@ -0,0 +1,688 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+ +package pathdb + +import ( + "bytes" + "encoding/binary" + "math/rand" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/internal/testrand" +) + +// randomTrienodes generates a random trienode set. +func randomTrienodes(n int) map[common.Hash]map[string][]byte { + nodes := make(map[common.Hash]map[string][]byte) + for i := 0; i < n; i++ { + owner := testrand.Hash() + nodes[owner] = make(map[string][]byte) + + for j := 0; j < 10; j++ { + pathLen := rand.Intn(10) + path := testrand.Bytes(pathLen) + for z := 0; z < len(path); z++ { + valLen := rand.Intn(128) + nodes[owner][string(path[:z])] = testrand.Bytes(valLen) + } + } + // zero-size trie node, representing it was non-existent before + for j := 0; j < 10; j++ { + path := testrand.Bytes(32) + nodes[owner][string(path)] = nil + } + // root node with zero-size path + nodes[owner][""] = testrand.Bytes(10) + } + return nodes +} + +func makeTrinodeHistory() *trienodeHistory { + return newTrienodeHistory(randomTrienodes(10)) +} + +func makeTrienodeHistories(n int) []*trienodeHistory { + var result []*trienodeHistory + for i := 0; i < n; i++ { + h := makeTrinodeHistory() + result = append(result, h) + } + return result +} + +func TestEncodeDecodeTrienodeHistory(t *testing.T) { + var ( + dec trienodeHistory + obj = makeTrinodeHistory() + ) + header, keySection, valueSection, err := obj.encode() + if err != nil { + t.Fatalf("Failed to encode trienode history: %v", err) + } + if err := dec.decode(header, keySection, valueSection); err != nil { + t.Fatalf("Failed to decode trienode history: %v", err) + } + if !compareList(dec.owners, obj.owners) { + t.Fatal("trie owner list is mismatched") + } + if !compareMapList(dec.nodeList, obj.nodeList) { + t.Fatal("trienode list is mismatched") + } + if !compareMapSet(dec.nodes, obj.nodes) { + t.Fatal("trienode content is mismatched") + } +} + +func TestTrienodeHistoryReader(t *testing.T) { + var 
( + hs = makeTrienodeHistories(10) + freezer, _ = rawdb.NewTrienodeFreezer(t.TempDir(), false, false) + ) + defer freezer.Close() + + for i, h := range hs { + header, keySection, valueSection, _ := h.encode() + if err := rawdb.WriteTrienodeHistory(freezer, uint64(i+1), header, keySection, valueSection); err != nil { + t.Fatalf("Failed to write trienode history: %v", err) + } + } + for i, h := range hs { + tr, err := newTrienodeHistoryReader(uint64(i+1), freezer) + if err != nil { + t.Fatalf("Failed to construct the history reader: %v", err) + } + for _, owner := range h.owners { + nodes := h.nodes[owner] + for key, value := range nodes { + blob, err := tr.read(owner, key) + if err != nil { + t.Fatalf("Failed to read trienode history: %v", err) + } + if !bytes.Equal(blob, value) { + t.Fatalf("Unexpected trie node data, want: %v, got: %v", value, blob) + } + } + } + } +} + +// TestEmptyTrienodeHistory tests encoding/decoding of empty trienode history +func TestEmptyTrienodeHistory(t *testing.T) { + h := newTrienodeHistory(make(map[common.Hash]map[string][]byte)) + + // Test encoding empty history + header, keySection, valueSection, err := h.encode() + if err != nil { + t.Fatalf("Failed to encode empty trienode history: %v", err) + } + + // Verify sections are minimal but valid + if len(header) == 0 { + t.Fatal("Header should not be empty") + } + if len(keySection) != 0 { + t.Fatal("Key section should be empty for empty history") + } + if len(valueSection) != 0 { + t.Fatal("Value section should be empty for empty history") + } + + // Test decoding empty history + var decoded trienodeHistory + if err := decoded.decode(header, keySection, valueSection); err != nil { + t.Fatalf("Failed to decode empty trienode history: %v", err) + } + + if len(decoded.owners) != 0 { + t.Fatal("Decoded history should have no owners") + } + if len(decoded.nodeList) != 0 { + t.Fatal("Decoded history should have no node lists") + } + if len(decoded.nodes) != 0 { + t.Fatal("Decoded history 
should have no nodes") + } +} + +// TestSingleTrieHistory tests encoding/decoding of history with single trie +func TestSingleTrieHistory(t *testing.T) { + nodes := make(map[common.Hash]map[string][]byte) + owner := testrand.Hash() + nodes[owner] = make(map[string][]byte) + + // Add some nodes with various sizes + nodes[owner][""] = testrand.Bytes(32) // empty key + nodes[owner]["a"] = testrand.Bytes(1) // small value + nodes[owner]["bb"] = testrand.Bytes(100) // medium value + nodes[owner]["ccc"] = testrand.Bytes(1000) // large value + nodes[owner]["dddd"] = testrand.Bytes(0) // empty value + + h := newTrienodeHistory(nodes) + testEncodeDecode(t, h) +} + +// TestMultipleTries tests multiple tries with different node counts +func TestMultipleTries(t *testing.T) { + nodes := make(map[common.Hash]map[string][]byte) + + // First trie with many small nodes + owner1 := testrand.Hash() + nodes[owner1] = make(map[string][]byte) + for i := 0; i < 100; i++ { + key := string(testrand.Bytes(rand.Intn(10))) + nodes[owner1][key] = testrand.Bytes(rand.Intn(50)) + } + + // Second trie with few large nodes + owner2 := testrand.Hash() + nodes[owner2] = make(map[string][]byte) + for i := 0; i < 5; i++ { + key := string(testrand.Bytes(rand.Intn(20))) + nodes[owner2][key] = testrand.Bytes(1000 + rand.Intn(1000)) + } + + // Third trie with nil values (zero-size nodes) + owner3 := testrand.Hash() + nodes[owner3] = make(map[string][]byte) + for i := 0; i < 10; i++ { + key := string(testrand.Bytes(rand.Intn(15))) + nodes[owner3][key] = nil + } + + h := newTrienodeHistory(nodes) + testEncodeDecode(t, h) +} + +// TestLargeNodeValues tests encoding/decoding with very large node values +func TestLargeNodeValues(t *testing.T) { + nodes := make(map[common.Hash]map[string][]byte) + owner := testrand.Hash() + nodes[owner] = make(map[string][]byte) + + // Test with progressively larger values + sizes := []int{1024, 10 * 1024, 100 * 1024, 1024 * 1024} // 1KB, 10KB, 100KB, 1MB + for _, size := range 
sizes { + key := string(testrand.Bytes(10)) + nodes[owner][key] = testrand.Bytes(size) + + h := newTrienodeHistory(nodes) + testEncodeDecode(t, h) + t.Logf("Successfully tested encoding/decoding with %dKB value", size/1024) + } +} + +// TestNilNodeValues tests encoding/decoding with nil (zero-length) node values +func TestNilNodeValues(t *testing.T) { + nodes := make(map[common.Hash]map[string][]byte) + owner := testrand.Hash() + nodes[owner] = make(map[string][]byte) + + // Mix of nil and non-nil values + nodes[owner]["nil1"] = nil + nodes[owner]["nil2"] = nil + nodes[owner]["data1"] = []byte("some data") + nodes[owner]["nil3"] = nil + nodes[owner]["data2"] = []byte("more data") + nodes[owner]["nil4"] = nil + + h := newTrienodeHistory(nodes) + testEncodeDecode(t, h) + + // Verify nil values are preserved + if h.nodes[owner]["nil1"] != nil { + t.Fatal("Nil value should be preserved") + } + if h.nodes[owner]["nil3"] != nil { + t.Fatal("Nil value should be preserved") + } +} + +// TestCorruptedHeader tests error handling for corrupted header data +func TestCorruptedHeader(t *testing.T) { + h := makeTrinodeHistory() + header, keySection, valueSection, _ := h.encode() + + // Test corrupted version + corruptedHeader := make([]byte, len(header)) + copy(corruptedHeader, header) + corruptedHeader[0] = 0xFF // Invalid version + + var decoded trienodeHistory + if err := decoded.decode(corruptedHeader, keySection, valueSection); err == nil { + t.Fatal("Expected error for corrupted version") + } + + // Test truncated header + truncatedHeader := header[:len(header)-5] + if err := decoded.decode(truncatedHeader, keySection, valueSection); err == nil { + t.Fatal("Expected error for truncated header") + } + + // Test header with invalid trie header size + if len(header) > trienodeVersionSize { + invalidHeader := make([]byte, len(header)) + copy(invalidHeader, header) + invalidHeader = invalidHeader[:trienodeVersionSize+5] // Not divisible by trie header size + + if err := 
decoded.decode(invalidHeader, keySection, valueSection); err == nil { + t.Fatal("Expected error for invalid header size") + } + } +} + +// TestCorruptedKeySection tests error handling for corrupted key section data +func TestCorruptedKeySection(t *testing.T) { + h := makeTrinodeHistory() + header, keySection, valueSection, _ := h.encode() + + // Test empty key section when header indicates data + if len(keySection) > 0 { + var decoded trienodeHistory + if err := decoded.decode(header, []byte{}, valueSection); err == nil { + t.Fatal("Expected error for empty key section with non-empty header") + } + } + + // Test truncated key section + if len(keySection) > 10 { + truncatedKeySection := keySection[:len(keySection)-10] + var decoded trienodeHistory + if err := decoded.decode(header, truncatedKeySection, valueSection); err == nil { + t.Fatal("Expected error for truncated key section") + } + } + + // Test corrupted key section with invalid varint + corruptedKeySection := make([]byte, len(keySection)) + copy(corruptedKeySection, keySection) + if len(corruptedKeySection) > 5 { + corruptedKeySection[5] = 0xFF // Corrupt varint encoding + var decoded trienodeHistory + if err := decoded.decode(header, corruptedKeySection, valueSection); err == nil { + t.Fatal("Expected error for corrupted varint in key section") + } + } +} + +// TestCorruptedValueSection tests error handling for corrupted value section data +func TestCorruptedValueSection(t *testing.T) { + h := makeTrinodeHistory() + header, keySection, valueSection, _ := h.encode() + + // Test truncated value section + if len(valueSection) > 10 { + truncatedValueSection := valueSection[:len(valueSection)-10] + var decoded trienodeHistory + if err := decoded.decode(header, keySection, truncatedValueSection); err == nil { + t.Fatal("Expected error for truncated value section") + } + } + + // Test empty value section when key section indicates data exists + if len(valueSection) > 0 { + var decoded trienodeHistory + if err := 
decoded.decode(header, keySection, []byte{}); err == nil { + t.Fatal("Expected error for empty value section with non-empty key section") + } + } +} + +// TestInvalidOffsets tests error handling for invalid offsets in encoded data +func TestInvalidOffsets(t *testing.T) { + h := makeTrinodeHistory() + header, keySection, valueSection, _ := h.encode() + + // Corrupt key offset in header (make it larger than key section) + corruptedHeader := make([]byte, len(header)) + copy(corruptedHeader, header) + corruptedHeader[trienodeVersionSize+common.HashLength] = 0xff + + var dec1 trienodeHistory + if err := dec1.decode(corruptedHeader, keySection, valueSection); err == nil { + t.Fatal("Expected error for invalid key offset") + } + + // Corrupt value offset in header (make it larger than value section) + corruptedHeader = make([]byte, len(header)) + copy(corruptedHeader, header) + corruptedHeader[trienodeVersionSize+common.HashLength+4] = 0xff + + var dec2 trienodeHistory + if err := dec2.decode(corruptedHeader, keySection, valueSection); err == nil { + t.Fatal("Expected error for invalid value offset") + } +} + +// TestTrienodeHistoryReaderNonExistentPath tests reading non-existent paths +func TestTrienodeHistoryReaderNonExistentPath(t *testing.T) { + var ( + h = makeTrinodeHistory() + freezer, _ = rawdb.NewTrienodeFreezer(t.TempDir(), false, false) + ) + defer freezer.Close() + + header, keySection, valueSection, _ := h.encode() + if err := rawdb.WriteTrienodeHistory(freezer, 1, header, keySection, valueSection); err != nil { + t.Fatalf("Failed to write trienode history: %v", err) + } + + tr, err := newTrienodeHistoryReader(1, freezer) + if err != nil { + t.Fatalf("Failed to construct history reader: %v", err) + } + + // Try to read a non-existent path + _, err = tr.read(testrand.Hash(), "nonexistent") + if err == nil { + t.Fatal("Expected error for non-existent trie owner") + } + + // Try to read from existing owner but non-existent path + owner := h.owners[0] + _, err = 
tr.read(owner, "nonexistent-path") + if err == nil { + t.Fatal("Expected error for non-existent path") + } +} + +// TestTrienodeHistoryReaderNilValues tests reading nil (zero-length) values +func TestTrienodeHistoryReaderNilValues(t *testing.T) { + nodes := make(map[common.Hash]map[string][]byte) + owner := testrand.Hash() + nodes[owner] = make(map[string][]byte) + + // Add some nil values + nodes[owner]["nil1"] = nil + nodes[owner]["nil2"] = nil + nodes[owner]["data1"] = []byte("some data") + + h := newTrienodeHistory(nodes) + + var freezer, _ = rawdb.NewTrienodeFreezer(t.TempDir(), false, false) + defer freezer.Close() + + header, keySection, valueSection, _ := h.encode() + if err := rawdb.WriteTrienodeHistory(freezer, 1, header, keySection, valueSection); err != nil { + t.Fatalf("Failed to write trienode history: %v", err) + } + + tr, err := newTrienodeHistoryReader(1, freezer) + if err != nil { + t.Fatalf("Failed to construct history reader: %v", err) + } + + // Test reading nil values + data1, err := tr.read(owner, "nil1") + if err != nil { + t.Fatalf("Failed to read nil value: %v", err) + } + if len(data1) != 0 { + t.Fatal("Expected nil data for nil value") + } + + data2, err := tr.read(owner, "nil2") + if err != nil { + t.Fatalf("Failed to read nil value: %v", err) + } + if len(data2) != 0 { + t.Fatal("Expected nil data for nil value") + } + + // Test reading non-nil value + data3, err := tr.read(owner, "data1") + if err != nil { + t.Fatalf("Failed to read non-nil value: %v", err) + } + if !bytes.Equal(data3, []byte("some data")) { + t.Fatal("Data mismatch for non-nil value") + } +} + +// TestTrienodeHistoryReaderNilKey tests reading nil (zero-length) key +func TestTrienodeHistoryReaderNilKey(t *testing.T) { + nodes := make(map[common.Hash]map[string][]byte) + owner := testrand.Hash() + nodes[owner] = make(map[string][]byte) + + // Add some nil values + nodes[owner][""] = []byte("some data") + nodes[owner]["data1"] = []byte("some data") + + h := 
newTrienodeHistory(nodes) + + var freezer, _ = rawdb.NewTrienodeFreezer(t.TempDir(), false, false) + defer freezer.Close() + + header, keySection, valueSection, _ := h.encode() + if err := rawdb.WriteTrienodeHistory(freezer, 1, header, keySection, valueSection); err != nil { + t.Fatalf("Failed to write trienode history: %v", err) + } + + tr, err := newTrienodeHistoryReader(1, freezer) + if err != nil { + t.Fatalf("Failed to construct history reader: %v", err) + } + + // Test reading the value stored under the empty (zero-length) key + data1, err := tr.read(owner, "") + if err != nil { + t.Fatalf("Failed to read nil value: %v", err) + } + if !bytes.Equal(data1, []byte("some data")) { + t.Fatal("Data mismatch for nil key") + } + + // Test reading a value stored under a regular non-empty key + data2, err := tr.read(owner, "data1") + if err != nil { + t.Fatalf("Failed to read non-nil value: %v", err) + } + if !bytes.Equal(data2, []byte("some data")) { + t.Fatal("Data mismatch for non-nil key") + } +} + +// TestTrienodeHistoryReaderIterator tests the iterator functionality +func TestTrienodeHistoryReaderIterator(t *testing.T) { + h := makeTrinodeHistory() + + // Count expected entries + expectedCount := 0 + for _, nodeList := range h.nodeList { + expectedCount += len(nodeList) + } + + // Test the iterator + actualCount := 0 + for x := range h.forEach() { + _ = x + actualCount++ + } + if actualCount != expectedCount { + t.Fatalf("Iterator count mismatch: expected %d, got %d", expectedCount, actualCount) + } + + // Test that iterator yields expected state identifiers + seen := make(map[stateIdent]bool) + for ident := range h.forEach() { + if ident.typ != typeTrienode { + t.Fatal("Iterator should only yield trienode history identifiers") + } + key := stateIdent{typ: ident.typ, addressHash: ident.addressHash, path: ident.path} + if seen[key] { + t.Fatal("Iterator yielded duplicate identifier") + } + seen[key] = true + } +} + +// TestSharedLen tests the sharedLen helper function +func TestSharedLen(t *testing.T) { + tests := []struct { + a, b 
[]byte + expected int + }{ + // Empty strings + {[]byte(""), []byte(""), 0}, + // One empty string + {[]byte(""), []byte("abc"), 0}, + {[]byte("abc"), []byte(""), 0}, + // No common prefix + {[]byte("abc"), []byte("def"), 0}, + // Partial common prefix + {[]byte("abc"), []byte("abx"), 2}, + {[]byte("prefix"), []byte("pref"), 4}, + // Complete common prefix (shorter first) + {[]byte("ab"), []byte("abcd"), 2}, + // Complete common prefix (longer first) + {[]byte("abcd"), []byte("ab"), 2}, + // Identical strings + {[]byte("identical"), []byte("identical"), 9}, + // Binary data + {[]byte{0x00, 0x01, 0x02}, []byte{0x00, 0x01, 0x03}, 2}, + // Large strings + {bytes.Repeat([]byte("a"), 1000), bytes.Repeat([]byte("a"), 1000), 1000}, + {bytes.Repeat([]byte("a"), 1000), append(bytes.Repeat([]byte("a"), 999), []byte("b")...), 999}, + } + + for i, test := range tests { + result := sharedLen(test.a, test.b) + if result != test.expected { + t.Errorf("Test %d: sharedLen(%q, %q) = %d, expected %d", + i, test.a, test.b, result, test.expected) + } + // Test commutativity + resultReverse := sharedLen(test.b, test.a) + if result != resultReverse { + t.Errorf("Test %d: sharedLen is not commutative: sharedLen(a,b)=%d, sharedLen(b,a)=%d", + i, result, resultReverse) + } + } +} + +// TestDecodeHeaderCorruptedData tests decodeHeader with corrupted data +func TestDecodeHeaderCorruptedData(t *testing.T) { + // Create valid header data first + h := makeTrinodeHistory() + header, _, _, _ := h.encode() + + // Test with empty header + _, _, _, err := decodeHeader([]byte{}) + if err == nil { + t.Fatal("Expected error for empty header") + } + + // Test with invalid version + corruptedVersion := make([]byte, len(header)) + copy(corruptedVersion, header) + corruptedVersion[0] = 0xFF + _, _, _, err = decodeHeader(corruptedVersion) + if err == nil { + t.Fatal("Expected error for invalid version") + } + + // Test with truncated header (not divisible by trie header size) + truncated := 
header[:trienodeVersionSize+5] + _, _, _, err = decodeHeader(truncated) + if err == nil { + t.Fatal("Expected error for truncated header") + } + + // Test with unordered trie owners + unordered := make([]byte, len(header)) + copy(unordered, header) + + // Swap two owner hashes to make them unordered + hash1Start := trienodeVersionSize + hash2Start := trienodeVersionSize + trienodeTrieHeaderSize + hash1 := unordered[hash1Start : hash1Start+common.HashLength] + hash2 := unordered[hash2Start : hash2Start+common.HashLength] + + // Only swap if they would be out of order + copy(unordered[hash1Start:hash1Start+common.HashLength], hash2) + copy(unordered[hash2Start:hash2Start+common.HashLength], hash1) + + _, _, _, err = decodeHeader(unordered) + if err == nil { + t.Fatal("Expected error for unordered trie owners") + } +} + +// TestDecodeSingleCorruptedData tests decodeSingle with corrupted data +func TestDecodeSingleCorruptedData(t *testing.T) { + h := makeTrinodeHistory() + _, keySection, _, _ := h.encode() + + // Test with empty key section + _, err := decodeSingle([]byte{}, nil) + if err == nil { + t.Fatal("Expected error for empty key section") + } + + // Test with key section too small for trailer + if len(keySection) > 0 { + _, err := decodeSingle(keySection[:3], nil) // Less than 4 bytes for trailer + if err == nil { + t.Fatal("Expected error for key section too small for trailer") + } + } + + // Test with corrupted varint in key section + corrupted := make([]byte, len(keySection)) + copy(corrupted, keySection) + corrupted[5] = 0xFF // Corrupt varint + _, err = decodeSingle(corrupted, nil) + if err == nil { + t.Fatal("Expected error for corrupted varint") + } + + // Test with corrupted trailer (invalid restart count) + corrupted = make([]byte, len(keySection)) + copy(corrupted, keySection) + // Set restart count to something too large + binary.BigEndian.PutUint32(corrupted[len(corrupted)-4:], 10000) + _, err = decodeSingle(corrupted, nil) + if err == nil { + 
t.Fatal("Expected error for invalid restart count") + } +} + +// Helper function to test encode/decode cycle +func testEncodeDecode(t *testing.T, h *trienodeHistory) { + header, keySection, valueSection, err := h.encode() + if err != nil { + t.Fatalf("Failed to encode trienode history: %v", err) + } + + var decoded trienodeHistory + if err := decoded.decode(header, keySection, valueSection); err != nil { + t.Fatalf("Failed to decode trienode history: %v", err) + } + + // Compare the decoded history with original + if !compareList(decoded.owners, h.owners) { + t.Fatal("Trie owner list mismatch") + } + if !compareMapList(decoded.nodeList, h.nodeList) { + t.Fatal("Trienode list mismatch") + } + if !compareMapSet(decoded.nodes, h.nodes) { + t.Fatal("Trienode content mismatch") + } +} diff --git a/triedb/pathdb/metrics.go b/triedb/pathdb/metrics.go index 779f9d813ffe..31c40053fc26 100644 --- a/triedb/pathdb/metrics.go +++ b/triedb/pathdb/metrics.go @@ -69,12 +69,21 @@ var ( gcStorageMeter = metrics.NewRegisteredMeter("pathdb/gc/storage/count", nil) gcStorageBytesMeter = metrics.NewRegisteredMeter("pathdb/gc/storage/bytes", nil) - historyBuildTimeMeter = metrics.NewRegisteredResettingTimer("pathdb/history/time", nil) - historyDataBytesMeter = metrics.NewRegisteredMeter("pathdb/history/bytes/data", nil) - historyIndexBytesMeter = metrics.NewRegisteredMeter("pathdb/history/bytes/index", nil) - - indexHistoryTimer = metrics.NewRegisteredResettingTimer("pathdb/history/index/time", nil) - unindexHistoryTimer = metrics.NewRegisteredResettingTimer("pathdb/history/unindex/time", nil) + stateHistoryBuildTimeMeter = metrics.NewRegisteredResettingTimer("pathdb/history/state/time", nil) + stateHistoryDataBytesMeter = metrics.NewRegisteredMeter("pathdb/history/state/bytes/data", nil) + stateHistoryIndexBytesMeter = metrics.NewRegisteredMeter("pathdb/history/state/bytes/index", nil) + + //nolint:unused + trienodeHistoryBuildTimeMeter = 
metrics.NewRegisteredResettingTimer("pathdb/history/trienode/time", nil) + //nolint:unused + trienodeHistoryDataBytesMeter = metrics.NewRegisteredMeter("pathdb/history/trienode/bytes/data", nil) + //nolint:unused + trienodeHistoryIndexBytesMeter = metrics.NewRegisteredMeter("pathdb/history/trienode/bytes/index", nil) + + stateIndexHistoryTimer = metrics.NewRegisteredResettingTimer("pathdb/history/state/index/time", nil) + stateUnindexHistoryTimer = metrics.NewRegisteredResettingTimer("pathdb/history/state/unindex/time", nil) + trienodeIndexHistoryTimer = metrics.NewRegisteredResettingTimer("pathdb/history/trienode/index/time", nil) + trienodeUnindexHistoryTimer = metrics.NewRegisteredResettingTimer("pathdb/history/trienode/unindex/time", nil) lookupAddLayerTimer = metrics.NewRegisteredResettingTimer("pathdb/lookup/add/time", nil) lookupRemoveLayerTimer = metrics.NewRegisteredResettingTimer("pathdb/lookup/remove/time", nil) From 507c50d572ce5e096a16f93d5f3d59b4418ec36c Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Mon, 22 Sep 2025 14:19:06 +0800 Subject: [PATCH 2/3] triedb/pathdb: include metadata in header section --- triedb/pathdb/history_trienode.go | 100 +++++++++++++++------ triedb/pathdb/history_trienode_test.go | 115 ++++++++++++++++--------- 2 files changed, 151 insertions(+), 64 deletions(-) diff --git a/triedb/pathdb/history_trienode.go b/triedb/pathdb/history_trienode.go index defee8d3cdda..28d3027a1ae2 100644 --- a/triedb/pathdb/history_trienode.go +++ b/triedb/pathdb/history_trienode.go @@ -38,10 +38,20 @@ import ( // // # Header // The header records metadata, including: -// - the history version +// +// - the history version (1 byte) +// - the parent state root (32 bytes) +// - the current state root (32 bytes) +// - block number (8 bytes) +// // - a lexicographically sorted list of trie IDs // - the corresponding offsets into the key and value sections for each trie data chunk // +// Although some fields (e.g., parent state root, block number) are 
duplicated +// between the state history and the trienode history, these two histories +// operate independently. To ensure each remains self-contained and self-descriptive, +// we have chosen to maintain these duplicate fields. +// // # Key section // The key section stores trie node keys (paths) in a compressed format. // It also contains relative offsets into the value section for resolving @@ -60,7 +70,7 @@ import ( // Header section: // // +----------+------------------+---------------------+---------------------+-------+------------------+---------------------+---------------------| -// | ver (1B) | TrieID(32 bytes) | key offset(4 bytes) | val offset(4 bytes) | ... | TrieID(32 bytes) | key offset(4 bytes) | val offset(4 bytes) | +// | metadata | TrieID(32 bytes) | key offset(4 bytes) | val offset(4 bytes) | ... | TrieID(32 bytes) | key offset(4 bytes) | val offset(4 bytes) | // +----------+------------------+---------------------+---------------------+-------+------------------+---------------------+---------------------| // // @@ -103,23 +113,32 @@ import ( // NOTE: All fixed-length integer are big-endian. 
const ( - trienodeHistoryV0 = uint8(0) // initial version of node history structure - trienodeHistoryVersion = trienodeHistoryV0 // the default node history version - trienodeVersionSize = 1 // the size of version tag in the history - trienodeTrieHeaderSize = 8 + common.HashLength // the size of a single trie header in history - trienodeDataBlockRestartLen = 16 // The restart interval length of trie node block + trienodeHistoryV0 = uint8(0) // initial version of node history structure + trienodeHistoryVersion = trienodeHistoryV0 // the default node history version + trienodeMetadataSize = 1 + 2*common.HashLength + 8 // the size of metadata in the history + trienodeTrieHeaderSize = 8 + common.HashLength // the size of a single trie header in history + trienodeDataBlockRestartLen = 16 // The restart interval length of trie node block ) +// trienodeMetadata describes the metadata of trienode history. +type trienodeMetadata struct { + version uint8 // version tag of history object + parent common.Hash // prev-state root before the state transition + root common.Hash // post-state root after the state transition + block uint64 // associated block number +} + // trienodeHistory represents a set of trie node changes resulting from a state // transition across the main account trie and all associated storage tries. type trienodeHistory struct { + meta *trienodeMetadata // Metadata of the history owners []common.Hash // List of trie identifier sorted lexicographically nodeList map[common.Hash][]string // Set of node paths sorted lexicographically nodes map[common.Hash]map[string][]byte // Set of original value of trie nodes before state transition } // newTrienodeHistory constructs a trienode history with the provided trie nodes. 
-func newTrienodeHistory(nodes map[common.Hash]map[string][]byte) *trienodeHistory { +func newTrienodeHistory(root common.Hash, parent common.Hash, block uint64, nodes map[common.Hash]map[string][]byte) *trienodeHistory { nodeList := make(map[common.Hash][]string) for owner, subset := range nodes { keys := sort.StringSlice(slices.Collect(maps.Keys(subset))) @@ -127,6 +146,12 @@ func newTrienodeHistory(nodes map[common.Hash]map[string][]byte) *trienodeHistor nodeList[owner] = keys } return &trienodeHistory{ + meta: &trienodeMetadata{ + version: trienodeHistoryVersion, + parent: parent, + root: root, + block: block, + }, owners: slices.SortedFunc(maps.Keys(nodes), common.Hash.Cmp), nodeList: nodeList, nodes: nodes, @@ -174,7 +199,10 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) { keySection bytes.Buffer valueSection bytes.Buffer ) - binary.Write(&headerSection, binary.BigEndian, trienodeHistoryVersion) // 1 byte + binary.Write(&headerSection, binary.BigEndian, h.meta.version) // 1 byte + headerSection.Write(h.meta.parent.Bytes()) // 32 bytes + headerSection.Write(h.meta.root.Bytes()) // 32 bytes + binary.Write(&headerSection, binary.BigEndian, h.meta.block) // 8 bytes for _, owner := range h.owners { // Fill the header section with offsets at key and value section @@ -246,17 +274,21 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) { // decodeHeader resolves the metadata from the header section. An error // should be returned if the header section is corrupted. 
-func decodeHeader(data []byte) ([]common.Hash, []uint32, []uint32, error) { - if len(data) < trienodeVersionSize { - return nil, nil, nil, fmt.Errorf("trienode history is too small, index size: %d", len(data)) +func decodeHeader(data []byte) (*trienodeMetadata, []common.Hash, []uint32, []uint32, error) { + if len(data) < trienodeMetadataSize { + return nil, nil, nil, nil, fmt.Errorf("trienode history is too small, index size: %d", len(data)) } version := data[0] if version != trienodeHistoryVersion { - return nil, nil, nil, fmt.Errorf("unregonized trienode history version: %d", version) + return nil, nil, nil, nil, fmt.Errorf("unregonized trienode history version: %d", version) } - size := len(data) - trienodeVersionSize + parent := common.BytesToHash(data[1 : common.HashLength+1]) // 32 bytes + root := common.BytesToHash(data[common.HashLength+1 : common.HashLength*2+1]) // 32 bytes + block := binary.BigEndian.Uint64(data[common.HashLength*2+1 : trienodeMetadataSize]) // 8 bytes + + size := len(data) - trienodeMetadataSize if size%trienodeTrieHeaderSize != 0 { - return nil, nil, nil, fmt.Errorf("truncated trienode history data, size %d", len(data)) + return nil, nil, nil, nil, fmt.Errorf("truncated trienode history data, size %d", len(data)) } count := size / trienodeTrieHeaderSize @@ -266,17 +298,17 @@ func decodeHeader(data []byte) ([]common.Hash, []uint32, []uint32, error) { valOffsets = make([]uint32, 0, count) ) for i := 0; i < count; i++ { - n := trienodeVersionSize + trienodeTrieHeaderSize*i + n := trienodeMetadataSize + trienodeTrieHeaderSize*i owner := common.BytesToHash(data[n : n+common.HashLength]) if i != 0 && bytes.Compare(owner.Bytes(), owners[i-1].Bytes()) <= 0 { - return nil, nil, nil, fmt.Errorf("trienode owners are out of order, prev: %v, cur: %v", owners[i-1], owner) + return nil, nil, nil, nil, fmt.Errorf("trienode owners are out of order, prev: %v, cur: %v", owners[i-1], owner) } owners = append(owners, owner) // Decode the offset to the key 
section keyOffset := binary.BigEndian.Uint32(data[n+common.HashLength : n+common.HashLength+4]) if i != 0 && keyOffset <= keyOffsets[i-1] { - return nil, nil, nil, fmt.Errorf("key offset is out of order, prev: %v, cur: %v", keyOffsets[i-1], keyOffset) + return nil, nil, nil, nil, fmt.Errorf("key offset is out of order, prev: %v, cur: %v", keyOffsets[i-1], keyOffset) } keyOffsets = append(keyOffsets, keyOffset) @@ -285,11 +317,16 @@ func decodeHeader(data []byte) ([]common.Hash, []uint32, []uint32, error) { // a trie deletion). valOffset := binary.BigEndian.Uint32(data[n+common.HashLength+4 : n+common.HashLength+8]) if i != 0 && valOffset < valOffsets[i-1] { - return nil, nil, nil, fmt.Errorf("value offset is out of order, prev: %v, cur: %v", valOffsets[i-1], valOffset) + return nil, nil, nil, nil, fmt.Errorf("value offset is out of order, prev: %v, cur: %v", valOffsets[i-1], valOffset) } valOffsets = append(valOffsets, valOffset) } - return owners, keyOffsets, valOffsets, nil + return &trienodeMetadata{ + version: version, + parent: parent, + root: root, + block: block, + }, owners, keyOffsets, valOffsets, nil } func decodeSingle(keySection []byte, onValue func([]byte, int, int) error) ([]string, error) { @@ -425,10 +462,11 @@ func decodeSingleWithValue(keySection []byte, valueSection []byte) ([]string, ma // decode deserializes the contained trie nodes from the provided bytes. func (h *trienodeHistory) decode(header []byte, keySection []byte, valueSection []byte) error { - owners, keyOffsets, valueOffsets, err := decodeHeader(header) + metadata, owners, keyOffsets, valueOffsets, err := decodeHeader(header) if err != nil { return err } + h.meta = metadata h.owners = owners h.nodeList = make(map[common.Hash][]string) h.nodes = make(map[common.Hash]map[string][]byte) @@ -562,13 +600,13 @@ func newTrienodeHistoryReader(id uint64, reader ethdb.AncientReader) (*trienodeH return r, nil } -// decodeHeader decodes the metadata of trienode history from the header section. 
+// decodeHeader decodes the header section of trienode history. func (r *trienodeHistoryReader) decodeHeader() error { header, err := rawdb.ReadTrienodeHistoryHeader(r.reader, r.id) if err != nil { return err } - owners, keyOffsets, valOffsets, err := decodeHeader(header) + _, owners, keyOffsets, valOffsets, err := decodeHeader(header) if err != nil { return err } @@ -626,7 +664,7 @@ func (r *trienodeHistoryReader) read(owner common.Hash, path string) ([]byte, er // nolint:unused func writeTrienodeHistory(writer ethdb.AncientWriter, dl *diffLayer) error { start := time.Now() - h := newTrienodeHistory(dl.nodes.nodeOrigin) + h := newTrienodeHistory(dl.rootHash(), dl.parent.rootHash(), dl.block, dl.nodes.nodeOrigin) header, keySection, valueSection, err := h.encode() if err != nil { return err @@ -649,6 +687,20 @@ func writeTrienodeHistory(writer ethdb.AncientWriter, dl *diffLayer) error { return nil } +// readTrienodeMetadata resolves the metadata of the specified trienode history. +// nolint:unused +func readTrienodeMetadata(reader ethdb.AncientReader, id uint64) (*trienodeMetadata, error) { + header, err := rawdb.ReadTrienodeHistoryHeader(reader, id) + if err != nil { + return nil, err + } + metadata, _, _, _, err := decodeHeader(header) + if err != nil { + return nil, err + } + return metadata, nil +} + // readTrienodeHistory resolves a single trienode history object with specific id. 
func readTrienodeHistory(reader ethdb.AncientReader, id uint64) (*trienodeHistory, error) { header, keySection, valueSection, err := rawdb.ReadTrienodeHistory(reader, id) diff --git a/triedb/pathdb/history_trienode_test.go b/triedb/pathdb/history_trienode_test.go index 69d13fdbb44c..b02c0d5380a6 100644 --- a/triedb/pathdb/history_trienode_test.go +++ b/triedb/pathdb/history_trienode_test.go @@ -20,18 +20,26 @@ import ( "bytes" "encoding/binary" "math/rand" + "reflect" "testing" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/internal/testrand" ) // randomTrienodes generates a random trienode set. -func randomTrienodes(n int) map[common.Hash]map[string][]byte { - nodes := make(map[common.Hash]map[string][]byte) +func randomTrienodes(n int) (map[common.Hash]map[string][]byte, common.Hash) { + var ( + root common.Hash + nodes = make(map[common.Hash]map[string][]byte) + ) for i := 0; i < n; i++ { owner := testrand.Hash() + if i == 0 { + owner = common.Hash{} + } nodes[owner] = make(map[string][]byte) for j := 0; j < 10; j++ { @@ -48,20 +56,29 @@ func randomTrienodes(n int) map[common.Hash]map[string][]byte { nodes[owner][string(path)] = nil } // root node with zero-size path - nodes[owner][""] = testrand.Bytes(10) + rnode := testrand.Bytes(256) + nodes[owner][""] = rnode + if owner == (common.Hash{}) { + root = crypto.Keccak256Hash(rnode) + } } - return nodes + return nodes, root } func makeTrinodeHistory() *trienodeHistory { - return newTrienodeHistory(randomTrienodes(10)) + nodes, root := randomTrienodes(10) + return newTrienodeHistory(root, common.Hash{}, 1, nodes) } func makeTrienodeHistories(n int) []*trienodeHistory { - var result []*trienodeHistory + var ( + parent common.Hash + result []*trienodeHistory + ) for i := 0; i < n; i++ { - h := makeTrinodeHistory() - result = append(result, h) + nodes, root := randomTrienodes(10) + result = append(result, 
newTrienodeHistory(root, parent, uint64(i+1), nodes)) + parent = root } return result } @@ -78,6 +95,10 @@ func TestEncodeDecodeTrienodeHistory(t *testing.T) { if err := dec.decode(header, keySection, valueSection); err != nil { t.Fatalf("Failed to decode trienode history: %v", err) } + + if !reflect.DeepEqual(obj.meta, dec.meta) { + t.Fatal("trienode metadata is mismatched") + } if !compareList(dec.owners, obj.owners) { t.Fatal("trie owner list is mismatched") } @@ -120,11 +141,20 @@ func TestTrienodeHistoryReader(t *testing.T) { } } } + for i, h := range hs { + metadata, err := readTrienodeMetadata(freezer, uint64(i+1)) + if err != nil { + t.Fatalf("Failed to read trienode history metadata: %v", err) + } + if !reflect.DeepEqual(h.meta, metadata) { + t.Fatalf("Unexpected trienode metadata, want: %v, got: %v", h.meta, metadata) + } + } } // TestEmptyTrienodeHistory tests encoding/decoding of empty trienode history func TestEmptyTrienodeHistory(t *testing.T) { - h := newTrienodeHistory(make(map[common.Hash]map[string][]byte)) + h := newTrienodeHistory(common.Hash{}, common.Hash{}, 1, make(map[common.Hash]map[string][]byte)) // Test encoding empty history header, keySection, valueSection, err := h.encode() @@ -173,7 +203,7 @@ func TestSingleTrieHistory(t *testing.T) { nodes[owner]["ccc"] = testrand.Bytes(1000) // large value nodes[owner]["dddd"] = testrand.Bytes(0) // empty value - h := newTrienodeHistory(nodes) + h := newTrienodeHistory(common.Hash{}, common.Hash{}, 1, nodes) testEncodeDecode(t, h) } @@ -205,7 +235,7 @@ func TestMultipleTries(t *testing.T) { nodes[owner3][key] = nil } - h := newTrienodeHistory(nodes) + h := newTrienodeHistory(common.Hash{}, common.Hash{}, 1, nodes) testEncodeDecode(t, h) } @@ -221,7 +251,7 @@ func TestLargeNodeValues(t *testing.T) { key := string(testrand.Bytes(10)) nodes[owner][key] = testrand.Bytes(size) - h := newTrienodeHistory(nodes) + h := newTrienodeHistory(common.Hash{}, common.Hash{}, 1, nodes) testEncodeDecode(t, h) 
t.Logf("Successfully tested encoding/decoding with %dKB value", size/1024) } @@ -234,21 +264,16 @@ func TestNilNodeValues(t *testing.T) { nodes[owner] = make(map[string][]byte) // Mix of nil and non-nil values - nodes[owner]["nil1"] = nil - nodes[owner]["nil2"] = nil + nodes[owner]["nil"] = nil nodes[owner]["data1"] = []byte("some data") - nodes[owner]["nil3"] = nil nodes[owner]["data2"] = []byte("more data") - nodes[owner]["nil4"] = nil - h := newTrienodeHistory(nodes) + h := newTrienodeHistory(common.Hash{}, common.Hash{}, 1, nodes) testEncodeDecode(t, h) // Verify nil values are preserved - if h.nodes[owner]["nil1"] != nil { - t.Fatal("Nil value should be preserved") - } - if h.nodes[owner]["nil3"] != nil { + _, ok := h.nodes[owner]["nil"] + if !ok { t.Fatal("Nil value should be preserved") } } @@ -275,14 +300,12 @@ func TestCorruptedHeader(t *testing.T) { } // Test header with invalid trie header size - if len(header) > trienodeVersionSize { - invalidHeader := make([]byte, len(header)) - copy(invalidHeader, header) - invalidHeader = invalidHeader[:trienodeVersionSize+5] // Not divisible by trie header size + invalidHeader := make([]byte, len(header)) + copy(invalidHeader, header) + invalidHeader = invalidHeader[:trienodeMetadataSize+5] // Not divisible by trie header size - if err := decoded.decode(invalidHeader, keySection, valueSection); err == nil { - t.Fatal("Expected error for invalid header size") - } + if err := decoded.decode(invalidHeader, keySection, valueSection); err == nil { + t.Fatal("Expected error for invalid header size") } } @@ -351,7 +374,7 @@ func TestInvalidOffsets(t *testing.T) { // Corrupt key offset in header (make it larger than key section) corruptedHeader := make([]byte, len(header)) copy(corruptedHeader, header) - corruptedHeader[trienodeVersionSize+common.HashLength] = 0xff + corruptedHeader[trienodeMetadataSize+common.HashLength] = 0xff var dec1 trienodeHistory if err := dec1.decode(corruptedHeader, keySection, valueSection); err 
== nil { @@ -361,7 +384,7 @@ func TestInvalidOffsets(t *testing.T) { // Corrupt value offset in header (make it larger than value section) corruptedHeader = make([]byte, len(header)) copy(corruptedHeader, header) - corruptedHeader[trienodeVersionSize+common.HashLength+4] = 0xff + corruptedHeader[trienodeMetadataSize+common.HashLength+4] = 0xff var dec2 trienodeHistory if err := dec2.decode(corruptedHeader, keySection, valueSection); err == nil { @@ -412,7 +435,7 @@ func TestTrienodeHistoryReaderNilValues(t *testing.T) { nodes[owner]["nil2"] = nil nodes[owner]["data1"] = []byte("some data") - h := newTrienodeHistory(nodes) + h := newTrienodeHistory(common.Hash{}, common.Hash{}, 1, nodes) var freezer, _ = rawdb.NewTrienodeFreezer(t.TempDir(), false, false) defer freezer.Close() @@ -464,7 +487,7 @@ func TestTrienodeHistoryReaderNilKey(t *testing.T) { nodes[owner][""] = []byte("some data") nodes[owner]["data1"] = []byte("some data") - h := newTrienodeHistory(nodes) + h := newTrienodeHistory(common.Hash{}, common.Hash{}, 1, nodes) var freezer, _ = rawdb.NewTrienodeFreezer(t.TempDir(), false, false) defer freezer.Close() @@ -504,8 +527,16 @@ func TestTrienodeHistoryReaderIterator(t *testing.T) { // Count expected entries expectedCount := 0 - for _, nodeList := range h.nodeList { + expectedNodes := make(map[stateIdent]bool) + for owner, nodeList := range h.nodeList { expectedCount += len(nodeList) + for _, node := range nodeList { + expectedNodes[stateIdent{ + typ: typeTrienode, + addressHash: owner, + path: node, + }] = true + } } // Test the iterator @@ -529,6 +560,10 @@ func TestTrienodeHistoryReaderIterator(t *testing.T) { t.Fatal("Iterator yielded duplicate identifier") } seen[key] = true + + if !expectedNodes[key] { + t.Fatalf("Unexpected yielded identifier %v", key) + } } } @@ -583,7 +618,7 @@ func TestDecodeHeaderCorruptedData(t *testing.T) { header, _, _, _ := h.encode() // Test with empty header - _, _, _, err := decodeHeader([]byte{}) + _, _, _, _, err := 
decodeHeader([]byte{}) if err == nil { t.Fatal("Expected error for empty header") } @@ -592,14 +627,14 @@ func TestDecodeHeaderCorruptedData(t *testing.T) { corruptedVersion := make([]byte, len(header)) copy(corruptedVersion, header) corruptedVersion[0] = 0xFF - _, _, _, err = decodeHeader(corruptedVersion) + _, _, _, _, err = decodeHeader(corruptedVersion) if err == nil { t.Fatal("Expected error for invalid version") } // Test with truncated header (not divisible by trie header size) - truncated := header[:trienodeVersionSize+5] - _, _, _, err = decodeHeader(truncated) + truncated := header[:trienodeMetadataSize+5] + _, _, _, _, err = decodeHeader(truncated) if err == nil { t.Fatal("Expected error for truncated header") } @@ -609,8 +644,8 @@ func TestDecodeHeaderCorruptedData(t *testing.T) { copy(unordered, header) // Swap two owner hashes to make them unordered - hash1Start := trienodeVersionSize - hash2Start := trienodeVersionSize + trienodeTrieHeaderSize + hash1Start := trienodeMetadataSize + hash2Start := trienodeMetadataSize + trienodeTrieHeaderSize hash1 := unordered[hash1Start : hash1Start+common.HashLength] hash2 := unordered[hash2Start : hash2Start+common.HashLength] @@ -618,7 +653,7 @@ func TestDecodeHeaderCorruptedData(t *testing.T) { copy(unordered[hash1Start:hash1Start+common.HashLength], hash2) copy(unordered[hash2Start:hash2Start+common.HashLength], hash1) - _, _, _, err = decodeHeader(unordered) + _, _, _, _, err = decodeHeader(unordered) if err == nil { t.Fatal("Expected error for unordered trie owners") } From 6ab502d302ca16f52712eb63a3fbfda106d492e6 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Thu, 9 Oct 2025 11:10:48 +0800 Subject: [PATCH 3/3] core, triedb: address comments from marius --- core/rawdb/schema.go | 15 ++-------- triedb/pathdb/history_trienode.go | 10 +++---- triedb/pathdb/history_trienode_test.go | 41 +++++++++++++++++--------- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/core/rawdb/schema.go 
b/core/rawdb/schema.go index ed7922e5639b..d9140c5fd658 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -416,25 +416,19 @@ func trienodeHistoryIndexKey(addressHash common.Hash, path []byte) []byte { // accountHistoryIndexBlockKey = StateHistoryAccountBlockPrefix + addressHash + blockID func accountHistoryIndexBlockKey(addressHash common.Hash, blockID uint32) []byte { - var buf4 [4]byte - binary.BigEndian.PutUint32(buf4[:], blockID) - totalLen := len(StateHistoryAccountBlockPrefix) + common.HashLength + 4 out := make([]byte, totalLen) off := 0 off += copy(out[off:], StateHistoryAccountBlockPrefix) off += copy(out[off:], addressHash.Bytes()) - copy(out[off:], buf4[:]) + binary.BigEndian.PutUint32(out[off:], blockID) return out } // storageHistoryIndexBlockKey = StateHistoryStorageBlockPrefix + addressHash + storageHash + blockID func storageHistoryIndexBlockKey(addressHash common.Hash, storageHash common.Hash, blockID uint32) []byte { - var buf4 [4]byte - binary.BigEndian.PutUint32(buf4[:], blockID) - totalLen := len(StateHistoryStorageBlockPrefix) + 2*common.HashLength + 4 out := make([]byte, totalLen) @@ -442,16 +436,13 @@ func storageHistoryIndexBlockKey(addressHash common.Hash, storageHash common.Has off += copy(out[off:], StateHistoryStorageBlockPrefix) off += copy(out[off:], addressHash.Bytes()) off += copy(out[off:], storageHash.Bytes()) - copy(out[off:], buf4[:]) + binary.BigEndian.PutUint32(out[off:], blockID) return out } // trienodeHistoryIndexBlockKey = TrienodeHistoryBlockPrefix + addressHash + trienode path + blockID func trienodeHistoryIndexBlockKey(addressHash common.Hash, path []byte, blockID uint32) []byte { - var buf4 [4]byte - binary.BigEndian.PutUint32(buf4[:], blockID) - totalLen := len(TrienodeHistoryBlockPrefix) + common.HashLength + len(path) + 4 out := make([]byte, totalLen) @@ -459,7 +450,7 @@ func trienodeHistoryIndexBlockKey(addressHash common.Hash, path []byte, blockID off += copy(out[off:], TrienodeHistoryBlockPrefix) 
off += copy(out[off:], addressHash.Bytes()) off += copy(out[off:], path) - copy(out[off:], buf4[:]) + binary.BigEndian.PutUint32(out[off:], blockID) return out } diff --git a/triedb/pathdb/history_trienode.go b/triedb/pathdb/history_trienode.go index 28d3027a1ae2..2a4459d4ad46 100644 --- a/triedb/pathdb/history_trienode.go +++ b/triedb/pathdb/history_trienode.go @@ -160,10 +160,7 @@ func newTrienodeHistory(root common.Hash, parent common.Hash, block uint64, node // sharedLen returns the length of the common prefix shared by a and b. func sharedLen(a, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } + n := min(len(a), len(b)) for i := 0; i < n; i++ { if a[i] != b[i] { return i @@ -259,7 +256,8 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) { internalValOffset += uint32(len(value)) } - // Encode trailer + // Encode trailer, the number of restart sections is len(restarts))/2, + // as we track the offsets of both key and value sections. var trailer []byte for _, number := range append(restarts, uint32(len(restarts))/2) { binary.BigEndian.PutUint32(buf[:4], number) @@ -341,7 +339,7 @@ func decodeSingle(keySection []byte, onValue func([]byte, int, int) error) ([]st keys []string ) - // Decode restarts + // Decode the number of restart section if len(keySection) < 4 { return nil, fmt.Errorf("key section too short, size: %d", len(keySection)) } diff --git a/triedb/pathdb/history_trienode_test.go b/triedb/pathdb/history_trienode_test.go index b02c0d5380a6..d6b80f61f56c 100644 --- a/triedb/pathdb/history_trienode_test.go +++ b/triedb/pathdb/history_trienode_test.go @@ -43,11 +43,9 @@ func randomTrienodes(n int) (map[common.Hash]map[string][]byte, common.Hash) { nodes[owner] = make(map[string][]byte) for j := 0; j < 10; j++ { - pathLen := rand.Intn(10) - path := testrand.Bytes(pathLen) + path := testrand.Bytes(rand.Intn(10)) for z := 0; z < len(path); z++ { - valLen := rand.Intn(128) - nodes[owner][string(path[:z])] = 
testrand.Bytes(valLen) + nodes[owner][string(path[:z])] = testrand.Bytes(rand.Intn(128)) } } // zero-size trie node, representing it was non-existent before @@ -65,7 +63,7 @@ func randomTrienodes(n int) (map[common.Hash]map[string][]byte, common.Hash) { return nodes, root } -func makeTrinodeHistory() *trienodeHistory { +func makeTrienodeHistory() *trienodeHistory { nodes, root := randomTrienodes(10) return newTrienodeHistory(root, common.Hash{}, 1, nodes) } @@ -86,7 +84,7 @@ func makeTrienodeHistories(n int) []*trienodeHistory { func TestEncodeDecodeTrienodeHistory(t *testing.T) { var ( dec trienodeHistory - obj = makeTrinodeHistory() + obj = makeTrienodeHistory() ) header, keySection, valueSection, err := obj.encode() if err != nil { @@ -108,6 +106,21 @@ func TestEncodeDecodeTrienodeHistory(t *testing.T) { if !compareMapSet(dec.nodes, obj.nodes) { t.Fatal("trienode content is mismatched") } + + // Re-encode again, ensuring the encoded blob still match + header2, keySection2, valueSection2, err := dec.encode() + if err != nil { + t.Fatalf("Failed to encode trienode history: %v", err) + } + if !bytes.Equal(header, header2) { + t.Fatal("re-encoded header is mismatched") + } + if !bytes.Equal(keySection, keySection2) { + t.Fatal("re-encoded key section is mismatched") + } + if !bytes.Equal(valueSection, valueSection2) { + t.Fatal("re-encoded value section is mismatched") + } } func TestTrienodeHistoryReader(t *testing.T) { @@ -280,7 +293,7 @@ func TestNilNodeValues(t *testing.T) { // TestCorruptedHeader tests error handling for corrupted header data func TestCorruptedHeader(t *testing.T) { - h := makeTrinodeHistory() + h := makeTrienodeHistory() header, keySection, valueSection, _ := h.encode() // Test corrupted version @@ -311,7 +324,7 @@ func TestCorruptedHeader(t *testing.T) { // TestCorruptedKeySection tests error handling for corrupted key section data func TestCorruptedKeySection(t *testing.T) { - h := makeTrinodeHistory() + h := makeTrienodeHistory() header, 
keySection, valueSection, _ := h.encode() // Test empty key section when header indicates data @@ -345,7 +358,7 @@ func TestCorruptedKeySection(t *testing.T) { // TestCorruptedValueSection tests error handling for corrupted value section data func TestCorruptedValueSection(t *testing.T) { - h := makeTrinodeHistory() + h := makeTrienodeHistory() header, keySection, valueSection, _ := h.encode() // Test truncated value section @@ -368,7 +381,7 @@ func TestCorruptedValueSection(t *testing.T) { // TestInvalidOffsets tests error handling for invalid offsets in encoded data func TestInvalidOffsets(t *testing.T) { - h := makeTrinodeHistory() + h := makeTrienodeHistory() header, keySection, valueSection, _ := h.encode() // Corrupt key offset in header (make it larger than key section) @@ -395,7 +408,7 @@ func TestInvalidOffsets(t *testing.T) { // TestTrienodeHistoryReaderNonExistentPath tests reading non-existent paths func TestTrienodeHistoryReaderNonExistentPath(t *testing.T) { var ( - h = makeTrinodeHistory() + h = makeTrienodeHistory() freezer, _ = rawdb.NewTrienodeFreezer(t.TempDir(), false, false) ) defer freezer.Close() @@ -523,7 +536,7 @@ func TestTrienodeHistoryReaderNilKey(t *testing.T) { // TestTrienodeHistoryReaderIterator tests the iterator functionality func TestTrienodeHistoryReaderIterator(t *testing.T) { - h := makeTrinodeHistory() + h := makeTrienodeHistory() // Count expected entries expectedCount := 0 @@ -614,7 +627,7 @@ func TestSharedLen(t *testing.T) { // TestDecodeHeaderCorruptedData tests decodeHeader with corrupted data func TestDecodeHeaderCorruptedData(t *testing.T) { // Create valid header data first - h := makeTrinodeHistory() + h := makeTrienodeHistory() header, _, _, _ := h.encode() // Test with empty header @@ -661,7 +674,7 @@ func TestDecodeHeaderCorruptedData(t *testing.T) { // TestDecodeSingleCorruptedData tests decodeSingle with corrupted data func TestDecodeSingleCorruptedData(t *testing.T) { - h := makeTrinodeHistory() + h := 
makeTrienodeHistory() _, keySection, _, _ := h.encode() // Test with empty key section