diff --git a/triedb/pathdb/layertree.go b/triedb/pathdb/layertree.go index ec45257db56c..221fdda844d5 100644 --- a/triedb/pathdb/layertree.go +++ b/triedb/pathdb/layertree.go @@ -338,3 +338,21 @@ func (tree *layerTree) lookupStorage(accountHash common.Hash, slotHash common.Ha } return l, nil } + +// lookupNode returns the layer that is guaranteed to contain the trie node +// data corresponding to the specified state root being queried. +func (tree *layerTree) lookupNode(accountHash common.Hash, path string, state common.Hash) (layer, error) { + // Hold the read lock to prevent the unexpected layer changes + tree.lock.RLock() + defer tree.lock.RUnlock() + + tip := tree.lookup.nodeTip(accountHash, path, state, tree.base.root) + if tip == (common.Hash{}) { + return nil, fmt.Errorf("[%#x] %w", state, errSnapshotStale) + } + l := tree.layers[tip] + if l == nil { + return nil, fmt.Errorf("triedb layer [%#x] missing", tip) + } + return l, nil +} diff --git a/triedb/pathdb/lookup.go b/triedb/pathdb/lookup.go index 8b092730f8fc..73dca9710094 100644 --- a/triedb/pathdb/lookup.go +++ b/triedb/pathdb/lookup.go @@ -22,9 +22,14 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/trienode" "golang.org/x/sync/errgroup" ) +// trienodeShardCount is the number of shards used for trie nodes. +const trienodeShardCount = 16 + // storageKey returns a key for uniquely identifying the storage slot. func storageKey(accountHash common.Hash, slotHash common.Hash) [64]byte { var key [64]byte @@ -33,6 +38,23 @@ func storageKey(accountHash common.Hash, slotHash common.Hash) [64]byte { return key } +// trienodeKey uses a fixed-size byte array instead of string to avoid string allocations. +type trienodeKey [96]byte // 32 bytes for hash + up to 64 bytes for path + +// makeTrienodeKey returns a key for uniquely identifying the trie node. +func makeTrienodeKey(accountHash common.Hash, path string) trienodeKey { + var key trienodeKey + copy(key[:32], accountHash[:]) + copy(key[32:], path) + return key +} + +// shardTask used to batch task by shard to minimize lock contention +type shardTask struct { + accountHash common.Hash + path string +} + // lookup is an internal structure used to efficiently determine the layer in // which a state entry resides. type lookup struct { @@ -48,11 +70,34 @@ type lookup struct { // where the slot was modified, with the order from oldest to newest. storages map[[64]byte][]common.Hash + // accountNodes represents the mutation history for specific account + // trie nodes, distributed across 16 shards for efficiency. + // The key is the trie path of the node, and the value is a slice + // of **diff layer** IDs indicating where the account was modified, + // with the order from oldest to newest. + accountNodes [trienodeShardCount]map[string][]common.Hash + + // storageNodes represents the mutation history for specific storage + // slot trie nodes, distributed across 16 shards for efficiency. + // The key is the account address hash and the trie path of the node, + // the value is a slice of **diff layer** IDs indicating where the + // slot was modified, with the order from oldest to newest. + storageNodes [trienodeShardCount]map[trienodeKey][]common.Hash + // descendant is the callback indicating whether the layer with // given root is a descendant of the one specified by `ancestor`. descendant func(state common.Hash, ancestor common.Hash) bool } +// getNodeShardIndex returns the shard index for a given path +func getNodeShardIndex(path string) int { + if len(path) == 0 { + return 0 + } + // use the first char of the path to determine the shard index + return int(path[0]) % trienodeShardCount +} + // newLookup initializes the lookup structure. func newLookup(head layer, descendant func(state common.Hash, ancestor common.Hash) bool) *lookup { var ( @@ -68,6 +113,12 @@ func newLookup(head layer, descendant func(state common.Hash, ancestor common.Ha storages: make(map[[64]byte][]common.Hash), descendant: descendant, } + // Initialize all 16 storage node shards + for i := 0; i < trienodeShardCount; i++ { + l.storageNodes[i] = make(map[trienodeKey][]common.Hash) + l.accountNodes[i] = make(map[string][]common.Hash) + } + // Apply the diff layers from bottom to top for i := len(layers) - 1; i >= 0; i-- { switch diff := layers[i].(type) { @@ -161,6 +212,45 @@ func (l *lookup) storageTip(accountHash common.Hash, slotHash common.Hash, state return common.Hash{} } +// nodeTip traverses the layer list associated with the given account and path +// in reverse order to locate the first entry that either matches +// the specified stateID or is a descendant of it. +// +// If found, the trie node data corresponding to the supplied stateID resides +// in that layer. Otherwise, two scenarios are possible: +// +// (a) the trie node remains unmodified from the current disk layer up to +// the state layer specified by the stateID: fallback to the disk layer for +// data retrieval, (b) or the layer specified by the stateID is stale: reject +// the data retrieval. +func (l *lookup) nodeTip(accountHash common.Hash, path string, stateID common.Hash, base common.Hash) common.Hash { + var list []common.Hash + if accountHash == (common.Hash{}) { + shardIndex := getNodeShardIndex(path) + list = l.accountNodes[shardIndex][path] + } else { + shardIndex := getNodeShardIndex(path) // Use only path for sharding + list = l.storageNodes[shardIndex][makeTrienodeKey(accountHash, path)] + } + for i := len(list) - 1; i >= 0; i-- { + // If the current state matches the stateID, or the requested state is a + // descendant of it, return the current state as the most recent one + // containing the modified data. Otherwise, the current state may be ahead + // of the requested one or belong to a different branch. + if list[i] == stateID || l.descendant(stateID, list[i]) { + return list[i] + } + } + // No layer matching the stateID or its descendants was found. Use the + // current disk layer as a fallback. + if base == stateID || l.descendant(stateID, base) { + return base + } + // The layer associated with 'stateID' is not the descendant of the current + // disk layer, it's already stale, return nothing. + return common.Hash{} +} + // addLayer traverses the state data retained in the specified diff layer and // integrates it into the lookup set. // @@ -170,6 +260,7 @@ func (l *lookup) storageTip(accountHash common.Hash, slotHash common.Hash, state func (l *lookup) addLayer(diff *diffLayer) { defer func(now time.Time) { lookupAddLayerTimer.UpdateSince(now) + log.Debug("PathDB lookup add layer", "id", diff.id, "block", diff.block, "elapsed", time.Since(now)) }(time.Now()) var ( @@ -204,6 +295,97 @@ func (l *lookup) addLayer(diff *diffLayer) { } } }() + + wg.Add(1) + go func() { + defer wg.Done() + l.addAccountNodes(state, diff.nodes.accountNodes) + }() + + wg.Add(1) + go func() { + defer wg.Done() + l.addStorageNodes(state, diff.nodes.storageNodes) + }() + + states := len(diff.states.accountData) + for _, slots := range diff.states.storageData { + states += len(slots) + } + lookupStateMeter.Mark(int64(states)) + + trienodes := len(diff.nodes.accountNodes) + for _, nodes := range diff.nodes.storageNodes { + trienodes += len(nodes) + } + lookupTrienodeMeter.Mark(int64(trienodes)) + + wg.Wait() +} + +func (l *lookup) addStorageNodes(state common.Hash, nodes map[common.Hash]map[string]*trienode.Node) { + defer func(start time.Time) { + lookupAddTrienodeLayerTimer.UpdateSince(start) + }(time.Now()) + + var ( + wg sync.WaitGroup + tasks = make([][]shardTask, trienodeShardCount) + ) + for accountHash, slots := range nodes { + for path := range slots { + shardIndex := getNodeShardIndex(path) + tasks[shardIndex] = append(tasks[shardIndex], shardTask{ + accountHash: accountHash, + path: path, + }) + } + } + for shardIdx := 0; shardIdx < trienodeShardCount; shardIdx++ { + taskList := tasks[shardIdx] + if len(taskList) == 0 { + continue + } + wg.Add(1) + go func() { + defer wg.Done() + shard := l.storageNodes[shardIdx] + for _, task := range taskList { + key := makeTrienodeKey(task.accountHash, task.path) + shard[key] = append(shard[key], state) + } + }() + } + wg.Wait() +} + +func (l *lookup) addAccountNodes(state common.Hash, nodes map[string]*trienode.Node) { + defer func(start time.Time) { + lookupAddTrienodeLayerTimer.UpdateSince(start) + }(time.Now()) + + var ( + wg sync.WaitGroup + tasks = make([][]string, trienodeShardCount) + ) + for path := range nodes { + shardIndex := getNodeShardIndex(path) + tasks[shardIndex] = append(tasks[shardIndex], path) + } + for shardIdx := 0; shardIdx < trienodeShardCount; shardIdx++ { + taskList := tasks[shardIdx] + if len(taskList) == 0 { + continue + } + wg.Add(1) + go func() { + defer wg.Done() + shard := l.accountNodes[shardIdx] + for _, path := range taskList { + shard[path] = append(shard[path], state) + } + }() + } wg.Wait() } @@ -236,6 +418,7 @@ func removeFromList(list []common.Hash, element common.Hash) (bool, []common.Has func (l *lookup) removeLayer(diff *diffLayer) error { defer func(now time.Time) { lookupRemoveLayerTimer.UpdateSince(now) + log.Debug("PathDB lookup remove layer", "id", diff.id, "block", diff.block, "elapsed", time.Since(now)) }(time.Now()) var ( @@ -274,5 +457,93 @@ func (l *lookup) removeLayer(diff *diffLayer) error { } return nil }) + + eg.Go(func() error { + return l.removeAccountNodes(state, diff.nodes.accountNodes) + }) + + eg.Go(func() error { + return l.removeStorageNodes(state, diff.nodes.storageNodes) + }) + return eg.Wait() +} + +func (l *lookup) removeStorageNodes(state common.Hash, nodes map[common.Hash]map[string]*trienode.Node) error { + defer func(start time.Time) { + lookupRemoveTrienodeLayerTimer.UpdateSince(start) + }(time.Now()) + + var ( + eg errgroup.Group + tasks = make([][]shardTask, trienodeShardCount) + ) + for accountHash, slots := range nodes { + for path := range slots { + shardIndex := getNodeShardIndex(path) + tasks[shardIndex] = append(tasks[shardIndex], shardTask{ + accountHash: accountHash, + path: path, + }) + } + } + for shardIdx := 0; shardIdx < trienodeShardCount; shardIdx++ { + taskList := tasks[shardIdx] + if len(taskList) == 0 { + continue + } + eg.Go(func() error { + shard := l.storageNodes[shardIdx] + for _, task := range taskList { + key := makeTrienodeKey(task.accountHash, task.path) + found, list := removeFromList(shard[key], state) + if !found { + return fmt.Errorf("storage lookup is not found, key: %x, state: %x", key, state) + } + if len(list) != 0 { + shard[key] = list + } else { + delete(shard, key) + } + } + return nil + }) + } + return eg.Wait() +} + +func (l *lookup) removeAccountNodes(state common.Hash, nodes map[string]*trienode.Node) error { + defer func(start time.Time) { + lookupRemoveTrienodeLayerTimer.UpdateSince(start) + }(time.Now()) + + var ( + eg errgroup.Group + tasks = make([][]string, trienodeShardCount) + ) + for path := range nodes { + shardIndex := getNodeShardIndex(path) + tasks[shardIndex] = append(tasks[shardIndex], path) + } + for shardIdx := 0; shardIdx < trienodeShardCount; shardIdx++ { + taskList := tasks[shardIdx] + if len(taskList) == 0 { + continue + } + eg.Go(func() error { + shard := l.accountNodes[shardIdx] + for _, path := range taskList { + found, list := removeFromList(shard[path], state) + if !found { + return fmt.Errorf("account lookup is not found, %x, state: %x", path, state) + } + if len(list) != 0 { + shard[path] = list + } else { + delete(shard, path) + } + } + return nil + }) + } return eg.Wait() } diff --git a/triedb/pathdb/lookup_test.go b/triedb/pathdb/lookup_test.go new file mode 100644 index 000000000000..5c3e77938df7 --- /dev/null +++ b/triedb/pathdb/lookup_test.go @@ -0,0 +1,117 @@ +// Copyright 2025 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "crypto/rand" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +// generateRandomAccountNodes creates a map of random trie nodes +func generateRandomAccountNodes(count int) map[string]*trienode.Node { + nodes := make(map[string]*trienode.Node, count) + + for i := 0; i < count; i++ { + path := make([]byte, 32) + rand.Read(path) + + blob := make([]byte, 64) + rand.Read(blob) + + var hash common.Hash + rand.Read(hash[:]) + + nodes[common.Bytes2Hex(path)] = &trienode.Node{Hash: hash, Blob: blob} + } + + return nodes +} + +// generateRandomStorageNodes creates a map of storage nodes organized by account +func generateRandomStorageNodes(accountCount, nodesPerAccount int) map[common.Hash]map[string]*trienode.Node { + storageNodes := make(map[common.Hash]map[string]*trienode.Node, accountCount) + + for i := 0; i < accountCount; i++ { + var hash common.Hash + rand.Read(hash[:]) + + storageNodes[hash] = generateRandomAccountNodes(nodesPerAccount) + } + + return storageNodes +} + +func BenchmarkAddNodes(b *testing.B) { + tests := []struct { + name string + accountNodeCount int + nodesPerAccount int + }{ + { + name: "Small-100-accounts-10-nodes", + accountNodeCount: 100, + nodesPerAccount: 10, + }, + { + name: "Medium-500-accounts-20-nodes", + accountNodeCount: 500, + nodesPerAccount: 20, + }, + { + name: "Large-2000-accounts-40-nodes", + accountNodeCount: 2000, + nodesPerAccount: 40, + }, + { + name: "XLarge-5000-accounts-50-nodes", + accountNodeCount: 5000, + nodesPerAccount: 50, + }, + } + + for _, tc := range tests { + b.Run(tc.name, func(b *testing.B) { + storageNodes := generateRandomStorageNodes(tc.accountNodeCount, tc.nodesPerAccount) + + lookup := &lookup{} + + // Initialize all 16 storage node shards + for i := 0; i < trienodeShardCount; i++ { + lookup.storageNodes[i] = make(map[trienodeKey][]common.Hash) + lookup.accountNodes[i] = make(map[string][]common.Hash) + } + + var state common.Hash + rand.Read(state[:]) + + b.ResetTimer() + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + // Reset the lookup instance for each benchmark iteration + for j := 0; j < trienodeShardCount; j++ { + lookup.storageNodes[j] = make(map[trienodeKey][]common.Hash) + } + + lookup.addStorageNodes(state, storageNodes) + } + }) + } +} diff --git a/triedb/pathdb/metrics.go b/triedb/pathdb/metrics.go index 31c40053fc26..98381832ec82 100644 --- a/triedb/pathdb/metrics.go +++ b/triedb/pathdb/metrics.go @@ -85,8 +85,12 @@ var ( trienodeIndexHistoryTimer = metrics.NewRegisteredResettingTimer("pathdb/history/trienode/index/time", nil) trienodeUnindexHistoryTimer = metrics.NewRegisteredResettingTimer("pathdb/history/trienode/unindex/time", nil) - lookupAddLayerTimer = metrics.NewRegisteredResettingTimer("pathdb/lookup/add/time", nil) - lookupRemoveLayerTimer = metrics.NewRegisteredResettingTimer("pathdb/lookup/remove/time", nil) + lookupAddLayerTimer = metrics.NewRegisteredResettingTimer("pathdb/lookup/add/time", nil) + lookupRemoveLayerTimer = metrics.NewRegisteredResettingTimer("pathdb/lookup/remove/time", nil) + lookupAddTrienodeLayerTimer = metrics.NewRegisteredResettingTimer("pathdb/lookup/trienode/add/time", nil) + lookupRemoveTrienodeLayerTimer = metrics.NewRegisteredResettingTimer("pathdb/lookup/trienode/remove/time", nil) + lookupStateMeter = metrics.NewRegisteredMeter("pathdb/lookup/state/count", nil) + lookupTrienodeMeter = metrics.NewRegisteredMeter("pathdb/lookup/trienode/count", nil) historicalAccountReadTimer = metrics.NewRegisteredResettingTimer("pathdb/history/account/reads", nil) historicalStorageReadTimer = metrics.NewRegisteredResettingTimer("pathdb/history/storage/reads", nil) diff --git a/triedb/pathdb/reader.go b/triedb/pathdb/reader.go index 842ac0972e38..db52844ee674 100644 --- a/triedb/pathdb/reader.go +++ b/triedb/pathdb/reader.go @@ -64,7 +64,14 @@ type reader struct { // node info. Don't modify the returned byte slice since it's not deep-copied // and still be referenced by database. func (r *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) { - blob, got, loc, err := r.layer.node(owner, path, 0) + l, err := r.db.tree.lookupNode(owner, string(path), r.state) + if err != nil { + return nil, err + } + blob, got, loc, err := l.node(owner, path, 0) + if errors.Is(err, errSnapshotStale) { + blob, got, loc, err = r.layer.node(owner, path, 0) + } if err != nil { return nil, err }