@@ -6,11 +6,13 @@ import (
66 "errors"
77 "fmt"
88 "io/fs"
9+ "math"
910 "os"
1011 "path/filepath"
1112 "strings"
1213 "time"
1314
15+ cmtBlockstore "github.com/cometbft/cometbft/store"
1416 badgerDB "github.com/dgraph-io/badger/v4"
1517 "github.com/spf13/cobra"
1618
@@ -20,6 +22,7 @@ import (
2022 "github.com/oasisprotocol/oasis-core/go/config"
2123 "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/abci"
2224 cmtCommon "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/common"
25+ cmtConfig "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/config"
2326 cometbftBadger "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/db/badger"
2427 cmdCommon "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common"
2528 roothash "github.com/oasisprotocol/oasis-core/go/roothash/api"
@@ -70,6 +73,13 @@ WARNING: Ensure you have at least as much of a free disk as your largest databas
7073 RunE : doDBCompactions ,
7174 }
7275
76+ pruneCmd = & cobra.Command {
77+ Use : "prune-experimental" ,
78+ Args : cobra .NoArgs ,
79+ Short : "EXPERIMENTAL: trigger pruning for all consensus databases" ,
80+ RunE : doPrune ,
81+ }
82+
7383 logger = logging .GetLogger ("cmd/storage" )
7484
7585 pretty = cmdCommon .Isatty (1 )
@@ -395,7 +405,17 @@ func flattenBadgerDB(db *badgerDB.DB, logger *logging.Logger, path string) error
395405}
396406
397407func compactConsensusNodeDB (dataDir string ) error {
398- ldb , ndb , _ , err := abci .InitStateStorage (
408+ ndb , err := openConsensusStateNodeDB (dataDir )
409+ if err != nil {
410+ return fmt .Errorf ("failed to initialize ABCI storage backend: %w" , err )
411+ }
412+ defer ndb .Close ()
413+
414+ return ndb .Compact ()
415+ }
416+
417+ func openConsensusStateNodeDB (dataDir string ) (db.NodeDB , error ) {
418+ _ , ndb , _ , err := abci .InitStateStorage (
399419 & abci.ApplicationConfig {
400420 DataDir : filepath .Join (dataDir , cmtCommon .StateDir ),
401421 StorageBackend : config .GlobalConfig .Storage .Backend ,
@@ -405,16 +425,163 @@ func compactConsensusNodeDB(dataDir string) error {
405425 // ChainContext: doc.ChainContext(), TODO: Should we read this from the doc?
406426 },
407427 )
408- if err != nil {
409- return fmt .Errorf ("failed to initialize ABCI storage backend: %w" , err )
428+
429+ return ndb , err
430+ }
431+
432+ func doPrune (_ * cobra.Command , args []string ) error {
433+ if err := cmdCommon .Init (); err != nil {
434+ cmdCommon .EarlyLogAndExit (err )
435+ }
436+
437+ // TODO consider validating correct mode?
438+
439+ dataDir := cmdCommon .DataDir ()
440+ if err := pruneConsensusDBs (dataDir ); err != nil {
441+ return fmt .Errorf ("failed to prune consensus databases: %w" , err )
410442 }
411443
412- // Close the resources. Both Close and Cleanup only close NodeDB.
413- // Closing both here, to prevent resource leaks in things change in the future.
444+ return nil
445+ }
446+
447+ func pruneConsensusDBs (dataDir string ) error {
448+ if config .GlobalConfig .Consensus .Prune .Strategy == cmtConfig .PruneStrategyNone {
449+ logger .Info ("skipping consensus pruning: (strategy=%s)" , cmtConfig .PruneStrategyNone )
450+ return nil
451+ }
452+
453+ ndb , err := openConsensusStateNodeDB (dataDir )
454+ if err != nil {
455+ return fmt .Errorf ("failed to open NodeDB: %w" , err )
456+ }
414457 defer ndb .Close ()
415- defer ldb .Cleanup ()
416458
417- return ndb .Compact ()
459+ latest , ok := ndb .GetLatestVersion ()
460+ if ! ok {
461+ logger .Info ("skipping consensus pruning as state db is empty" )
462+ return nil
463+ }
464+
465+ earliest , err := pruneConsensusState (dataDir , ndb , latest )
466+ if err != nil {
467+ return fmt .Errorf ("failed to prune application state: %w" , err )
468+ }
469+
470+ if err := pruneCometDBs (dataDir , int64 (earliest )); err != nil {
471+ return fmt .Errorf ("failed to prune CometBFT managed databases: %w" , err )
472+ }
473+
474+ return nil
475+ }
476+
477+ func pruneConsensusState (dataDir string , ndb db.NodeDB , latest uint64 ) (uint64 , error ) {
478+ if latest < config .GlobalConfig .Consensus .Prune .NumKept {
479+ logger .Info ("consensus state pruning skipped: latest version is smaller than the number of versions to keep" )
480+ return latest , nil
481+ }
482+
483+ // In case of configured runtimes, we should not prune past the latest reindexed
484+ // consensus height, so that light history can be populated correctly.
485+ minReindexed , err := minReindexedHeight (dataDir )
486+ if err != nil {
487+ return 0 , fmt .Errorf ("failed to fetch minimum reindexed consensus height: %w" , err )
488+ }
489+
490+ start := ndb .GetEarliestVersion ()
491+ end := min (
492+ latest - config .GlobalConfig .Consensus .Prune .NumKept , // does not underflow due to if at the top.
493+ uint64 (minReindexed ),
494+ )
495+
496+ if end <= start {
497+ logger .Info ("consensus state already pruned" )
498+ return end , nil
499+ }
500+
501+ logger .Info ("pruning consensus state" , "start" , start , "end" , end )
502+ for i := start ; i < end ; i ++ {
503+ if err := ndb .Prune (i ); err != nil {
504+ return 0 , fmt .Errorf ("failed to prune version %d: %w" , i , err )
505+ }
506+
507+ if i % 10_000 == 0 { // TODO not sure this is even needed.
508+ if err := ndb .Sync (); err != nil {
509+ return 0 , fmt .Errorf ("failed to sync NodeDB: %w" , err )
510+ }
511+ logger .Debug ("forcing NodeDB disk sync during pruning" , "version" , i )
512+ }
513+ }
514+
515+ if err := ndb .Sync (); err != nil {
516+ return 0 , fmt .Errorf ("failed to sync NodeDB: %w" , err )
517+ }
518+
519+ return end , nil
520+ }
521+
522+ // minReindexedHeight returns the smallest consensus height reindexed by any
523+ // of the configured runtimes.
524+ //
525+ // In case of no configured runtimes it returns max int64.
526+ func minReindexedHeight (dataDir string ) (int64 , error ) {
527+ fetchLastReindexedHeight := func (runtimeID common.Namespace ) (int64 , error ) {
528+ rtDir := runtimeConfig .GetRuntimeStateDir (dataDir , runtimeID )
529+ mode := config .GlobalConfig .Mode
530+ hasLocalStorage := mode .HasLocalStorage () && ! mode .IsArchive ()
531+
532+ // TODO ideally we would not start whole light history with all background workers, but this would
533+ // require as to refactor existing code...
534+ history , err := history .New (runtimeID , rtDir , history .NewNonePrunerFactory (), hasLocalStorage )
535+ if err != nil {
536+ return 0 , fmt .Errorf ("failed to open new light history: %w" , err )
537+ }
538+ defer history .Close ()
539+
540+ h , err := history .LastConsensusHeight ()
541+ if err != nil {
542+ return 0 , fmt .Errorf ("failed to get last consensus height: %w" , err )
543+ }
544+
545+ return h , nil
546+ }
547+
548+ var minH int64 = math .MaxInt64
549+ for _ , rt := range config .GlobalConfig .Runtime .Runtimes {
550+ h , err := fetchLastReindexedHeight (rt .ID )
551+ if err != nil {
552+ return 0 , fmt .Errorf ("failed to fetch last reindexed height for %s: %w" , rt .ID , err )
553+ }
554+
555+ if h < minH {
556+ minH = h
557+ }
558+ }
559+
560+ return minH , nil
561+ }
562+
563+ func pruneCometDBs (dataDir string , height int64 ) error {
564+ // TODO: This is a hack. In fact even if we manage to get this right via
565+ // BadgerDBProvider and somehow pass correct config via context, this will
566+ // still not be intended way to use it. I believe this hack is worth it, but
567+ // we should definitely release this command as experimental first.
568+ blockstorePath := fmt .Sprintf ("%s/consensus/data/blockstore.badger.db" , dataDir )
569+ blockDB , err := cometbftBadger .New (blockstorePath , false )
570+ if err != nil {
571+ return fmt .Errorf ("failed to open blockstore: %w" , err )
572+ }
573+ blockstore := cmtBlockstore .NewBlockStore (blockDB )
574+
575+ logger .Info ("pruning consensus blockstore" , "target_height" , height )
576+ n , err := blockstore .PruneBlocks (height )
577+ if err != nil {
578+ return fmt .Errorf ("failed to prune blocks: %w" , err )
579+ }
580+ logger .Info ("consensus blockstore finished" , "pruned" , n )
581+
582+ // TODO add pruning of state.badger.db
583+
584+ return nil
418585}
419586
420587// Register registers the client sub-command and all of its children.
@@ -425,5 +592,6 @@ func Register(parentCmd *cobra.Command) {
425592 storageCmd .AddCommand (storageCheckCmd )
426593 storageCmd .AddCommand (storageRenameNsCmd )
427594 storageCmd .AddCommand (storageCompactCmd )
595+ storageCmd .AddCommand (pruneCmd )
428596 parentCmd .AddCommand (storageCmd )
429597}
0 commit comments