
Commit 73f61c4

Merge pull request #6311 from oasisprotocol/martin/feature/cli/compact-db-instances
go/oasis-node/cmd/storage: Add command that flattens consensus dbs
2 parents d578880 + ac4c227 commit 73f61c4

File tree

7 files changed: +233, -7 lines

.changelog/6311.feature.md

Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+go/oasis-node: Add new command for compacting consensus databases
+
+A new experimental command `oasis-node storage compact-experimental`
+was added.
+
+The command triggers manual compactions for all the consensus databases.
+This way node operators can forcefully release disk space when enabling
+pruning late.

docs/oasis-node/cli.md

Lines changed: 33 additions & 0 deletions

@@ -331,3 +331,36 @@ response:
 ```
 oasis1qqncl383h8458mr9cytatygctzwsx02n4c5f8ed7
 ```
+
+## storage
+
+### compact-experimental
+
+Run
+
+```sh
+oasis-node storage compact-experimental --config /path/to/config/file
+```
+
+to trigger manual compaction of consensus database instances:
+
+```sh
+{"caller":"storage.go:310","level":"info","module":"cmd/storage", \
+  "msg":"Starting database compactions. This may take a while...", \
+  "ts":"2025-10-08T09:18:22.185451554Z"}
+```
+
+If pruning was not enabled from the start or was recently increased, the disk
+usage may stay the same even after successful pruning.
+
+This is due to the LSM-tree storage design that BadgerDB uses. Concretely,
+deleting a key only marks it as ready to be deleted (a tombstone entry). The
+actual removal of the stale data happens later, during compaction.
+
+During normal operation, compaction happens in the background. However,
+BadgerDB is intentionally lazy, trading disk space for write throughput,
+among other things. Therefore, when pruning is enabled late, the disk space
+may stay constant or not be reclaimed for a very long time.
+
+This command gives operators manual control to release disk space during
+maintenance periods.
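
A minimal sketch of what the command does for each database it finds: open the BadgerDB instance and force a compaction with `Flatten`. The path is hypothetical and the options are simplified compared to the node's own settings.

```go
package main

import (
	"log"

	badger "github.com/dgraph-io/badger/v4"
)

func main() {
	// Open one consensus database directory (illustrative path).
	db, err := badger.Open(badger.DefaultOptions("/node/data/consensus/data/example.db"))
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Flatten forces compactions so that all tables end up on a single level;
	// tombstones and obsolete versions can be dropped along the way, which is
	// what actually releases disk space in the LSM tree.
	if err := db.Flatten(1); err != nil {
		log.Fatal(err)
	}
}
```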

go/consensus/cometbft/db/badger/badger.go

Lines changed: 12 additions & 7 deletions

@@ -63,13 +63,7 @@ func New(fn string, noSuffix bool) (dbm.DB, error) {
 
 	logger := baseLogger.With("path", fn)
 
-	opts := badger.DefaultOptions(fn) // This may benefit from LSMOnlyOptions.
-	opts = opts.WithLogger(cmnBadger.NewLogAdapter(logger))
-	opts = opts.WithSyncWrites(false)
-	opts = opts.WithCompression(options.Snappy)
-	opts = opts.WithBlockCacheSize(64 * 1024 * 1024)
-
-	db, err := badger.Open(opts)
+	db, err := OpenBadger(fn, logger)
 	if err != nil {
 		return nil, fmt.Errorf("cometbft/db/badger: failed to open database: %w", err)
 	}
@@ -86,6 +80,17 @@ func New(fn string, noSuffix bool) (dbm.DB, error) {
 	return impl, nil
 }
 
+// OpenBadger opens a BadgerDB instance suitable for constructing an instance
+// that implements the CometBFT DB interface.
+func OpenBadger(path string, logger *logging.Logger) (*badger.DB, error) {
+	opts := badger.DefaultOptions(path) // This may benefit from LSMOnlyOptions.
+	opts = opts.WithLogger(cmnBadger.NewLogAdapter(logger))
+	opts = opts.WithSyncWrites(false)
+	opts = opts.WithCompression(options.Snappy)
+	opts = opts.WithBlockCacheSize(64 * 1024 * 1024)
+	return badger.Open(opts)
+}
+
 func (d *badgerDBImpl) Get(key []byte) ([]byte, error) {
 	k := toDBKey(key)

go/oasis-node/cmd/storage/storage.go

Lines changed: 124 additions & 0 deletions

@@ -5,16 +5,22 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"io/fs"
 	"os"
 	"path/filepath"
+	"strings"
 	"time"
 
+	badgerDB "github.com/dgraph-io/badger/v4"
 	"github.com/spf13/cobra"
 
 	"github.com/oasisprotocol/oasis-core/go/common"
 	"github.com/oasisprotocol/oasis-core/go/common/crypto/hash"
 	"github.com/oasisprotocol/oasis-core/go/common/logging"
 	"github.com/oasisprotocol/oasis-core/go/config"
+	"github.com/oasisprotocol/oasis-core/go/consensus/cometbft/abci"
+	cmtCommon "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/common"
+	cmtDBProvider "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/db/badger"
 	cmdCommon "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common"
 	roothash "github.com/oasisprotocol/oasis-core/go/roothash/api"
 	"github.com/oasisprotocol/oasis-core/go/runtime/bundle"
@@ -53,6 +59,17 @@ var (
 		RunE: doRenameNs,
 	}
 
+	storageCompactCmd = &cobra.Command{
+		Use:   "compact-experimental",
+		Args:  cobra.NoArgs,
+		Short: "EXPERIMENTAL: trigger compaction for all consensus databases",
+		Long: `EXPERIMENTAL: Optimize the storage for all consensus databases by manually compacting the underlying storage engines.
+
+WARNING: Ensure you have at least as much free disk space as your largest database.
+`,
+		RunE: doDBCompactions,
+	}
+
 	logger = logging.GetLogger("cmd/storage")
 
 	pretty = cmdCommon.Isatty(1)
@@ -283,12 +300,119 @@ func doRenameNs(_ *cobra.Command, args []string) error {
 	return nil
 }
 
+func doDBCompactions(_ *cobra.Command, args []string) error {
+	if err := cmdCommon.Init(); err != nil {
+		cmdCommon.EarlyLogAndExit(err)
+	}
+
+	dataDir := cmdCommon.DataDir()
+
+	logger.Info("Starting database compactions. This may take a while...")
+
+	// Compact CometBFT managed databases: block store, evidence and state (NOT application state).
+	if err := compactCometDBs(dataDir); err != nil {
+		return fmt.Errorf("failed to compact CometBFT managed databases: %w", err)
+	}
+
+	if err := compactConsensusNodeDB(dataDir); err != nil {
+		return fmt.Errorf("failed to compact consensus NodeDB: %w", err)
+	}
+
+	return nil
+}
+
+func compactCometDBs(dataDir string) error {
+	paths, err := findCometDBs(dataDir)
+	if err != nil {
+		return fmt.Errorf("failed to find database instances: %w", err)
+	}
+	for _, path := range paths {
+		if err := compactCometDB(path); err != nil {
+			return fmt.Errorf("failed to compact %s: %w", path, err)
+		}
+	}
+	return nil
+}
+
+func compactCometDB(path string) error {
+	logger := logger.With("path", path)
+	db, err := cmtDBProvider.OpenBadger(path, logger)
+	if err != nil {
+		return fmt.Errorf("failed to open BadgerDB: %w", err)
+	}
+
+	if err := flattenBadgerDB(db, logger); err != nil {
+		return fmt.Errorf("failed to compact %s: %w", path, err)
+	}
+
+	return nil
+}
+
+func findCometDBs(dataDir string) ([]string, error) {
+	dir := fmt.Sprintf("%s/consensus/data", dataDir)
+
+	var dbDirs []string
+	err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if d.IsDir() && strings.HasSuffix(d.Name(), ".db") {
+			dbDirs = append(dbDirs, path)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to walk dir %s: %w", dataDir, err)
+	}
+
+	if len(dbDirs) == 0 {
+		return nil, fmt.Errorf("zero database instances found")
+	}
+
+	return dbDirs, nil
+}
+
+func flattenBadgerDB(db *badgerDB.DB, logger *logging.Logger) error {
+	logger.Info("compacting")
+
+	if err := db.Flatten(1); err != nil {
+		return fmt.Errorf("failed to flatten db: %w", err)
+	}
+
+	logger.Info("compaction completed")
+
+	return nil
+}
+
+func compactConsensusNodeDB(dataDir string) error {
+	ldb, ndb, _, err := abci.InitStateStorage(
+		&abci.ApplicationConfig{
+			DataDir:             filepath.Join(dataDir, cmtCommon.StateDir),
+			StorageBackend:      config.GlobalConfig.Storage.Backend,
+			MemoryOnlyStorage:   false,
+			ReadOnlyStorage:     false,
+			DisableCheckpointer: true,
+		},
+	)
+	if err != nil {
+		return fmt.Errorf("failed to initialize ABCI storage backend: %w", err)
+	}
+
+	// Close the resources. Both Close and Cleanup only close the NodeDB;
+	// closing both here prevents resource leaks if things change in the future.
+	defer ndb.Close()
+	defer ldb.Cleanup()
+
+	return ndb.Compact()
+}
+
 // Register registers the client sub-command and all of its children.
 func Register(parentCmd *cobra.Command) {
 	storageMigrateCmd.Flags().AddFlagSet(bundle.Flags)
 	storageCheckCmd.Flags().AddFlagSet(bundle.Flags)
 	storageCmd.AddCommand(storageMigrateCmd)
 	storageCmd.AddCommand(storageCheckCmd)
 	storageCmd.AddCommand(storageRenameNsCmd)
+	storageCmd.AddCommand(storageCompactCmd)
 	parentCmd.AddCommand(storageCmd)
 }

go/storage/mkvs/db/api/api.go

Lines changed: 10 additions & 0 deletions

@@ -184,6 +184,12 @@ type NodeDB interface {
 	// Only the earliest version can be pruned, passing any other version will result in an error.
 	Prune(version uint64) error
 
+	// Compact triggers compaction of the NodeDB underlying storage engine.
+	//
+	// Warning: Depending on the NodeDB implementation, this may only be safe to call when no
+	// writes are happening.
+	Compact() error
+
 	// Size returns the size of the database in bytes.
 	Size() (int64, error)
 
@@ -294,6 +300,10 @@ func (d *nopNodeDB) Prune(uint64) error {
 	return nil
}
 
+func (d *nopNodeDB) Compact() error {
+	return nil
+}
+
 func (d *nopNodeDB) Size() (int64, error) {
 	return 0, nil
 }
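
As the warning suggests, `Compact()` is intended to be driven from offline tooling. A minimal sketch of a caller, assuming an already opened `api.NodeDB` with no active writers (the package and helper names are illustrative):

```go
package maintenance

import dbAPI "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api"

// compactOffline compacts an already opened NodeDB and closes it afterwards.
// Per the interface warning, call it only while no writes are in flight,
// e.g. from a maintenance command run while the node is stopped.
func compactOffline(ndb dbAPI.NodeDB) error {
	defer ndb.Close()
	return ndb.Compact()
}
```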

go/storage/mkvs/db/badger/badger.go

Lines changed: 23 additions & 0 deletions

@@ -101,6 +101,17 @@ func New(cfg *api.Config) (api.NodeDB, error) {
 	db.gc = cmnBadger.NewGCWorker(db.logger, db.db)
 	db.gc.Start()
 
+	// Setting the BadgerDB discard timestamp is not persistent and is currently
+	// only done during the prune operation.
+	//
+	// Imagine a scenario where, during the previous run of BadgerDB, data was successfully
+	// pruned but not yet compacted. Then the NodeDB is restarted, only this time with pruning
+	// disabled. Unless the discard timestamp is manually set to the earliest version, the data
+	// stored for the already pruned versions may never be compacted, resulting in redundant disk usage.
+	if discardTs := versionToTs(db.GetEarliestVersion()) - 1; discardTs > tsMetadata {
+		db.db.SetDiscardTs(discardTs)
+	}
+
 	return db, nil
 }
 
@@ -915,6 +926,18 @@ func (d *badgerNodeDB) NewBatch(oldRoot node.Root, version uint64, chunk bool) (
 	}, nil
 }
 
+func (d *badgerNodeDB) Compact() error {
+	d.logger.Info("compacting")
+
+	if err := d.db.Flatten(1); err != nil {
+		return fmt.Errorf("failed to flatten db: %w", err)
+	}
+
+	d.logger.Info("compaction completed")
+
+	return nil
+}
+
 func (d *badgerNodeDB) Size() (int64, error) {
 	lsm, vlog := d.db.Size()
 	return lsm + vlog, nil
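
The interplay between the discard timestamp and compaction can be seen with plain BadgerDB in managed-transaction mode, which is how these NodeDB backends run. A minimal sketch with an illustrative path and timestamp — without re-setting the discard timestamp after a restart, `Flatten` has nothing it is allowed to drop:

```go
package main

import (
	"log"

	badger "github.com/dgraph-io/badger/v4"
)

func main() {
	// The NodeDB backends use managed transactions, where versions map to
	// Badger timestamps (illustrative path).
	db, err := badger.OpenManaged(badger.DefaultOptions("/path/to/nodedb.badger.db"))
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// The discard timestamp is not persisted, so it must be set again after a
	// restart; 41 stands in for versionToTs(earliestVersion) - 1 in the NodeDB code.
	db.SetDiscardTs(41)

	// Only now can Flatten drop key versions at or below the discard timestamp.
	if err := db.Flatten(1); err != nil {
		log.Fatal(err)
	}
}
```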

go/storage/mkvs/db/pathbadger/pathbadger.go

Lines changed: 23 additions & 0 deletions

@@ -58,6 +58,17 @@ func New(cfg *api.Config) (api.NodeDB, error) {
 	db.gc = cmnBadger.NewGCWorker(db.logger, db.db)
 	db.gc.Start()
 
+	// Setting the BadgerDB discard timestamp is not persistent and is currently
+	// only done during the prune operation.
+	//
+	// Imagine a scenario where, during the previous run of BadgerDB, data was successfully
+	// pruned but not yet compacted. Then the NodeDB is restarted, only this time with pruning
+	// disabled. Unless the discard timestamp is manually set to the earliest version, the data
+	// stored for the already pruned versions may never be compacted, resulting in redundant disk usage.
+	if discardTs := versionToTs(db.GetEarliestVersion()) - 1; discardTs > tsMetadata {
+		db.db.SetDiscardTs(discardTs)
+	}
+
 	return db, nil
 }
 
@@ -726,6 +737,18 @@ func (d *badgerNodeDB) NewBatch(oldRoot node.Root, version uint64, chunk bool) (
 	}, nil
 }
 
+func (d *badgerNodeDB) Compact() error {
+	d.logger.Info("compacting")
+
+	if err := d.db.Flatten(1); err != nil {
+		return fmt.Errorf("failed to flatten db: %w", err)
+	}
+
+	d.logger.Info("compaction completed")
+
+	return nil
+}
+
 // Implements api.NodeDB.
 func (d *badgerNodeDB) Size() (int64, error) {
 	lsm, vlog := d.db.Size()
