Skip to content

Commit 9a21657

Browse files
jasonstackmichaelsembwever
authored andcommitted
CNDB-15581: CDNB-15253: make UCS shard progression smoother when num_shards and min sstable size are configured (#2031)
riptano/cndb#15253 large shard count jump was involved in HCD-130 Replace power-of-two shard progression with factorization-based smooth growth when base shard count is not power of 2. This prevents problematic large jumps. For example, with num_shards 1000 which is 5^3* 2^3: - Before: 1 → 2 → 8 → 1000 (125x jump causes data loss due to hcd-130) - After: 1 → 5 → 25 → 125 → 250 → 500 → 1000 (max 5x jump) * new behavior can be disabled via `-Dunified_compaction.use_factorization_shard_count_growth`
1 parent bda4e88 commit 9a21657

File tree

2 files changed

+334
-5
lines changed

2 files changed

+334
-5
lines changed

src/java/org/apache/cassandra/db/compaction/unified/Controller.java

Lines changed: 128 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package org.apache.cassandra.db.compaction.unified;
1818

1919
import java.io.IOException;
20+
import java.util.ArrayList;
2021
import java.util.Arrays;
2122
import java.util.Collections;
2223
import java.util.HashMap;
@@ -321,6 +322,15 @@ public abstract class Controller
321322
static final String MAX_SSTABLES_PER_SHARD_FACTOR_OPTION = "max_sstables_per_shard_factor";
322323
static final double DEFAULT_MAX_SSTABLES_PER_SHARD_FACTOR = UCS_MAX_SSTABLES_PER_SHARD_FACTOR.getDoubleWithLegacyFallback();
323324

325+
/**
326+
* Whether to use factorization-based shard count growth for smoother progression when base_shard_count is not power of 2.
327+
* When enabled (default: true), instead of using power-of-two jumps like 1→2→8→1000, the system will
328+
* use prime factorization to create smooth sequences like 1→5→25→125→250→500→1000 for num_shards=1000.
329+
* This prevents the large jumps that were involved in the data loss incident caused by HCD-130
330+
* <p>
331+
*/
332+
static final boolean USE_FACTORIZATION_SHARD_COUNT_GROWTH = Boolean.parseBoolean(System.getProperty("use_factorization_shard_count_growth", "true"));
333+
324334
protected final MonotonicClock clock;
325335
protected final Environment env;
326336
protected final double[] survivalFactors;
@@ -336,6 +346,8 @@ public abstract class Controller
336346
protected final TableMetadata metadata;
337347

338348
protected final int baseShardCount;
349+
private final Optional<int[]> factorizedShardSequence;
350+
339351
private final boolean isReplicaAware;
340352

341353
protected final long targetSSTableSize;
@@ -410,6 +422,8 @@ public abstract class Controller
410422
"Set it to 'true' to enable aggressive SSTable expiration.");
411423
}
412424
this.ignoreOverlapsInExpirationCheck = ALLOW_UNSAFE_AGGRESSIVE_SSTABLE_EXPIRATION && ignoreOverlapsInExpirationCheck;
425+
426+
this.factorizedShardSequence = useFactorizationShardCountGrowth() ? Optional.of(factorizedSmoothShardSequence(baseShardCount)) : Optional.empty();
413427
}
414428

415429
public static File getControllerConfigPath(TableMetadata metadata)
@@ -527,10 +541,18 @@ public int getNumShards(double localDensity)
527541
// Note: the minimum size cannot be larger than the target size's minimum.
528542
if (!(count >= baseShardCount)) // also true for count == NaN
529543
{
530-
// Make it a power of two, rounding down so that sstables are greater in size than the min.
531-
// Setting the bottom bit to 1 ensures the result is at least 1.
532-
// If baseShardCount is not a power of 2, split only to powers of two that are divisors of baseShardCount so boundaries match higher levels
533-
shards = Math.min(Integer.highestOneBit((int) count | 1), baseShardCount & -baseShardCount);
544+
// Use factorization-based growth for smoother progression if it's not power of 2
545+
if (factorizedShardSequence.isPresent())
546+
{
547+
shards = getLargestFactorizedShardCount(count);
548+
}
549+
else
550+
{
551+
// Make it a power of two, rounding down so that sstables are greater in size than the min.
552+
// Setting the bottom bit to 1 ensures the result is at least 1.
553+
// If baseShardCount is not a power of 2, split only to powers of two that are divisors of baseShardCount so boundaries match higher levels
554+
shards = Math.min(Integer.highestOneBit((int) count | 1), baseShardCount & -baseShardCount);
555+
}
534556
if (logger.isDebugEnabled())
535557
logger.debug("Shard count {} for density {}, {} times min size {}",
536558
shards,
@@ -1569,6 +1591,108 @@ public List<CompactionAggregate.UnifiedAggregate> prioritize(List<CompactionAggr
15691591
return aggregates;
15701592
}
15711593

1594+
/**
1595+
* Check if factorization-based growth is enabled and base shard count is not power of 2.
1596+
*/
1597+
@VisibleForTesting
1598+
boolean useFactorizationShardCountGrowth()
1599+
{
1600+
return USE_FACTORIZATION_SHARD_COUNT_GROWTH && baseShardCount > 0 && Integer.bitCount(baseShardCount) != 1;
1601+
}
1602+
1603+
/**
1604+
* Compute a smooth shard count sequence based on prime factorization and find the largest shard count that is not larger
1605+
* than current shard count based on density or first shard if current count is NaN or negative
1606+
*
1607+
* For num_shards=1000 (5^3 * 2^3), returns the appropriate shard count
1608+
* based on current density to achieve smooth growth: 1, 5, 25, 125, 250, 500, 1000
1609+
*
1610+
* @param currentCountBasedOnDensity the current count based on density
1611+
* @return the largest shard count for the current density
1612+
*/
1613+
@VisibleForTesting
1614+
int getLargestFactorizedShardCount(double currentCountBasedOnDensity)
1615+
{
1616+
int[] sequence = factorizedShardSequence.get();
1617+
if (Double.isNaN(currentCountBasedOnDensity))
1618+
return sequence[0];
1619+
1620+
int searchKey = (int) Math.floor(currentCountBasedOnDensity);
1621+
int idx = Arrays.binarySearch(sequence, searchKey);
1622+
// exact match
1623+
if (idx >= 0)
1624+
return sequence[idx];
1625+
1626+
// insertion point
1627+
int insertionPoint = -idx - 1;
1628+
// we need the value before insertion point or 0 if insertion point is 0
1629+
int candidateIndex = Math.max(0, insertionPoint - 1);
1630+
return sequence[candidateIndex];
1631+
}
1632+
1633+
/**
1634+
* Generate a factorized shard sequence for smooth shard count growth.
1635+
* Uses prime factorization with largest factors first to create a cumulative sequence.
1636+
*
1637+
* For example: 1000 (5³×2³) → [1, 5, 25, 125, 250, 500, 1000]
1638+
* This provides much smoother growth than power-of-2 jumps.
1639+
*/
1640+
@VisibleForTesting
1641+
static int[] factorizedSmoothShardSequence(int target)
1642+
{
1643+
if (target <= 0) throw new IllegalArgumentException("target must be positive");
1644+
if (target == 1) return new int[]{ 1 };
1645+
1646+
// 1) Factorize targeShards into list of prime factors in ascending order
1647+
List<Integer> primesAscending = primeFactors(target);
1648+
1649+
// 2) Cumulative product to form the chain by using largest prime first.
1650+
int[] divisors = new int[primesAscending.size() + 1];
1651+
int cur = 1;
1652+
divisors[0] = cur;
1653+
for (int i = 0; i < primesAscending.size(); i++)
1654+
{
1655+
cur *= primesAscending.get(primesAscending.size() - 1 - i);
1656+
divisors[i + 1] = cur;
1657+
}
1658+
return divisors;
1659+
}
1660+
1661+
/**
1662+
* Prime factorization for shard count (usually small) to produce a list of prime factors in ascending order
1663+
* For example: 1000 -> [2, 2, 2, 5, 5, 5]
1664+
*/
1665+
@VisibleForTesting
1666+
static List<Integer> primeFactors(int num)
1667+
{
1668+
if (num <= 1)
1669+
throw new IllegalArgumentException("num must be greater than 1, got: " + num);
1670+
1671+
List<Integer> result = new ArrayList<>();
1672+
1673+
// Factor out 2 using more readable modulo check
1674+
while (num % 2 == 0)
1675+
{
1676+
result.add(2);
1677+
num /= 2;
1678+
}
1679+
1680+
for (int factor = 3; (long) factor * factor <= num; factor += 2)
1681+
{
1682+
while (num % factor == 0)
1683+
{
1684+
result.add(factor);
1685+
num /= factor;
1686+
}
1687+
}
1688+
1689+
// If num is still > 1, then it's a prime factor
1690+
if (num > 1)
1691+
result.add(num);
1692+
1693+
return result;
1694+
}
1695+
15721696
static final class Metrics
15731697
{
15741698
private final MetricNameFactory factory;

test/unit/org/apache/cassandra/db/compaction/unified/ControllerTest.java

Lines changed: 206 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import java.util.Arrays;
2020
import java.util.HashMap;
21+
import java.util.List;
2122
import java.util.Map;
2223
import java.util.concurrent.ScheduledExecutorService;
2324
import java.util.concurrent.ScheduledFuture;
@@ -52,6 +53,7 @@
5253

5354
import static org.apache.cassandra.config.CassandraRelevantProperties.UCS_OVERRIDE_UCS_CONFIG_FOR_VECTOR_TABLES;
5455
import static org.apache.cassandra.SchemaLoader.standardCFMD;
56+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
5557
import static org.junit.Assert.assertArrayEquals;
5658
import static org.junit.Assert.assertEquals;
5759
import static org.junit.Assert.assertFalse;
@@ -712,4 +714,207 @@ void testValidateCompactionStrategyOptions(boolean testLogType)
712714
Map<String, String> uncheckedOptions = CompactionStrategyOptions.validateOptions(options);
713715
assertNotNull(uncheckedOptions);
714716
}
715-
}
717+
718+
@Test
719+
public void testFactorizedShardGrowth()
720+
{
721+
// Test factorization-based growth for num_shards=1000 (5^3 * 2^3)
722+
Map<String, String> options = new HashMap<>();
723+
options.put(Controller.NUM_SHARDS_OPTION, "1000");
724+
options.put(Controller.MIN_SSTABLE_SIZE_OPTION, "1GiB");
725+
mockFlushSize(100);
726+
Controller controller = Controller.fromOptions(cfs, options);
727+
728+
// Verify shard count is set
729+
assertEquals(1.0, controller.sstableGrowthModifier, 0.0);
730+
assertTrue(controller.useFactorizationShardCountGrowth());
731+
732+
// Test the smooth progression sequence: 1, 5, 25, 125, 250, 500, 1000
733+
assertEquals(1, controller.getNumShards(0)); // 0 GiB → 1 shard
734+
assertEquals(1, controller.getNumShards(Math.scalb(1.5, 30))); // 1.5 GiB → 1 shard
735+
assertEquals(5, controller.getNumShards(Math.scalb(6.0, 30))); // 6 GiB → 5 shards
736+
assertEquals(25, controller.getNumShards(Math.scalb(30.0, 30))); // 30 GiB → 25 shards
737+
assertEquals(125, controller.getNumShards(Math.scalb(130.0, 30))); // 130 GiB → 125 shards
738+
assertEquals(250, controller.getNumShards(Math.scalb(300.0, 30))); // 300 GiB → 250 shards
739+
assertEquals(500, controller.getNumShards(Math.scalb(600.0, 30))); // 600 GiB → 500 shards
740+
assertEquals(1000, controller.getNumShards(Math.scalb(1000.0, 30))); // 1000 GiB → 1000 shards
741+
742+
// Test boundary cases
743+
assertEquals(1, controller.getNumShards(Math.scalb(4.9, 30))); // Just below 5
744+
assertEquals(5, controller.getNumShards(Math.scalb(5.0, 30))); // Exactly 5
745+
assertEquals(5, controller.getNumShards(Math.scalb(24.9, 30))); // Just below 25
746+
assertEquals(25, controller.getNumShards(Math.scalb(25.0, 30))); // Exactly 25
747+
748+
// Test very large values
749+
assertEquals(1000, controller.getNumShards(Double.POSITIVE_INFINITY));
750+
}
751+
752+
@Test
753+
public void testFactorizedShardGrowthPowerOfTwo()
754+
{
755+
// Test with a power of 2 (should behave similarly to old logic)
756+
Map<String, String> options = new HashMap<>();
757+
options.put(Controller.NUM_SHARDS_OPTION, "1024"); // 2^10
758+
options.put(Controller.MIN_SSTABLE_SIZE_OPTION, "1GiB");
759+
mockFlushSize(100);
760+
Controller controller = Controller.fromOptions(cfs, options);
761+
762+
// Should give smooth power-of-2 progression: 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024
763+
assertEquals(1, controller.getNumShards(Math.scalb(1.5, 30)));
764+
assertEquals(2, controller.getNumShards(Math.scalb(2.5, 30)));
765+
assertEquals(4, controller.getNumShards(Math.scalb(5.0, 30)));
766+
assertEquals(8, controller.getNumShards(Math.scalb(10.0, 30)));
767+
assertEquals(16, controller.getNumShards(Math.scalb(20.0, 30)));
768+
assertEquals(32, controller.getNumShards(Math.scalb(40.0, 30)));
769+
assertEquals(64, controller.getNumShards(Math.scalb(80.0, 30)));
770+
assertEquals(128, controller.getNumShards(Math.scalb(150.0, 30)));
771+
assertEquals(256, controller.getNumShards(Math.scalb(300.0, 30)));
772+
assertEquals(512, controller.getNumShards(Math.scalb(600.0, 30)));
773+
assertEquals(1024, controller.getNumShards(Math.scalb(1024.0, 30)));
774+
}
775+
776+
@Test
777+
public void testNoFactorizedShardGrowthWithPowerOfTwo()
778+
{
779+
// Test no factorization-based growth for num_shards=128
780+
Map<String, String> options = new HashMap<>();
781+
options.put(Controller.NUM_SHARDS_OPTION, "128");
782+
options.put(Controller.MIN_SSTABLE_SIZE_OPTION, "1GiB");
783+
Controller controller = Controller.fromOptions(cfs, options);
784+
785+
assertFalse(controller.useFactorizationShardCountGrowth());
786+
}
787+
788+
@Test
789+
public void testFactorizedShardGrowthPrimeTarget()
790+
{
791+
// Test with a prime number (should stay at 1 until reaching the target)
792+
Map<String, String> options = new HashMap<>();
793+
options.put(Controller.NUM_SHARDS_OPTION, "17"); // Prime number
794+
options.put(Controller.MIN_SSTABLE_SIZE_OPTION, "1GiB");
795+
mockFlushSize(100);
796+
Controller controller = Controller.fromOptions(cfs, options);
797+
798+
// Since 17 is prime, sequence should be just: 1, 17
799+
assertEquals(1, controller.getNumShards(Double.NaN));
800+
assertEquals(1, controller.getNumShards(Math.scalb(1.5, 30)));
801+
assertEquals(1, controller.getNumShards(Math.scalb(5.0, 30)));
802+
assertEquals(1, controller.getNumShards(Math.scalb(10.0, 30)));
803+
assertEquals(1, controller.getNumShards(Math.scalb(16.9, 30)));
804+
assertEquals(17, controller.getNumShards(Math.scalb(17.0, 30)));
805+
assertEquals(17, controller.getNumShards(Math.scalb(100.0, 30)));
806+
}
807+
808+
@Test
809+
public void testFactorizedShardSequence()
810+
{
811+
// Test small numbers
812+
assertArrayEquals(new int[]{ 1 }, Controller.factorizedSmoothShardSequence(1));
813+
assertArrayEquals(new int[]{ 1, 2 }, Controller.factorizedSmoothShardSequence(2));
814+
assertArrayEquals(new int[]{ 1, 3 }, Controller.factorizedSmoothShardSequence(3));
815+
assertArrayEquals(new int[]{ 1, 2, 4 }, Controller.factorizedSmoothShardSequence(4));
816+
817+
// Test perfect squares
818+
assertArrayEquals(new int[]{ 1, 3, 9 }, Controller.factorizedSmoothShardSequence(9));
819+
assertArrayEquals(new int[]{ 1, 2, 4, 8, 16 }, Controller.factorizedSmoothShardSequence(16));
820+
821+
// Test primes
822+
assertArrayEquals(new int[]{ 1, 7 }, Controller.factorizedSmoothShardSequence(7));
823+
assertArrayEquals(new int[]{ 1, 11 }, Controller.factorizedSmoothShardSequence(11));
824+
825+
// Test composite numbers
826+
assertArrayEquals(new int[]{ 1, 3, 6, 12 }, Controller.factorizedSmoothShardSequence(12));
827+
828+
// Test 1000 = 5^3 * 2^3
829+
int[] expected1000 = new int[]{ 1, 5, 25, 125, 250, 500, 1000 };
830+
assertArrayEquals(expected1000, Controller.factorizedSmoothShardSequence(1000));
831+
832+
// Test 3200 = 5^2 * 2^7
833+
int[] expected3200 = new int[]{ 1, 5, 25, 50, 100, 200, 400, 800, 1600, 3200 };
834+
assertArrayEquals(expected3200, Controller.factorizedSmoothShardSequence(3200));
835+
836+
// Test Max Int
837+
assertArrayEquals(new int[]{ 1, Integer.MAX_VALUE }, Controller.factorizedSmoothShardSequence(Integer.MAX_VALUE));
838+
}
839+
840+
@Test
841+
public void testFactorizedShardSequenceInputValidation()
842+
{
843+
assertThatThrownBy(() -> Controller.factorizedSmoothShardSequence(0)).hasMessageContaining("must be positive");
844+
assertThatThrownBy(() -> Controller.factorizedSmoothShardSequence(-5)).hasMessageContaining("must be positive");
845+
}
846+
847+
@Test
848+
public void testPrimeFactors()
849+
{
850+
// Test small numbers
851+
assertEquals(Arrays.asList(2), Controller.primeFactors(2));
852+
assertEquals(Arrays.asList(3), Controller.primeFactors(3));
853+
assertEquals(Arrays.asList(2, 2), Controller.primeFactors(4));
854+
855+
// Test larger numbers
856+
assertEquals(Arrays.asList(2, 2, 2, 5, 5, 5), Controller.primeFactors(1000));
857+
assertEquals(Arrays.asList(2, 2, 2, 2, 2, 2, 2, 5, 5), Controller.primeFactors(3200));
858+
859+
// Test primes
860+
assertEquals(Arrays.asList(7), Controller.primeFactors(7));
861+
assertEquals(Arrays.asList(97), Controller.primeFactors(97));
862+
assertEquals(Arrays.asList(Integer.MAX_VALUE), Controller.primeFactors(Integer.MAX_VALUE));
863+
}
864+
865+
@Test
866+
public void testPrimeFactorsInputValidation()
867+
{
868+
assertThatThrownBy(() -> Controller.primeFactors(0)).hasMessageContaining("greater than 1");
869+
assertThatThrownBy(() -> Controller.primeFactors(1)).hasMessageContaining("greater than 1");
870+
assertThatThrownBy(() -> Controller.primeFactors(-10)).hasMessageContaining("greater than 1");
871+
}
872+
873+
@Test
874+
public void testGetLargestFactorizedShardCount()
875+
{
876+
// Test with num_shards=1000 to get sequence: [1, 5, 25, 125, 250, 500, 1000]
877+
Map<String, String> options = new HashMap<>();
878+
options.put(Controller.NUM_SHARDS_OPTION, "1000");
879+
options.put(Controller.MIN_SSTABLE_SIZE_OPTION, "1GiB");
880+
mockFlushSize(100);
881+
Controller controller = Controller.fromOptions(cfs, options);
882+
883+
// Test exact matches
884+
assertEquals(1, controller.getLargestFactorizedShardCount(1.0));
885+
assertEquals(5, controller.getLargestFactorizedShardCount(5.0));
886+
assertEquals(25, controller.getLargestFactorizedShardCount(25.0));
887+
assertEquals(125, controller.getLargestFactorizedShardCount(125.0));
888+
assertEquals(250, controller.getLargestFactorizedShardCount(250.0));
889+
assertEquals(500, controller.getLargestFactorizedShardCount(500.0));
890+
assertEquals(1000, controller.getLargestFactorizedShardCount(1000.0));
891+
892+
// Test values between sequence elements (should return largest ≤ input)
893+
assertEquals(1, controller.getLargestFactorizedShardCount(1.5));
894+
assertEquals(1, controller.getLargestFactorizedShardCount(4.9));
895+
assertEquals(5, controller.getLargestFactorizedShardCount(5.1));
896+
assertEquals(5, controller.getLargestFactorizedShardCount(24.9));
897+
assertEquals(25, controller.getLargestFactorizedShardCount(25.1));
898+
assertEquals(25, controller.getLargestFactorizedShardCount(124.9));
899+
assertEquals(125, controller.getLargestFactorizedShardCount(125.1));
900+
assertEquals(125, controller.getLargestFactorizedShardCount(249.9));
901+
assertEquals(250, controller.getLargestFactorizedShardCount(250.1));
902+
assertEquals(250, controller.getLargestFactorizedShardCount(499.9));
903+
assertEquals(500, controller.getLargestFactorizedShardCount(500.1));
904+
assertEquals(500, controller.getLargestFactorizedShardCount(999.9));
905+
906+
// Test edge cases
907+
assertEquals(1, controller.getLargestFactorizedShardCount(0.0));
908+
assertEquals(1, controller.getLargestFactorizedShardCount(0.5));
909+
assertEquals(1000, controller.getLargestFactorizedShardCount(1000.1));
910+
assertEquals(1000, controller.getLargestFactorizedShardCount(2000.0));
911+
assertEquals(1000, controller.getLargestFactorizedShardCount(Double.POSITIVE_INFINITY));
912+
913+
// Test NaN
914+
assertEquals(1, controller.getLargestFactorizedShardCount(Double.NaN));
915+
916+
// Test negative values (should return first element)
917+
assertEquals(1, controller.getLargestFactorizedShardCount(-1.0));
918+
assertEquals(1, controller.getLargestFactorizedShardCount(-100.0));
919+
}
920+
}

0 commit comments

Comments
 (0)