diff --git a/src/java/org/apache/cassandra/db/compaction/unified/Controller.java b/src/java/org/apache/cassandra/db/compaction/unified/Controller.java
index b83b4b88a9f..0c5a2ad2a03 100644
--- a/src/java/org/apache/cassandra/db/compaction/unified/Controller.java
+++ b/src/java/org/apache/cassandra/db/compaction/unified/Controller.java
@@ -17,6 +17,7 @@
package org.apache.cassandra.db.compaction.unified;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@@ -321,6 +322,15 @@ public abstract class Controller
static final String MAX_SSTABLES_PER_SHARD_FACTOR_OPTION = "max_sstables_per_shard_factor";
static final double DEFAULT_MAX_SSTABLES_PER_SHARD_FACTOR = UCS_MAX_SSTABLES_PER_SHARD_FACTOR.getDoubleWithLegacyFallback();
+ /**
+ * Whether to use factorization-based shard count growth for smoother progression when base_shard_count is not power of 2.
+ * When enabled (default: true), instead of using power-of-two jumps like 1→2→8→1000, the system will
+ * use prime factorization to create smooth sequences like 1→5→25→125→250→500→1000 for num_shards=1000.
+ * This prevents the large jumps that were involved in the data loss incident caused by HCD-130
+ *
+ */
+ static final boolean USE_FACTORIZATION_SHARD_COUNT_GROWTH = Boolean.parseBoolean(System.getProperty("use_factorization_shard_count_growth", "true"));
+
protected final MonotonicClock clock;
protected final Environment env;
protected final double[] survivalFactors;
@@ -336,6 +346,8 @@ public abstract class Controller
protected final TableMetadata metadata;
protected final int baseShardCount;
+ private final Optional factorizedShardSequence;
+
private final boolean isReplicaAware;
protected final long targetSSTableSize;
@@ -410,6 +422,8 @@ public abstract class Controller
"Set it to 'true' to enable aggressive SSTable expiration.");
}
this.ignoreOverlapsInExpirationCheck = ALLOW_UNSAFE_AGGRESSIVE_SSTABLE_EXPIRATION && ignoreOverlapsInExpirationCheck;
+
+ this.factorizedShardSequence = useFactorizationShardCountGrowth() ? Optional.of(factorizedSmoothShardSequence(baseShardCount)) : Optional.empty();
}
public static File getControllerConfigPath(TableMetadata metadata)
@@ -527,10 +541,18 @@ public int getNumShards(double localDensity)
// Note: the minimum size cannot be larger than the target size's minimum.
if (!(count >= baseShardCount)) // also true for count == NaN
{
- // Make it a power of two, rounding down so that sstables are greater in size than the min.
- // Setting the bottom bit to 1 ensures the result is at least 1.
- // If baseShardCount is not a power of 2, split only to powers of two that are divisors of baseShardCount so boundaries match higher levels
- shards = Math.min(Integer.highestOneBit((int) count | 1), baseShardCount & -baseShardCount);
+ // Use factorization-based growth for smoother progression if it's not power of 2
+ if (factorizedShardSequence.isPresent())
+ {
+ shards = getLargestFactorizedShardCount(count);
+ }
+ else
+ {
+ // Make it a power of two, rounding down so that sstables are greater in size than the min.
+ // Setting the bottom bit to 1 ensures the result is at least 1.
+ // If baseShardCount is not a power of 2, split only to powers of two that are divisors of baseShardCount so boundaries match higher levels
+ shards = Math.min(Integer.highestOneBit((int) count | 1), baseShardCount & -baseShardCount);
+ }
if (logger.isDebugEnabled())
logger.debug("Shard count {} for density {}, {} times min size {}",
shards,
@@ -1569,6 +1591,108 @@ public List prioritize(List 0 && Integer.bitCount(baseShardCount) != 1;
+ }
+
+ /**
+ * Compute a smooth shard count sequence based on prime factorization and find the largest shard count that is not larger
+ * than current shard count based on density or first shard if current count is NaN or negative
+ *
+ * For num_shards=1000 (5^3 * 2^3), returns the appropriate shard count
+ * based on current density to achieve smooth growth: 1, 5, 25, 125, 250, 500, 1000
+ *
+ * @param currentCountBasedOnDensity the current count based on density
+ * @return the largest shard count for the current density
+ */
+ @VisibleForTesting
+ int getLargestFactorizedShardCount(double currentCountBasedOnDensity)
+ {
+ int[] sequence = factorizedShardSequence.get();
+ if (Double.isNaN(currentCountBasedOnDensity))
+ return sequence[0];
+
+ int searchKey = (int) Math.floor(currentCountBasedOnDensity);
+ int idx = Arrays.binarySearch(sequence, searchKey);
+ // exact match
+ if (idx >= 0)
+ return sequence[idx];
+
+ // insertion point
+ int insertionPoint = -idx - 1;
+ // we need the value before insertion point or 0 if insertion point is 0
+ int candidateIndex = Math.max(0, insertionPoint - 1);
+ return sequence[candidateIndex];
+ }
+
+ /**
+ * Generate a factorized shard sequence for smooth shard count growth.
+ * Uses prime factorization with largest factors first to create a cumulative sequence.
+ *
+ * For example: 1000 (5³×2³) → [1, 5, 25, 125, 250, 500, 1000]
+ * This provides much smoother growth than power-of-2 jumps.
+ */
+ @VisibleForTesting
+ static int[] factorizedSmoothShardSequence(int target)
+ {
+ if (target <= 0) throw new IllegalArgumentException("target must be positive");
+ if (target == 1) return new int[]{ 1 };
+
+ // 1) Factorize targeShards into list of prime factors in ascending order
+ List primesAscending = primeFactors(target);
+
+ // 2) Cumulative product to form the chain by using largest prime first.
+ int[] divisors = new int[primesAscending.size() + 1];
+ int cur = 1;
+ divisors[0] = cur;
+ for (int i = 0; i < primesAscending.size(); i++)
+ {
+ cur *= primesAscending.get(primesAscending.size() - 1 - i);
+ divisors[i + 1] = cur;
+ }
+ return divisors;
+ }
+
+ /**
+ * Prime factorization for shard count (usually small) to produce a list of prime factors in ascending order
+ * For example: 1000 -> [2, 2, 2, 5, 5, 5]
+ */
+ @VisibleForTesting
+ static List primeFactors(int num)
+ {
+ if (num <= 1)
+ throw new IllegalArgumentException("num must be greater than 1, got: " + num);
+
+ List result = new ArrayList<>();
+
+ // Factor out 2 using more readable modulo check
+ while (num % 2 == 0)
+ {
+ result.add(2);
+ num /= 2;
+ }
+
+ for (int factor = 3; (long) factor * factor <= num; factor += 2)
+ {
+ while (num % factor == 0)
+ {
+ result.add(factor);
+ num /= factor;
+ }
+ }
+
+ // If num is still > 1, then it's a prime factor
+ if (num > 1)
+ result.add(num);
+
+ return result;
+ }
+
static final class Metrics
{
private final MetricNameFactory factory;
diff --git a/test/unit/org/apache/cassandra/db/compaction/unified/ControllerTest.java b/test/unit/org/apache/cassandra/db/compaction/unified/ControllerTest.java
index f6bcc32cb92..28c47f23316 100644
--- a/test/unit/org/apache/cassandra/db/compaction/unified/ControllerTest.java
+++ b/test/unit/org/apache/cassandra/db/compaction/unified/ControllerTest.java
@@ -18,6 +18,7 @@
import java.util.Arrays;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
@@ -52,6 +53,7 @@
import static org.apache.cassandra.config.CassandraRelevantProperties.UCS_OVERRIDE_UCS_CONFIG_FOR_VECTOR_TABLES;
import static org.apache.cassandra.SchemaLoader.standardCFMD;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -712,4 +714,207 @@ void testValidateCompactionStrategyOptions(boolean testLogType)
Map uncheckedOptions = CompactionStrategyOptions.validateOptions(options);
assertNotNull(uncheckedOptions);
}
-}
\ No newline at end of file
+
+ @Test
+ public void testFactorizedShardGrowth()
+ {
+ // Test factorization-based growth for num_shards=1000 (5^3 * 2^3)
+ Map options = new HashMap<>();
+ options.put(Controller.NUM_SHARDS_OPTION, "1000");
+ options.put(Controller.MIN_SSTABLE_SIZE_OPTION, "1GiB");
+ mockFlushSize(100);
+ Controller controller = Controller.fromOptions(cfs, options);
+
+ // Verify shard count is set
+ assertEquals(1.0, controller.sstableGrowthModifier, 0.0);
+ assertTrue(controller.useFactorizationShardCountGrowth());
+
+ // Test the smooth progression sequence: 1, 5, 25, 125, 250, 500, 1000
+ assertEquals(1, controller.getNumShards(0)); // 0 GiB → 1 shard
+ assertEquals(1, controller.getNumShards(Math.scalb(1.5, 30))); // 1.5 GiB → 1 shard
+ assertEquals(5, controller.getNumShards(Math.scalb(6.0, 30))); // 6 GiB → 5 shards
+ assertEquals(25, controller.getNumShards(Math.scalb(30.0, 30))); // 30 GiB → 25 shards
+ assertEquals(125, controller.getNumShards(Math.scalb(130.0, 30))); // 130 GiB → 125 shards
+ assertEquals(250, controller.getNumShards(Math.scalb(300.0, 30))); // 300 GiB → 250 shards
+ assertEquals(500, controller.getNumShards(Math.scalb(600.0, 30))); // 600 GiB → 500 shards
+ assertEquals(1000, controller.getNumShards(Math.scalb(1000.0, 30))); // 1000 GiB → 1000 shards
+
+ // Test boundary cases
+ assertEquals(1, controller.getNumShards(Math.scalb(4.9, 30))); // Just below 5
+ assertEquals(5, controller.getNumShards(Math.scalb(5.0, 30))); // Exactly 5
+ assertEquals(5, controller.getNumShards(Math.scalb(24.9, 30))); // Just below 25
+ assertEquals(25, controller.getNumShards(Math.scalb(25.0, 30))); // Exactly 25
+
+ // Test very large values
+ assertEquals(1000, controller.getNumShards(Double.POSITIVE_INFINITY));
+ }
+
+ @Test
+ public void testFactorizedShardGrowthPowerOfTwo()
+ {
+ // Test with a power of 2 (should behave similarly to old logic)
+ Map options = new HashMap<>();
+ options.put(Controller.NUM_SHARDS_OPTION, "1024"); // 2^10
+ options.put(Controller.MIN_SSTABLE_SIZE_OPTION, "1GiB");
+ mockFlushSize(100);
+ Controller controller = Controller.fromOptions(cfs, options);
+
+ // Should give smooth power-of-2 progression: 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024
+ assertEquals(1, controller.getNumShards(Math.scalb(1.5, 30)));
+ assertEquals(2, controller.getNumShards(Math.scalb(2.5, 30)));
+ assertEquals(4, controller.getNumShards(Math.scalb(5.0, 30)));
+ assertEquals(8, controller.getNumShards(Math.scalb(10.0, 30)));
+ assertEquals(16, controller.getNumShards(Math.scalb(20.0, 30)));
+ assertEquals(32, controller.getNumShards(Math.scalb(40.0, 30)));
+ assertEquals(64, controller.getNumShards(Math.scalb(80.0, 30)));
+ assertEquals(128, controller.getNumShards(Math.scalb(150.0, 30)));
+ assertEquals(256, controller.getNumShards(Math.scalb(300.0, 30)));
+ assertEquals(512, controller.getNumShards(Math.scalb(600.0, 30)));
+ assertEquals(1024, controller.getNumShards(Math.scalb(1024.0, 30)));
+ }
+
+ @Test
+ public void testNoFactorizedShardGrowthWithPowerOfTwo()
+ {
+ // Test no factorization-based growth for num_shards=128
+ Map options = new HashMap<>();
+ options.put(Controller.NUM_SHARDS_OPTION, "128");
+ options.put(Controller.MIN_SSTABLE_SIZE_OPTION, "1GiB");
+ Controller controller = Controller.fromOptions(cfs, options);
+
+ assertFalse(controller.useFactorizationShardCountGrowth());
+ }
+
+ @Test
+ public void testFactorizedShardGrowthPrimeTarget()
+ {
+ // Test with a prime number (should stay at 1 until reaching the target)
+ Map options = new HashMap<>();
+ options.put(Controller.NUM_SHARDS_OPTION, "17"); // Prime number
+ options.put(Controller.MIN_SSTABLE_SIZE_OPTION, "1GiB");
+ mockFlushSize(100);
+ Controller controller = Controller.fromOptions(cfs, options);
+
+ // Since 17 is prime, sequence should be just: 1, 17
+ assertEquals(1, controller.getNumShards(Double.NaN));
+ assertEquals(1, controller.getNumShards(Math.scalb(1.5, 30)));
+ assertEquals(1, controller.getNumShards(Math.scalb(5.0, 30)));
+ assertEquals(1, controller.getNumShards(Math.scalb(10.0, 30)));
+ assertEquals(1, controller.getNumShards(Math.scalb(16.9, 30)));
+ assertEquals(17, controller.getNumShards(Math.scalb(17.0, 30)));
+ assertEquals(17, controller.getNumShards(Math.scalb(100.0, 30)));
+ }
+
+ @Test
+ public void testFactorizedShardSequence()
+ {
+ // Test small numbers
+ assertArrayEquals(new int[]{ 1 }, Controller.factorizedSmoothShardSequence(1));
+ assertArrayEquals(new int[]{ 1, 2 }, Controller.factorizedSmoothShardSequence(2));
+ assertArrayEquals(new int[]{ 1, 3 }, Controller.factorizedSmoothShardSequence(3));
+ assertArrayEquals(new int[]{ 1, 2, 4 }, Controller.factorizedSmoothShardSequence(4));
+
+ // Test perfect squares
+ assertArrayEquals(new int[]{ 1, 3, 9 }, Controller.factorizedSmoothShardSequence(9));
+ assertArrayEquals(new int[]{ 1, 2, 4, 8, 16 }, Controller.factorizedSmoothShardSequence(16));
+
+ // Test primes
+ assertArrayEquals(new int[]{ 1, 7 }, Controller.factorizedSmoothShardSequence(7));
+ assertArrayEquals(new int[]{ 1, 11 }, Controller.factorizedSmoothShardSequence(11));
+
+ // Test composite numbers
+ assertArrayEquals(new int[]{ 1, 3, 6, 12 }, Controller.factorizedSmoothShardSequence(12));
+
+ // Test 1000 = 5^3 * 2^3
+ int[] expected1000 = new int[]{ 1, 5, 25, 125, 250, 500, 1000 };
+ assertArrayEquals(expected1000, Controller.factorizedSmoothShardSequence(1000));
+
+ // Test 3200 = 5^2 * 2^7
+ int[] expected3200 = new int[]{ 1, 5, 25, 50, 100, 200, 400, 800, 1600, 3200 };
+ assertArrayEquals(expected3200, Controller.factorizedSmoothShardSequence(3200));
+
+ // Test Max Int
+ assertArrayEquals(new int[]{ 1, Integer.MAX_VALUE }, Controller.factorizedSmoothShardSequence(Integer.MAX_VALUE));
+ }
+
+ @Test
+ public void testFactorizedShardSequenceInputValidation()
+ {
+ assertThatThrownBy(() -> Controller.factorizedSmoothShardSequence(0)).hasMessageContaining("must be positive");
+ assertThatThrownBy(() -> Controller.factorizedSmoothShardSequence(-5)).hasMessageContaining("must be positive");
+ }
+
+ @Test
+ public void testPrimeFactors()
+ {
+ // Test small numbers
+ assertEquals(Arrays.asList(2), Controller.primeFactors(2));
+ assertEquals(Arrays.asList(3), Controller.primeFactors(3));
+ assertEquals(Arrays.asList(2, 2), Controller.primeFactors(4));
+
+ // Test larger numbers
+ assertEquals(Arrays.asList(2, 2, 2, 5, 5, 5), Controller.primeFactors(1000));
+ assertEquals(Arrays.asList(2, 2, 2, 2, 2, 2, 2, 5, 5), Controller.primeFactors(3200));
+
+ // Test primes
+ assertEquals(Arrays.asList(7), Controller.primeFactors(7));
+ assertEquals(Arrays.asList(97), Controller.primeFactors(97));
+ assertEquals(Arrays.asList(Integer.MAX_VALUE), Controller.primeFactors(Integer.MAX_VALUE));
+ }
+
+ @Test
+ public void testPrimeFactorsInputValidation()
+ {
+ assertThatThrownBy(() -> Controller.primeFactors(0)).hasMessageContaining("greater than 1");
+ assertThatThrownBy(() -> Controller.primeFactors(1)).hasMessageContaining("greater than 1");
+ assertThatThrownBy(() -> Controller.primeFactors(-10)).hasMessageContaining("greater than 1");
+ }
+
+ @Test
+ public void testGetLargestFactorizedShardCount()
+ {
+ // Test with num_shards=1000 to get sequence: [1, 5, 25, 125, 250, 500, 1000]
+ Map options = new HashMap<>();
+ options.put(Controller.NUM_SHARDS_OPTION, "1000");
+ options.put(Controller.MIN_SSTABLE_SIZE_OPTION, "1GiB");
+ mockFlushSize(100);
+ Controller controller = Controller.fromOptions(cfs, options);
+
+ // Test exact matches
+ assertEquals(1, controller.getLargestFactorizedShardCount(1.0));
+ assertEquals(5, controller.getLargestFactorizedShardCount(5.0));
+ assertEquals(25, controller.getLargestFactorizedShardCount(25.0));
+ assertEquals(125, controller.getLargestFactorizedShardCount(125.0));
+ assertEquals(250, controller.getLargestFactorizedShardCount(250.0));
+ assertEquals(500, controller.getLargestFactorizedShardCount(500.0));
+ assertEquals(1000, controller.getLargestFactorizedShardCount(1000.0));
+
+ // Test values between sequence elements (should return largest ≤ input)
+ assertEquals(1, controller.getLargestFactorizedShardCount(1.5));
+ assertEquals(1, controller.getLargestFactorizedShardCount(4.9));
+ assertEquals(5, controller.getLargestFactorizedShardCount(5.1));
+ assertEquals(5, controller.getLargestFactorizedShardCount(24.9));
+ assertEquals(25, controller.getLargestFactorizedShardCount(25.1));
+ assertEquals(25, controller.getLargestFactorizedShardCount(124.9));
+ assertEquals(125, controller.getLargestFactorizedShardCount(125.1));
+ assertEquals(125, controller.getLargestFactorizedShardCount(249.9));
+ assertEquals(250, controller.getLargestFactorizedShardCount(250.1));
+ assertEquals(250, controller.getLargestFactorizedShardCount(499.9));
+ assertEquals(500, controller.getLargestFactorizedShardCount(500.1));
+ assertEquals(500, controller.getLargestFactorizedShardCount(999.9));
+
+ // Test edge cases
+ assertEquals(1, controller.getLargestFactorizedShardCount(0.0));
+ assertEquals(1, controller.getLargestFactorizedShardCount(0.5));
+ assertEquals(1000, controller.getLargestFactorizedShardCount(1000.1));
+ assertEquals(1000, controller.getLargestFactorizedShardCount(2000.0));
+ assertEquals(1000, controller.getLargestFactorizedShardCount(Double.POSITIVE_INFINITY));
+
+ // Test NaN
+ assertEquals(1, controller.getLargestFactorizedShardCount(Double.NaN));
+
+ // Test negative values (should return first element)
+ assertEquals(1, controller.getLargestFactorizedShardCount(-1.0));
+ assertEquals(1, controller.getLargestFactorizedShardCount(-100.0));
+ }
+}