diff --git a/src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java b/src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java index aa19f21dbab..2c271362d34 100644 --- a/src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java +++ b/src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java @@ -354,6 +354,11 @@ else if (analyzerFactory.isNGram()) } else if (type.isVector()) { + if (!Version.current(metadata.keyspace).onOrAfter(Version.JVECTOR_EARLIEST)) + { + throw new InvalidRequestException(vectorUnsupportedByVersionError(Version.current(metadata.keyspace))); + } + if (type.valueLengthIfFixed() == 4 && config.getSimilarityFunction() == VectorSimilarityFunction.COSINE) throw new InvalidRequestException("Cosine similarity is not supported for single-dimension vectors"); @@ -450,12 +455,7 @@ private Future startInitialBuild(ColumnFamilyStore baseCfs, boolean validate, if (indexContext.isVector() && indexContext.version().compareTo(Version.JVECTOR_EARLIEST) < 0) { - throw new FeatureNeedsIndexRebuildException(String.format("The current configured on-disk format version %s does not support vector indexes. " + - "The minimum version that supports vectors is %s. " + - "The on-disk format version can be set via the -D%s system property.", - indexContext.version(), - Version.JVECTOR_EARLIEST, - CassandraRelevantProperties.SAI_CURRENT_VERSION.name())); + throw new FeatureNeedsIndexRebuildException(vectorUnsupportedByVersionError(indexContext.version())); } // stop in-progress compaction tasks to prevent compacted sstables not being indexed. @@ -500,6 +500,17 @@ private Future startInitialBuild(ColumnFamilyStore baseCfs, boolean validate, return Futures.allAsList(futures); } + @VisibleForTesting + public static String vectorUnsupportedByVersionError(Version currentVersion) + { + return String.format("The current configured on-disk format version %s does not support vector indexes. " + + "The minimum version that supports vectors is %s. " + + "The on-disk format version can be set via the -D%s system property.", + currentVersion, + Version.JVECTOR_EARLIEST, + CassandraRelevantProperties.SAI_CURRENT_VERSION.name()); + } + /** * Splits SSTables into groups of similar overall size. * diff --git a/test/unit/org/apache/cassandra/index/sai/SAITester.java b/test/unit/org/apache/cassandra/index/sai/SAITester.java index 045bf690532..7b9f37e3bd9 100644 --- a/test/unit/org/apache/cassandra/index/sai/SAITester.java +++ b/test/unit/org/apache/cassandra/index/sai/SAITester.java @@ -559,7 +559,12 @@ protected long getDiskUsage() protected void verifyNoIndexFiles() { - assertTrue(indexFiles().size() == 0); + verifyNoIndexFiles(KEYSPACE, currentTable()); + } + + protected void verifyNoIndexFiles(String keyspace, String table) + { + assertTrue(indexFiles(keyspace, table).isEmpty()); } // Verify every sstables is indexed correctly and the components are valid. @@ -656,7 +661,12 @@ protected boolean isBuildCompletionMarker(IndexComponentType indexComponentType) protected Set indexFiles() { - ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(currentTable()); + return indexFiles(KEYSPACE, currentTable()); + } + + protected Set indexFiles(String keyspace, String table) + { + ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); return cfs.getDirectories().getCFDirectories() .stream() .flatMap(dir -> Arrays.stream(dir.tryList())) diff --git a/test/unit/org/apache/cassandra/index/sai/cql/FeaturesVersionSupportTest.java b/test/unit/org/apache/cassandra/index/sai/cql/FeaturesVersionSupportTest.java index 80837b791da..bebfcdc41a4 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/FeaturesVersionSupportTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/FeaturesVersionSupportTest.java @@ -16,20 +16,20 @@ package org.apache.cassandra.index.sai.cql; -import java.util.Collection; -import java.util.stream.Collectors; - -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.exceptions.ReadFailureException; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.index.sai.SAIUtil; +import org.apache.cassandra.index.sai.StorageAttachedIndex; import org.apache.cassandra.index.sai.disk.format.Version; import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Collection; +import java.util.stream.Collectors; import static org.assertj.core.api.Assertions.assertThat; @@ -76,24 +76,25 @@ public void testANNSupport() createIndex("CREATE CUSTOM INDEX ON %s(x) USING 'StorageAttachedIndex'"); // vector index creation will be rejected in older versions - String idx = createIndexAsync("CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex'"); - waitForIndexBuilds(idx); - boolean isIndexQueryable = isIndexQueryable(keyspace(), idx); + String createIndexQuery = "CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex'"; if (version.onOrAfter(Version.JVECTOR_EARLIEST)) { - assertThat(isIndexQueryable).isTrue(); + String idx = createIndex(createIndexQuery); + assertThat(isIndexQueryable(keyspace(), idx)).isTrue(); // the index is marked as queryable, so we should be able to query it assertRows(execute("SELECT k FROM %s ORDER BY v ANN OF [2.5, 3.5, 4.5] LIMIT 3"), row(2), row(1), row(3)); assertRows(execute("SELECT k FROM %s ORDER BY v ANN OF [2.5, 3.5, 4.5] LIMIT 3 WITH ann_options = {'rerank_k':3}"), row(2), row(1), row(3)); + + dropIndex("DROP INDEX %s." + idx); } else { - assertThat(isIndexQueryable).isFalse(); + Assertions.assertThatThrownBy(() -> createIndex(createIndexQuery)) + .hasMessageContaining(StorageAttachedIndex.vectorUnsupportedByVersionError(version)); } // vector index creation will be accepted on newer versions, even if there is still another index in the older version - dropIndex("DROP INDEX %s." + idx); SAIUtil.setCurrentVersion(Version.LATEST); createIndex("CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex'"); @@ -124,24 +125,25 @@ public void testGeoDistance() createIndex("CREATE CUSTOM INDEX ON %s(x) USING 'StorageAttachedIndex'"); // vector index creation will be rejected in older versions - String idx = createIndexAsync("CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex' WITH OPTIONS = {'similarity_function' : 'euclidean'}"); - waitForIndexBuilds(idx); - boolean isIndexQueryable = isIndexQueryable(keyspace(), idx); + String createIndexQuery = "CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex' WITH OPTIONS = {'similarity_function' : 'euclidean'}"; if (version.onOrAfter(Version.JVECTOR_EARLIEST)) { - assertThat(isIndexQueryable).isTrue(); + String idx = createIndex(createIndexQuery); + assertThat(isIndexQueryable(keyspace(), idx)).isTrue(); // the index is marked as queryable, so we should be able to query it assertRowsIgnoringOrder(execute("SELECT k FROM %s WHERE GEO_DISTANCE(v, [5,5]) < 157000"), row(2), row(3)); assertRowsIgnoringOrder(execute("SELECT k FROM %s WHERE GEO_DISTANCE(v, [5,5]) < 157011"), row(1), row(2), row(3)); + + dropIndex("DROP INDEX %s." + idx); } else { - assertThat(isIndexQueryable).isFalse(); + Assertions.assertThatThrownBy(() -> createIndex(createIndexQuery)) + .hasMessageContaining(StorageAttachedIndex.vectorUnsupportedByVersionError(version)); } // vector index creation will be accepted on newer versions, even if there is still an index in the older version - dropIndex("DROP INDEX %s." + idx); SAIUtil.setCurrentVersion(Version.LATEST); createIndex("CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex' WITH OPTIONS = {'similarity_function' : 'euclidean'}"); diff --git a/test/unit/org/apache/cassandra/index/sai/cql/VersionSelectorTest.java b/test/unit/org/apache/cassandra/index/sai/cql/VersionSelectorTest.java index 2de5e402c84..077417d9882 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/VersionSelectorTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/VersionSelectorTest.java @@ -16,22 +16,22 @@ package org.apache.cassandra.index.sai.cql; +import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Map; import java.util.function.Consumer; -import java.util.stream.Collectors; +import java.util.function.Function; -import org.junit.Assume; +import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.index.sai.SAITester; import org.apache.cassandra.index.sai.SAIUtil; +import org.apache.cassandra.index.sai.StorageAttachedIndex; import org.apache.cassandra.index.sai.disk.format.Version; import org.assertj.core.api.Assertions; @@ -46,15 +46,37 @@ public class VersionSelectorTest extends SAITester { public static final String TABLE = "tbl"; - private static final Logger logger = LoggerFactory.getLogger(VersionSelectorTest.class); @Parameterized.Parameter public Version globalCurrentVersion; - @Parameterized.Parameters(name = "globalCurrentVersion={0}") + @Parameterized.Parameter(1) + public Operation operation; + + public enum Operation + { + CREATE, + REBUILD + } + + @Parameterized.Parameters(name = "globalCurrentVersion={0} operation={1}") public static Collection data() { - return Version.ALL.stream().map(v -> new Object[]{ v }).collect(Collectors.toList()); + Collection params = new ArrayList<>(); + for (Operation operation : Operation.values()) + { + for (Version version : Version.ALL) + { + params.add(new Object[]{ version, operation }); + } + } + return params; + } + + @Before + public void before() + { + SAIUtil.setCurrentVersion(globalCurrentVersion); } @Test @@ -62,13 +84,13 @@ public void testNumericOnSkinnyTable() { test(keyspace -> { createTable(keyspace, "CREATE TABLE %s (k int PRIMARY KEY, v int)"); - createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex'"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (0, 0)"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (1, 1)"); - }).withQueries(keyspace -> { - assertRowCount(keyspace, "SELECT * FROM %s WHERE v>=0", 2); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v>0", 1); - }); + }).withIndex(keyspace -> createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex'")) + .withQueries(keyspace -> { + assertRowCount(keyspace, "SELECT * FROM %s WHERE v>=0", 2); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v>0", 1); + }); } @Test @@ -76,17 +98,17 @@ public void testNumericOnWideTable() { test(keyspace -> { createTable(keyspace, "CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c))"); - createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex'"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 0, 0)"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 1, 1)"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 0, 0)"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 1, 2)"); - }).withQueries(keyspace -> { - assertRowCount(keyspace, "SELECT * FROM %s WHERE v>=0", 4); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v>0", 2); - assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v>=0", 2); - assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v>0", 1); - }); + }).withIndex(keyspace -> createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex'")) + .withQueries(keyspace -> { + assertRowCount(keyspace, "SELECT * FROM %s WHERE v>=0", 4); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v>0", 2); + assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v>=0", 2); + assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v>0", 1); + }); } @Test @@ -94,15 +116,15 @@ public void testLiteralOnSkinnyTable() { test(keyspace -> { createTable(keyspace, "CREATE TABLE %s (k int PRIMARY KEY, v text)"); - createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex'"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (0, '0')"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (1, '0')"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (2, '1')"); - }).withQueries(keyspace -> { - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='0'", 2); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='1'", 1); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='2'", 0); - }); + }).withIndex(keyspace -> createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex'")) + .withQueries(keyspace -> { + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='0'", 2); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='1'", 1); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='2'", 0); + }); } @Test @@ -110,21 +132,21 @@ public void testLiteralOnWideTable() { test(keyspace -> { createTable(keyspace, "CREATE TABLE %s (k int, c int, v text, PRIMARY KEY (k, c))"); - createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex'"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 0, '0')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 1, '0')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 2, '1')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 0, '0')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 1, '0')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 2, '1')"); - }).withQueries(keyspace -> { - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='0'", 4); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='1'", 2); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='2'", 0); - assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='0'", 2); - assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='1'", 1); - assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='2'", 0); - }); + }).withIndex(keyspace -> createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex'")) + .withQueries(keyspace -> { + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='0'", 4); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='1'", 2); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='2'", 0); + assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='0'", 2); + assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='1'", 1); + assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='2'", 0); + }); } @Test @@ -132,26 +154,27 @@ public void testQueryAnalyzersOnSkinnyTable() { test(keyspace -> { createTable(keyspace, "CREATE TABLE %s (k int PRIMARY KEY, v text)"); - createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + - "WITH OPTIONS = {" + - "'index_analyzer': '{" + - " \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," + - " \"filters\" : [ { \"name\" : \"lowercase\", \"args\": {} }, " + - " { \"name\" : \"edgengram\", \"args\": { \"minGramSize\":\"1\", \"maxGramSize\":\"30\" } }]," + - " \"charFilters\" : []}', " + - "'query_analyzer': '{" + - " \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," + - " \"filters\" : [ {\"name\" : \"lowercase\",\"args\": {}} ]}'}"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (1, 'astra quick fox')"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (2, 'astra2 quick fox')"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (3, 'astra3 quick foxes')"); - }).withQueries(keyspace -> { - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='ast'", 3); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='astra'", 3); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='astra2'", 1); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='fox'", 3); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='foxes'", 1); - }); + }).withIndex(keyspace -> createIndex(keyspace, + "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + + "WITH OPTIONS = {" + + "'index_analyzer': '{" + + " \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," + + " \"filters\" : [ { \"name\" : \"lowercase\", \"args\": {} }, " + + " { \"name\" : \"edgengram\", \"args\": { \"minGramSize\":\"1\", \"maxGramSize\":\"30\" } }]," + + " \"charFilters\" : []}', " + + "'query_analyzer': '{" + + " \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," + + " \"filters\" : [ {\"name\" : \"lowercase\",\"args\": {}} ]}'}")) + .withQueries(keyspace -> { + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='ast'", 3); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='astra'", 3); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='astra2'", 1); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='fox'", 3); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='foxes'", 1); + }); } @Test @@ -159,34 +182,35 @@ public void testQueryAnalyzersOnWideTable() { test(keyspace -> { createTable(keyspace, "CREATE TABLE %s (k int, c int, v text, PRIMARY KEY (k, c))"); - createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + - "WITH OPTIONS = {" + - "'index_analyzer': '{" + - " \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," + - " \"filters\" : [ { \"name\" : \"lowercase\", \"args\": {} }, " + - " { \"name\" : \"edgengram\", \"args\": { \"minGramSize\":\"1\", \"maxGramSize\":\"30\" } }]," + - " \"charFilters\" : []}', " + - "'query_analyzer': '{" + - " \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," + - " \"filters\" : [ {\"name\" : \"lowercase\",\"args\": {}} ]}'}"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 1, 'astra quick fox')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 2, 'astra2 quick fox')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 3, 'astra3 quick foxes')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 1, 'astra quick fox')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 2, 'astra2 quick fox')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 3, 'astra3 quick foxes')"); - }).withQueries(keyspace -> { - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='ast'", 6); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='astra'", 6); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='astra2'", 2); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='fox'", 6); - assertRowCount(keyspace, "SELECT * FROM %s WHERE v='foxes'", 2); - assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='ast'", 3); - assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='astra'", 3); - assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='astra2'", 1); - assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='fox'", 3); - assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='foxes'", 1); - }); + }).withIndex(keyspace -> createIndex(keyspace, + "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + + "WITH OPTIONS = {" + + "'index_analyzer': '{" + + " \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," + + " \"filters\" : [ { \"name\" : \"lowercase\", \"args\": {} }, " + + " { \"name\" : \"edgengram\", \"args\": { \"minGramSize\":\"1\", \"maxGramSize\":\"30\" } }]," + + " \"charFilters\" : []}', " + + "'query_analyzer': '{" + + " \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," + + " \"filters\" : [ {\"name\" : \"lowercase\",\"args\": {}} ]}'}")) + .withQueries(keyspace -> { + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='ast'", 6); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='astra'", 6); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='astra2'", 2); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='fox'", 6); + assertRowCount(keyspace, "SELECT * FROM %s WHERE v='foxes'", 2); + assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='ast'", 3); + assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='astra'", 3); + assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='astra2'", 1); + assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='fox'", 3); + assertRowCount(keyspace, "SELECT * FROM %s WHERE k = 0 AND v='foxes'", 1); + }); } @Test @@ -194,16 +218,19 @@ public void testANNOnSkinnyTable() { test(keyspace -> { createTable(keyspace, "CREATE TABLE %s (k int PRIMARY KEY, v vector)"); - createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + - "WITH OPTIONS = {'similarity_function' : 'euclidean'}"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (0, [1.0, 2.0])"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (1, [2.0, 3.0])"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (2, [3.0, 4.0])"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (3, [4.0, 5.0])"); - }, Version.JVECTOR_EARLIEST, "JVector is not supported in V2OnDiskFormat").withQueries(keyspace -> { - assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v ANN OF [2.5, 3.5] LIMIT 3", 3); - assertRowCount(keyspace, "SELECT k FROM %s WHERE GEO_DISTANCE(v, [2.5, 3.5]) < 157000", 2); - }, Version.JVECTOR_EARLIEST, "INDEX_NOT_AVAILABLE"); + }).withIndex(keyspace -> createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + + "WITH OPTIONS = {'similarity_function' : 'euclidean'}"), + Version.JVECTOR_EARLIEST, + StorageAttachedIndex::vectorUnsupportedByVersionError, + "JVector is not supported in V2OnDiskFormat") + .withQueries(keyspace -> { + assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v ANN OF [2.5, 3.5] LIMIT 3", 3); + assertRowCount(keyspace, "SELECT k FROM %s WHERE GEO_DISTANCE(v, [2.5, 3.5]) < 157000", 2); + }, Version.JVECTOR_EARLIEST, "INDEX_NOT_AVAILABLE"); } @Test @@ -211,8 +238,6 @@ public void testANNOnWideTable() { test(keyspace -> { createTable(keyspace, "CREATE TABLE %s (k int, c int, v vector, PRIMARY KEY (k, c))"); - createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + - "WITH OPTIONS = {'similarity_function' : 'euclidean'}"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 0, [1.0, 2.0])"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 1, [2.0, 3.0])"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 2, [3.0, 4.0])"); @@ -221,12 +246,17 @@ public void testANNOnWideTable() execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 1, [2.0, 3.0])"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 2, [3.0, 4.0])"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 3, [4.0, 5.0])"); - }, Version.JVECTOR_EARLIEST, "JVector is not supported in V2OnDiskFormat").withQueries(keyspace -> { - assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v ANN OF [2.5, 3.5] LIMIT 3", 3); - assertRowCount(keyspace, "SELECT k FROM %s WHERE GEO_DISTANCE(v, [2.5, 3.5]) < 157000", 4); - assertRowCount(keyspace, "SELECT k FROM %s WHERE k=0 ORDER BY v ANN OF [2.5, 3.5] LIMIT 3", 3); - assertRowCount(keyspace, "SELECT k FROM %s WHERE k=0 AND GEO_DISTANCE(v, [2.5, 3.5]) < 157000", 2); - }, Version.JVECTOR_EARLIEST, "INDEX_NOT_AVAILABLE"); + }).withIndex(keyspace -> createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + + "WITH OPTIONS = {'similarity_function' : 'euclidean'}"), + Version.JVECTOR_EARLIEST, + StorageAttachedIndex::vectorUnsupportedByVersionError, + "JVector is not supported in V2OnDiskFormat") + .withQueries(keyspace -> { + assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v ANN OF [2.5, 3.5] LIMIT 3", 3); + assertRowCount(keyspace, "SELECT k FROM %s WHERE GEO_DISTANCE(v, [2.5, 3.5]) < 157000", 4); + assertRowCount(keyspace, "SELECT k FROM %s WHERE k=0 ORDER BY v ANN OF [2.5, 3.5] LIMIT 3", 3); + assertRowCount(keyspace, "SELECT k FROM %s WHERE k=0 AND GEO_DISTANCE(v, [2.5, 3.5]) < 157000", 2); + }, Version.JVECTOR_EARLIEST, "INDEX_NOT_AVAILABLE"); } @Test @@ -234,20 +264,20 @@ public void testBM25OnSkinnyTable() { test(keyspace -> { createTable(keyspace, "CREATE TABLE %s (k int PRIMARY KEY, v text)"); - createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + - "WITH OPTIONS = {" + - "'index_analyzer': '{" + - "\"tokenizer\" : {\"name\" : \"standard\"}, " + - "\"filters\" : [{\"name\" : \"porterstem\"}]}'}"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (1, 'apple')"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (2, 'orange')"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (3, 'banana')"); execute(keyspace, "INSERT INTO %s (k, v) VALUES (4, 'apple')"); - }).withQueries(keyspace -> { - assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'apple' LIMIT 3", 2); - assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'orange' LIMIT 3", 1); - assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'kiwi' LIMIT 3", 0); - }, Version.BM25_EARLIEST, "FEATURE_NEEDS_INDEX_REBUILD"); + }).withIndex(keyspace -> createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + + "WITH OPTIONS = {" + + "'index_analyzer': '{" + + "\"tokenizer\" : {\"name\" : \"standard\"}, " + + "\"filters\" : [{\"name\" : \"porterstem\"}]}'}")) + .withQueries(keyspace -> { + assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'apple' LIMIT 3", 2); + assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'orange' LIMIT 3", 1); + assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'kiwi' LIMIT 3", 0); + }, Version.BM25_EARLIEST, "FEATURE_NEEDS_INDEX_REBUILD"); } @Test @@ -255,11 +285,6 @@ public void testBM25OnWideTable() { test(keyspace -> { createTable(keyspace, "CREATE TABLE %s (k int, c int, v text, PRIMARY KEY (k, c))"); - createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + - "WITH OPTIONS = {" + - "'index_analyzer': '{" + - "\"tokenizer\" : {\"name\" : \"standard\"}, " + - "\"filters\" : [{\"name\" : \"porterstem\"}]}'}"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 1, 'apple')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 2, 'orange')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (0, 3, 'banana')"); @@ -268,14 +293,19 @@ public void testBM25OnWideTable() execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 2, 'orange')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 3, 'banana')"); execute(keyspace, "INSERT INTO %s (k, c, v) VALUES (1, 4, 'apple')"); - }).withQueries(keyspace -> { - assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'apple' LIMIT 3", 3); - assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'orange' LIMIT 3", 2); - assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'kiwi' LIMIT 3", 0); - assertRowCount(keyspace, "SELECT k FROM %s WHERE k=0 ORDER BY v BM25 OF 'apple' LIMIT 3", 2); - assertRowCount(keyspace, "SELECT k FROM %s WHERE k=0 ORDER BY v BM25 OF 'orange' LIMIT 3", 1); - assertRowCount(keyspace, "SELECT k FROM %s WHERE k=0 ORDER BY v BM25 OF 'kiwi' LIMIT 3", 0); - }, Version.BM25_EARLIEST, "FEATURE_NEEDS_INDEX_REBUILD"); + }).withIndex(keyspace -> createIndex(keyspace, "CREATE CUSTOM INDEX IF NOT EXISTS ON %s(v) USING 'StorageAttachedIndex' " + + "WITH OPTIONS = {" + + "'index_analyzer': '{" + + "\"tokenizer\" : {\"name\" : \"standard\"}, " + + "\"filters\" : [{\"name\" : \"porterstem\"}]}'}")) + .withQueries(keyspace -> { + assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'apple' LIMIT 3", 3); + assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'orange' LIMIT 3", 2); + assertRowCount(keyspace, "SELECT k FROM %s ORDER BY v BM25 OF 'kiwi' LIMIT 3", 0); + assertRowCount(keyspace, "SELECT k FROM %s WHERE k=0 ORDER BY v BM25 OF 'apple' LIMIT 3", 2); + assertRowCount(keyspace, "SELECT k FROM %s WHERE k=0 ORDER BY v BM25 OF 'orange' LIMIT 3", 1); + assertRowCount(keyspace, "SELECT k FROM %s WHERE k=0 ORDER BY v BM25 OF 'kiwi' LIMIT 3", 0); + }, Version.BM25_EARLIEST, "FEATURE_NEEDS_INDEX_REBUILD"); } @Override @@ -300,45 +330,62 @@ private void assertRowCount(String keyspace, String query, int expected) assertRowCount(execute(keyspace, query), expected); } - /** - * @param schemaCreator the initial schema and data - */ - private Tester test(Consumer schemaCreator) - { - return test(schemaCreator, Version.AA, null); - } - - /** - * @param schemaCreator the initial schema and data - * @param minVersionForIndexing the minimum index version required to support creating an index with the tested features - * @param indexingErrorIfUnsupported the error message expected during indexing if the index version does not support the tested features - */ - private Tester test(Consumer schemaCreator, Version minVersionForIndexing, String indexingErrorIfUnsupported) + private TestCase test(Consumer tableCreator) { - return new Tester(schemaCreator, minVersionForIndexing, indexingErrorIfUnsupported); + return Operation.CREATE == operation + ? new CreateIndexTestCase(tableCreator) + : new RebuildIndexTestCase(tableCreator); } - private class Tester + private abstract class TestCase { - private final Consumer schemaCreator; - private final Version minVersionForIndexing; - private final String indexingErrorIfUnsupported; + protected final Map versionsPerKeyspace; + protected Consumer indexCreator; + protected Version minVersionForIndexing; + protected Function createIndexErrorIfUnsupported; + protected String rebuildIndexErrorIfUnsupported; /** - * @param schemaCreator the initial schema and data - * @param minVersionForIndexing the minimum index version required to support creating an index with the tested features - * @param indexingErrorIfUnsupported the error message expected during indexing if the index version does not support the tested features + * @param tableCreator the initial schema and data */ - Tester(Consumer schemaCreator, - Version minVersionForIndexing, - String indexingErrorIfUnsupported) + TestCase(Consumer tableCreator) + { + tableCreator.accept(KEYSPACE); + + // create one keyspace, table and dataset per index version + versionsPerKeyspace = new HashMap<>(); + for (Version version : Version.ALL) + { + String keyspace = "ks_" + version; + createKeyspace("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}", keyspace); + tableCreator.accept(keyspace); + disableCompaction(keyspace, TABLE); + flush(keyspace, TABLE); + versionsPerKeyspace.put(keyspace, version); + } + } + + TestCase withIndex(Consumer indexCreator) { - // we can skip the test case if the starting version does not support the tested index - Assume.assumeTrue(globalCurrentVersion.onOrAfter(minVersionForIndexing)); + return withIndex(indexCreator, Version.AA, null, null); + } + /** + * @param indexCreator the index creation statement + * @param minVersionForIndexing the minimum index version required to create or rebuild the index + * @param createIndexErrorIfUnsupported the error message expected if the index version does not support the tested index + * @param rebuildIndexErrorIfUnsupported the error message expected if the index version does not support index rebuild + */ + TestCase withIndex(Consumer indexCreator, + Version minVersionForIndexing, + Function createIndexErrorIfUnsupported, + String rebuildIndexErrorIfUnsupported) + { + this.indexCreator = indexCreator; this.minVersionForIndexing = minVersionForIndexing; - this.indexingErrorIfUnsupported = indexingErrorIfUnsupported; - this.schemaCreator = schemaCreator; + this.createIndexErrorIfUnsupported = createIndexErrorIfUnsupported; + this.rebuildIndexErrorIfUnsupported = rebuildIndexErrorIfUnsupported; + return this; } /** @@ -354,65 +401,136 @@ void withQueries(Consumer queryResultsVerifier) * @param minVersionForQuerying the minimum index version required to support the tested queries * @param queryingErrorIfUnsupported the error message expected if the index version does not support the tested queries */ + abstract void withQueries(Consumer queryResultsVerifier, + Version minVersionForQuerying, + String queryingErrorIfUnsupported); + + protected void verifyQueries(Consumer queryResultsVerifier, + String keyspace, + Version currentVersion, + Version minVersion, + String errorMessage) + { + if (currentVersion.onOrAfter(minVersion)) + queryResultsVerifier.accept(keyspace); + else + Assertions.assertThatThrownBy(() -> queryResultsVerifier.accept(keyspace)) + .hasMessageContaining(errorMessage); + } + } + + /** + * Test case for index creation. + */ + private class CreateIndexTestCase extends TestCase + { + CreateIndexTestCase(Consumer tableCreator) + { + super(tableCreator); + } + + @Override void withQueries(Consumer queryResultsVerifier, Version minVersionForQuerying, String queryingErrorIfUnsupported) { - // we can skip the test case if the starting version does not support the tested queries - Assume.assumeTrue(globalCurrentVersion.onOrAfter(minVersionForQuerying)); + // set the per-keyspace current versions + SAIUtil.setCurrentVersion(globalCurrentVersion, versionsPerKeyspace); - // set the default current version, without any per-keyspace overrides - SAIUtil.setCurrentVersion(globalCurrentVersion); + // verify whether we can create the index in each keyspace, + // depending on whether the tested version supports it, + // and whether we can run the tested queries in case the index creation succeeds + versionsPerKeyspace.forEach((keyspace, version) -> { + if (version.onOrAfter(minVersionForIndexing)) + { + indexCreator.accept(keyspace); + verifySAIVersionInUse(version, keyspace, TABLE); + verifyQueries(queryResultsVerifier, + keyspace, + version, + minVersionForQuerying, + queryingErrorIfUnsupported); + } + else + { + Assertions.assertThatThrownBy(() -> indexCreator.accept(keyspace)) + .hasMessageContaining(createIndexErrorIfUnsupported.apply(version)); + verifyNoIndexFiles(keyspace, TABLE); + } + }); + } + } - // create a series of keyspaces, all of them using the initial default current version, - // and verify the queries and the version of the index - Map versionsPerKeyspace = new HashMap<>(); - for (Version version : Version.ALL) + /** + * Test case for index rebuild. + */ + private class RebuildIndexTestCase extends TestCase + { + RebuildIndexTestCase(Consumer tableCreator) + { + super(tableCreator); + } + + @Override + void withQueries(Consumer queryResultsVerifier, + Version minVersionForQuerying, + String queryingErrorIfUnsupported) + { + // if the starting version does not support the tested queries then we cannot even create the index + if (!globalCurrentVersion.onOrAfter(minVersionForIndexing)) { - // create and track the keyspace for the index version we will later upgrade to - String keyspace = "ks_" + version; - createKeyspace("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}", keyspace); - versionsPerKeyspace.put(keyspace, version); + Assertions.assertThatThrownBy(() -> indexCreator.accept(KEYSPACE)) + .hasMessageContaining(createIndexErrorIfUnsupported.apply(globalCurrentVersion)); + verifyNoIndexFiles(KEYSPACE, TABLE); + return; + } - // create the schema and data, still using the initial version, and verify the queries - schemaCreator.accept(keyspace); - disableCompaction(keyspace, TABLE); - flush(keyspace, TABLE); + // set the default current version, without any per-keyspace overrides + SAIUtil.setCurrentVersion(globalCurrentVersion); + + // create the index for each keyspace, which should use the default current version + versionsPerKeyspace.keySet().forEach(keyspace -> { + indexCreator.accept(keyspace); verifySAIVersionInUse(globalCurrentVersion, keyspace, TABLE); - queryResultsVerifier.accept(keyspace); - } + verifyQueries(queryResultsVerifier, + keyspace, + globalCurrentVersion, + minVersionForQuerying, + queryingErrorIfUnsupported); + }); - // assign a new version to each tested keyspace, so we end up with a different version for each keyspace, + // assign a new version to each tested keyspace, + // so we end up with a different version for each keyspace, // although their index files are still in the initial version SAIUtil.setCurrentVersion(globalCurrentVersion, versionsPerKeyspace); - // try to rebuild the indexes of each keyspace, which should leave them in their new version, + // try to rebuild the indexes of each keyspace, + // which should leave them in their new version if it's supported, // and verify the queries and the version of the index versionsPerKeyspace.forEach((keyspace, version) -> { - logger.debug("Testing index rebuild from {} to {} in keyspace {}", globalCurrentVersion, version, keyspace); verifySAIVersionInUse(globalCurrentVersion, keyspace, TABLE); // Verify if we can rebuild the index, depending on whether the new version supports it - passOrFail(() -> { + if (version.onOrAfter(minVersionForIndexing)) + { rebuildTableIndexes(keyspace, TABLE); reloadSSTableIndexInPlace(keyspace, TABLE); verifySAIVersionInUse(version, keyspace, TABLE); - }, version.onOrAfter(minVersionForIndexing), indexingErrorIfUnsupported); + } + else + { + Assertions.assertThatThrownBy(() -> rebuildTableIndexes(keyspace, TABLE)) + .hasMessageContaining(rebuildIndexErrorIfUnsupported); + } // Verify if we can query the index, depending on whether the new version supports the queries - passOrFail(() -> queryResultsVerifier.accept(keyspace), - version.onOrAfter(minVersionForQuerying), - queryingErrorIfUnsupported); + verifyQueries(queryResultsVerifier, + keyspace, + version, + minVersionForQuerying, + queryingErrorIfUnsupported); }); } - - private void passOrFail(Runnable r, boolean shouldPass, String message) - { - if (shouldPass) - r.run(); - else - Assertions.assertThatThrownBy(r::run).hasMessageContaining(message); - } } }