Skip to content

Commit ce8e326

Browse files
adelapenadjatnieks
authored andcommitted
CNDB-13074: Reject analysis options on frozen collections (#1610)
Reject creating indexes with analysis options on frozen collections. We don't have a way to correctly index them, and we don't support querying either.
1 parent 29a6668 commit ce8e326

File tree

6 files changed

+104
-7
lines changed

6 files changed

+104
-7
lines changed

src/java/org/apache/cassandra/index/sai/IndexContext.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,14 +183,16 @@ public IndexContext(@Nonnull String keyspace,
183183
this.cfs = cfs;
184184
this.primaryKeyFactory = Version.latest().onDiskFormat().newPrimaryKeyFactory(clusteringComparator);
185185

186+
String columnName = column.name.toString();
187+
186188
if (config != null)
187189
{
188190
String fullIndexName = String.format("%s.%s.%s", this.keyspace, this.table, this.config.name);
189191
this.indexWriterConfig = IndexWriterConfig.fromOptions(fullIndexName, validator, config.options);
190192
this.isAnalyzed = AbstractAnalyzer.isAnalyzed(config.options);
191-
this.analyzerFactory = AbstractAnalyzer.fromOptions(getValidator(), config.options);
193+
this.analyzerFactory = AbstractAnalyzer.fromOptions(columnName, validator, config.options);
192194
this.queryAnalyzerFactory = AbstractAnalyzer.hasQueryAnalyzer(config.options)
193-
? AbstractAnalyzer.fromOptionsQueryAnalyzer(getValidator(), config.options)
195+
? AbstractAnalyzer.fromOptionsQueryAnalyzer(validator, config.options)
194196
: this.analyzerFactory;
195197
this.vectorSimilarityFunction = indexWriterConfig.getSimilarityFunction();
196198
this.hasEuclideanSimilarityFunc = vectorSimilarityFunction == VectorSimilarityFunction.EUCLIDEAN;
@@ -204,7 +206,7 @@ public IndexContext(@Nonnull String keyspace,
204206
{
205207
this.indexWriterConfig = IndexWriterConfig.emptyConfig();
206208
this.isAnalyzed = AbstractAnalyzer.isAnalyzed(Collections.emptyMap());
207-
this.analyzerFactory = AbstractAnalyzer.fromOptions(getValidator(), Collections.emptyMap());
209+
this.analyzerFactory = AbstractAnalyzer.fromOptions(columnName, validator, Collections.EMPTY_MAP);
208210
this.queryAnalyzerFactory = this.analyzerFactory;
209211
this.vectorSimilarityFunction = null;
210212
this.hasEuclideanSimilarityFunc = false;

src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ public static Map<String, String> validateOptions(Map<String, String> options, T
329329
AbstractType<?> type = TypeUtil.cellValueType(target.left, target.right);
330330

331331
// Validate analyzers by building them
332-
try (AbstractAnalyzer.AnalyzerFactory analyzerFactory = AbstractAnalyzer.fromOptions(type, options))
332+
try (AbstractAnalyzer.AnalyzerFactory analyzerFactory = AbstractAnalyzer.fromOptions(targetColumn, type, options))
333333
{
334334
if (AbstractAnalyzer.hasQueryAnalyzer(options))
335335
AbstractAnalyzer.fromOptionsQueryAnalyzer(type, options).close();

src/java/org/apache/cassandra/index/sai/analyzer/AbstractAnalyzer.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ public static boolean isAnalyzed(Map<String, String> options) {
187187
return options.containsKey(LuceneAnalyzer.INDEX_ANALYZER) || NonTokenizingOptions.hasNonDefaultOptions(options);
188188
}
189189

190-
public static AnalyzerFactory fromOptions(AbstractType<?> type, Map<String, String> options)
190+
public static AnalyzerFactory fromOptions(String target, AbstractType<?> type, Map<String, String> options)
191191
{
192192
boolean containsIndexAnalyzer = options.containsKey(LuceneAnalyzer.INDEX_ANALYZER);
193193
boolean containsNonTokenizingOptions = NonTokenizingOptions.hasNonDefaultOptions(options);
@@ -206,6 +206,9 @@ public static AnalyzerFactory fromOptions(AbstractType<?> type, Map<String, Stri
206206
" combination of case_sensitive, normalize, or ascii options. options=" + options);
207207
}
208208

209+
if ((containsIndexAnalyzer || containsNonTokenizingOptions) && type.isCollection() && !type.isMultiCell())
210+
throw new InvalidRequestException("Cannot use an analyzer on " + target + " because it's a frozen collection.");
211+
209212
if (containsIndexAnalyzer)
210213
{
211214
String json = options.get(LuceneAnalyzer.INDEX_ANALYZER);

test/unit/org/apache/cassandra/cql3/CQLTester.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ public abstract class CQLTester
243243
private static final int ASSERTION_TIMEOUT_SECONDS = 15;
244244

245245
/**
246-
* Whether to use coorfinator execution in {@link #execute(String, Object...)}, so queries get full validation and
246+
* Whether to use coordinator execution in {@link #execute(String, Object...)}, so queries get full validation and
247247
* go through reconciliation. When enabled, calls to {@link #execute(String, Object...)} will behave as calls to
248248
* {@link #executeWithCoordinator(String, Object...)}. Otherwise, they will behave as calls to
249249
* {@link #executeInternal(String, Object...)}.

test/unit/org/apache/cassandra/index/sai/SAITester.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,23 @@ public static void setUpClass()
238238
CQLTester.enableCoordinatorExecution();
239239
}
240240

241+
/**
242+
* Creates a SAI index on the current table, waiting for it to become queryable.
243+
*
244+
* @param column the name of the indexed column, maybe with {@code FULL()}, {@code KEYS()} or {@code VALUES()} spec
245+
* @param options the index options, of the form {@code "{'option1': value1, 'option2': value2, ...}"}.
246+
* @return the name of the created index
247+
*/
248+
public String createSAIIndex(String column, @Nullable String options)
249+
{
250+
String query = String.format(CREATE_INDEX_TEMPLATE, column);
251+
252+
if (options != null)
253+
query += " WITH OPTIONS = " + options;
254+
255+
return createIndex(query);
256+
}
257+
241258
public static IndexContext createIndexContext(String name, AbstractType<?> validator, ColumnFamilyStore cfs)
242259
{
243260
return new IndexContext(cfs.getKeyspaceName(),

test/unit/org/apache/cassandra/index/sai/cql/AnalyzerTest.java

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,16 @@
1818

1919
package org.apache.cassandra.index.sai.cql;
2020

21+
import org.apache.cassandra.exceptions.InvalidRequestException;
22+
import org.apache.cassandra.index.sai.SAITester;
23+
import org.apache.cassandra.index.sai.analyzer.NonTokenizingOptions;
24+
import org.assertj.core.api.Assertions;
2125
import org.junit.Test;
2226

23-
public class AnalyzerTest extends VectorTester
27+
import javax.annotation.Nullable;
28+
import java.util.Arrays;
29+
30+
public class AnalyzerTest extends SAITester
2431
{
2532
@Test
2633
public void createAnalyzerWrongTypeTest()
@@ -49,4 +56,72 @@ public void createAnalyzerWrongTypeTest()
4956
execute("INSERT INTO %s (pk1, pk2, val) VALUES (0, 'c', -2)");
5057
execute("INSERT INTO %s (pk1, pk2, val) VALUES (1, 'c', -3)");
5158
}
59+
60+
/**
61+
* Test that we cannot use an analyzer, tokenizing or not, on a frozen collection.
62+
*/
63+
@Test
64+
public void analyzerOnFrozenCollectionTest()
65+
{
66+
createTable("CREATE TABLE %s (k int PRIMARY KEY, l frozen<list<text>>, s frozen<set<text>>, m frozen<map<text, text>>)");
67+
68+
for (String column : Arrays.asList("l", "s", "m"))
69+
{
70+
assertRejectsNonFullIndexCreationOnFrozenCollection(column);
71+
column = String.format("full(%s)", column);
72+
73+
// non-tokenizing options that produce an analyzer should be rejected
74+
assertRejectsAnalyzerOnFrozenCollection(column, String.format("{'%s': %s}", NonTokenizingOptions.CASE_SENSITIVE, false));
75+
assertRejectsAnalyzerOnFrozenCollection(column, String.format("{'%s': %s}", NonTokenizingOptions.NORMALIZE, true));
76+
assertRejectsAnalyzerOnFrozenCollection(column, String.format("{'%s': %s}", NonTokenizingOptions.ASCII, true));
77+
78+
// non-tokenizing options that do not produce an analyzer should be accepted
79+
assertAcceptsIndexOptions(column, String.format("{'%s': %s}", NonTokenizingOptions.CASE_SENSITIVE, true));
80+
assertAcceptsIndexOptions(column, String.format("{'%s': %s}", NonTokenizingOptions.NORMALIZE, false));
81+
assertAcceptsIndexOptions(column, String.format("{'%s': %s}", NonTokenizingOptions.ASCII, false));
82+
83+
// Lucene analyzer should always be rejected
84+
assertRejectsAnalyzerOnFrozenCollection(column, "{'index_analyzer': 'standard'}");
85+
assertRejectsAnalyzerOnFrozenCollection(column,
86+
"{'index_analyzer': " +
87+
" '{\"tokenizer\":{\"name\":\"ngram\", \"args\":{\"minGramSize\":\"2\", \"maxGramSize\":\"3\"}}," +
88+
" \"filters\":[{\"name\":\"lowercase\"}]}'}");
89+
assertRejectsAnalyzerOnFrozenCollection(column,
90+
"{'index_analyzer':'\n" +
91+
" {\"tokenizer\":{\"name\" : \"whitespace\"},\n" +
92+
" \"filters\":[{\"name\":\"stop\", \"args\": {\"words\": \"the, test\", \"format\": \"wordset\"}}]}'}");
93+
94+
// no options should be accepted
95+
assertAcceptsIndexOptions(column, null);
96+
assertAcceptsIndexOptions(column, "{}");
97+
}
98+
}
99+
100+
private void assertRejectsNonFullIndexCreationOnFrozenCollection(String column)
101+
{
102+
Assertions.assertThatThrownBy(() -> createSAIIndex(column, null))
103+
.isInstanceOf(InvalidRequestException.class)
104+
.hasMessageContaining("Cannot create values() index on frozen column " + column);
105+
106+
Assertions.assertThatThrownBy(() -> createSAIIndex("KEYS(" + column + ')', null))
107+
.isInstanceOf(InvalidRequestException.class)
108+
.hasMessageContaining("Cannot create keys() index on frozen column " + column);
109+
110+
Assertions.assertThatThrownBy(() -> createSAIIndex("VALUES(" + column + ')', null))
111+
.isInstanceOf(InvalidRequestException.class)
112+
.hasMessageContaining("Cannot create values() index on frozen column " + column);
113+
}
114+
115+
private void assertAcceptsIndexOptions(String column, @Nullable String options)
116+
{
117+
String index = createSAIIndex(column, options);
118+
dropIndex("DROP INDEX %s." + index); // clear for further tests
119+
}
120+
121+
private void assertRejectsAnalyzerOnFrozenCollection(String column, String options)
122+
{
123+
Assertions.assertThatThrownBy(() -> createSAIIndex(column, options))
124+
.isInstanceOf(InvalidRequestException.class)
125+
.hasMessageContaining("Cannot use an analyzer on " + column + " because it's a frozen collection.");
126+
}
52127
}

0 commit comments

Comments
 (0)