Skip to content

Commit 67662ec

Browse files
adelapenadjatnieks
authored andcommitted
CNDB-12739: Fix row filter ignoring distinct index and query analyzers (#1548)
Make RowFilter.Expression consider the two different index analyzers that an indexed column can have, one for write time (index_analyzer) and the other for read time (query_analyzer).
1 parent f174934 commit 67662ec

File tree

13 files changed

+580
-247
lines changed

13 files changed

+580
-247
lines changed

src/java/org/apache/cassandra/cql3/Operator.java

Lines changed: 162 additions & 44 deletions
Large diffs are not rendered by default.

src/java/org/apache/cassandra/cql3/conditions/ColumnCondition.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ else if (otherValue == null)
257257
// the condition value is not null, so only NEQ can return true
258258
return operator == Operator.NEQ;
259259
}
260-
return operator.isSatisfiedBy(type, otherValue, value, null); // We don't use any analyzers in LWT, see CNDB-11658
260+
return operator.isSatisfiedBy(type, otherValue, value, null, null); // We don't use any analyzers in LWT, see CNDB-11658
261261
}
262262
}
263263

src/java/org/apache/cassandra/cql3/conditions/ColumnConditions.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ public Set<ColumnMetadata> getAnalyzedColumns(IndexRegistry indexRegistry)
9393

9494
for (ColumnCondition condition : this)
9595
{
96-
if (indexRegistry.getAnalyzerFor(condition.column, condition.operator).isPresent())
96+
if (indexRegistry.getIndexAnalyzerFor(condition.column, condition.operator).isPresent())
9797
{
9898
analyzedColumns.add(condition.column);
9999
}

src/java/org/apache/cassandra/db/filter/RowFilter.java

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ public boolean partitionKeyRestrictionsAreSatisfiedBy(DecoratedKey key, Abstract
326326
ByteBuffer value = keyValidator instanceof CompositeType
327327
? ((CompositeType) keyValidator).split(key.getKey())[e.column.position()]
328328
: key.getKey();
329-
if (!e.operator().isSatisfiedBy(e.column.type, value, e.value, e.analyzer()))
329+
if (!e.operator().isSatisfiedBy(e.column.type, value, e.value, e.indexAnalyzer(), e.queryAnalyzer()))
330330
return false;
331331
}
332332
return true;
@@ -343,7 +343,7 @@ public boolean clusteringKeyRestrictionsAreSatisfiedBy(Clustering<?> clustering)
343343
if (!e.column.isClusteringColumn())
344344
continue;
345345

346-
if (!e.operator().isSatisfiedBy(e.column.type, clustering.bufferAt(e.column.position()), e.value, e.analyzer()))
346+
if (!e.operator().isSatisfiedBy(e.column.type, clustering.bufferAt(e.column.position()), e.value, e.indexAnalyzer(), e.queryAnalyzer()))
347347
return false;
348348
}
349349
return true;
@@ -577,7 +577,7 @@ else if (builder.current.children.size() == 1 && builder.current.expressions.isE
577577
public SimpleExpression add(ColumnMetadata def, Operator op, ByteBuffer value)
578578
{
579579
assert op != Operator.ANN : "ANN expressions should be added with the addANNExpression method";
580-
SimpleExpression expression = new SimpleExpression(def, op, value, analyzer(def, op), null);
580+
SimpleExpression expression = new SimpleExpression(def, op, value, indexAnalyzer(def, op), queryAnalyzer(def, op), null);
581581
add(expression);
582582
return expression;
583583
}
@@ -591,18 +591,24 @@ public SimpleExpression add(ColumnMetadata def, Operator op, ByteBuffer value)
591591
*/
592592
public void addANNExpression(ColumnMetadata def, ByteBuffer value, ANNOptions annOptions)
593593
{
594-
add(new SimpleExpression(def, Operator.ANN, value, null, annOptions));
594+
add(new SimpleExpression(def, Operator.ANN, value, null, null, annOptions));
595595
}
596596

597597
public void addMapComparison(ColumnMetadata def, ByteBuffer key, Operator op, ByteBuffer value)
598598
{
599-
add(new MapComparisonExpression(def, key, op, value, analyzer(def, op)));
599+
add(new MapComparisonExpression(def, key, op, value, indexAnalyzer(def, op), queryAnalyzer(def, op)));
600600
}
601601

602602
@Nullable
603-
private Index.Analyzer analyzer(ColumnMetadata def, Operator op)
603+
private Index.Analyzer indexAnalyzer(ColumnMetadata def, Operator op)
604604
{
605-
return indexRegistry == null ? null : indexRegistry.getAnalyzerFor(def, op).orElse(null);
605+
return indexRegistry == null ? null : indexRegistry.getIndexAnalyzerFor(def, op).orElse(null);
606+
}
607+
608+
@Nullable
609+
private Index.Analyzer queryAnalyzer(ColumnMetadata def, Operator op)
610+
{
611+
return indexRegistry == null ? null : indexRegistry.getQueryAnalyzerFor(def, op).orElse(null);
606612
}
607613

608614
public void addGeoDistanceExpression(ColumnMetadata def, ByteBuffer point, Operator op, ByteBuffer distance)
@@ -981,7 +987,13 @@ public Operator operator()
981987
}
982988

983989
@Nullable
984-
public Index.Analyzer analyzer()
990+
public Index.Analyzer indexAnalyzer()
991+
{
992+
return null;
993+
}
994+
995+
@Nullable
996+
public Index.Analyzer queryAnalyzer()
985997
{
986998
return null;
987999
}
@@ -1176,7 +1188,9 @@ public Expression deserialize(DataInputPlus in, int version, TableMetadata metad
11761188
ByteBuffer name = ByteBufferUtil.readWithShortLength(in);
11771189
Operator operator = Operator.readFrom(in);
11781190
ColumnMetadata column = metadata.getColumn(name);
1179-
Index.Analyzer analyzer = IndexRegistry.obtain(metadata).getAnalyzerFor(column, operator).orElse(null);
1191+
IndexRegistry indexRegistry = IndexRegistry.obtain(metadata);
1192+
Index.Analyzer indexAnalyzer = indexRegistry.getIndexAnalyzerFor(column, operator).orElse(null);
1193+
Index.Analyzer queryAnalyzer = indexRegistry.getQueryAnalyzerFor(column, operator).orElse(null);
11801194

11811195
// Compact storage tables, when used with thrift, used to allow falling through this withouot throwing an
11821196
// exception. However, since thrift was removed in 4.0, this behaviour was not restored in CASSANDRA-16217
@@ -1188,11 +1202,11 @@ public Expression deserialize(DataInputPlus in, int version, TableMetadata metad
11881202
case SIMPLE:
11891203
ByteBuffer value = ByteBufferUtil.readWithShortLength(in);
11901204
ANNOptions annOptions = operator == Operator.ANN ? ANNOptions.serializer.deserialize(in, version) : null;
1191-
return new SimpleExpression(column, operator, value, analyzer, annOptions);
1205+
return new SimpleExpression(column, operator, value, indexAnalyzer, queryAnalyzer, annOptions);
11921206
case MAP_COMPARISON:
11931207
ByteBuffer key = ByteBufferUtil.readWithShortLength(in);
11941208
ByteBuffer val = ByteBufferUtil.readWithShortLength(in);
1195-
return new MapComparisonExpression(column, key, operator, val, analyzer);
1209+
return new MapComparisonExpression(column, key, operator, val, indexAnalyzer, queryAnalyzer);
11961210
case VECTOR_RADIUS:
11971211
Operator boundaryOperator = Operator.readFrom(in);
11981212
ByteBuffer distance = ByteBufferUtil.readWithShortLength(in);
@@ -1249,26 +1263,40 @@ public long serializedSize(Expression expression, int version)
12491263
public abstract static class AnalyzableExpression extends Expression
12501264
{
12511265
@Nullable
1252-
protected final Index.Analyzer analyzer;
1266+
protected final Index.Analyzer indexAnalyzer;
1267+
1268+
@Nullable
1269+
protected final Index.Analyzer queryAnalyzer;
12531270

1254-
public AnalyzableExpression(ColumnMetadata column, Operator operator, ByteBuffer value, @Nullable Index.Analyzer analyzer)
1271+
public AnalyzableExpression(ColumnMetadata column,
1272+
Operator operator,
1273+
ByteBuffer value,
1274+
@Nullable Index.Analyzer indexAnalyzer,
1275+
@Nullable Index.Analyzer queryAnalyzer)
12551276
{
12561277
super(column, operator, value);
1257-
this.analyzer = analyzer;
1278+
this.indexAnalyzer = indexAnalyzer;
1279+
this.queryAnalyzer = queryAnalyzer;
1280+
}
1281+
1282+
@Nullable
1283+
public final Index.Analyzer indexAnalyzer()
1284+
{
1285+
return indexAnalyzer;
12581286
}
12591287

12601288
@Nullable
1261-
public final Index.Analyzer analyzer()
1289+
public final Index.Analyzer queryAnalyzer()
12621290
{
1263-
return analyzer;
1291+
return queryAnalyzer;
12641292
}
12651293

12661294
@Override
12671295
public int numFilteredValues()
12681296
{
1269-
return analyzer == null
1297+
return indexAnalyzer == null
12701298
? super.numFilteredValues()
1271-
: analyzer().analyze(value).size();
1299+
: indexAnalyzer().analyze(value).size();
12721300
}
12731301
}
12741302

@@ -1283,10 +1311,11 @@ public static class SimpleExpression extends AnalyzableExpression
12831311
public SimpleExpression(ColumnMetadata column,
12841312
Operator operator,
12851313
ByteBuffer value,
1286-
@Nullable Index.Analyzer analyzer,
1314+
@Nullable Index.Analyzer indexAnalyzer,
1315+
@Nullable Index.Analyzer queryAnalyzer,
12871316
@Nullable ANNOptions annOptions)
12881317
{
1289-
super(column, operator, value, analyzer);
1318+
super(column, operator, value, indexAnalyzer, queryAnalyzer);
12901319
this.annOptions = annOptions;
12911320
}
12921321

@@ -1324,13 +1353,13 @@ public boolean isSatisfiedBy(TableMetadata metadata, DecoratedKey partitionKey,
13241353
return false;
13251354

13261355
ByteBuffer counterValue = LongType.instance.decompose(CounterContext.instance().total(foundValue, ByteBufferAccessor.instance));
1327-
return operator.isSatisfiedBy(LongType.instance, counterValue, value, analyzer);
1356+
return operator.isSatisfiedBy(LongType.instance, counterValue, value, indexAnalyzer, queryAnalyzer);
13281357
}
13291358
else
13301359
{
13311360
// Note that CQL expression are always of the form 'x < 4', i.e. the tested value is on the left.
13321361
ByteBuffer foundValue = getValue(metadata, partitionKey, row, nowInSec);
1333-
return foundValue != null && operator.isSatisfiedBy(column.type, foundValue, value, analyzer);
1362+
return foundValue != null && operator.isSatisfiedBy(column.type, foundValue, value, indexAnalyzer, queryAnalyzer);
13341363
}
13351364
}
13361365
case NEQ:
@@ -1344,7 +1373,7 @@ public boolean isSatisfiedBy(TableMetadata metadata, DecoratedKey partitionKey,
13441373
assert !column.isComplex() : "Only CONTAINS and CONTAINS_KEY are supported for collection types";
13451374
ByteBuffer foundValue = getValue(metadata, partitionKey, row, nowInSec);
13461375
// Note that CQL expression are always of the form 'x < 4', i.e. the tested value is on the left.
1347-
return foundValue != null && operator.isSatisfiedBy(column.type, foundValue, value, analyzer);
1376+
return foundValue != null && operator.isSatisfiedBy(column.type, foundValue, value, indexAnalyzer, queryAnalyzer);
13481377
}
13491378
case CONTAINS:
13501379
return contains(metadata, partitionKey, row, nowInSec);
@@ -1475,9 +1504,10 @@ public MapComparisonExpression(ColumnMetadata column,
14751504
ByteBuffer key,
14761505
Operator operator,
14771506
ByteBuffer value,
1478-
@Nullable Index.Analyzer analyzer)
1507+
@Nullable Index.Analyzer indexAnalyzer,
1508+
@Nullable Index.Analyzer queryAnalyzer)
14791509
{
1480-
super(column, operator, value, analyzer);
1510+
super(column, operator, value, indexAnalyzer, queryAnalyzer);
14811511
assert column.type instanceof MapType && (operator == Operator.EQ || operator == Operator.NEQ || operator.isSlice());
14821512
this.key = key;
14831513
}

src/java/org/apache/cassandra/index/Index.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -486,12 +486,23 @@ default RowFilter.CustomExpression customExpressionFor(TableMetadata metadata, B
486486
}
487487

488488
/**
489-
* Returns the {@link Analyzer} for this index, if any. If the index doesn't transform the column values, this
490-
* method will return an empty optional.
489+
* Returns the write-time {@link Analyzer} for this index, if any. If the index doesn't transform the column values,
490+
* this method will return an empty optional.
491491
*
492-
* @return the transforming column value analyzer for the index, if any
492+
* @return the write-time transforming column value analyzer for the index, if any
493493
*/
494-
default Optional<Analyzer> getAnalyzer()
494+
default Optional<Analyzer> getIndexAnalyzer()
495+
{
496+
return Optional.empty();
497+
}
498+
499+
/**
500+
* Returns the query-time {@link Analyzer} for this index, if any. If the index doesn't transform the column values,
501+
* this method will return an empty optional.
502+
*
503+
* @return the query-time transforming column value analyzer for the index, if any
504+
*/
505+
default Optional<Analyzer> getQueryAnalyzer()
495506
{
496507
return Optional.empty();
497508
}

src/java/org/apache/cassandra/index/IndexRegistry.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.Optional;
2626
import java.util.Set;
2727
import java.util.concurrent.Callable;
28+
import java.util.function.Function;
2829
import java.util.function.Predicate;
2930
import java.util.function.Supplier;
3031

@@ -326,13 +327,25 @@ default void registerIndex(Index index)
326327
Index getIndex(IndexMetadata indexMetadata);
327328
Collection<Index> listIndexes();
328329

329-
default Optional<Index.Analyzer> getAnalyzerFor(ColumnMetadata column, Operator operator)
330+
default Optional<Index.Analyzer> getIndexAnalyzerFor(ColumnMetadata column, Operator operator)
331+
{
332+
return getAnalyzerFor(column, operator, Index::getIndexAnalyzer);
333+
}
334+
335+
default Optional<Index.Analyzer> getQueryAnalyzerFor(ColumnMetadata column, Operator operator)
336+
{
337+
return getAnalyzerFor(column, operator, Index::getQueryAnalyzer);
338+
}
339+
340+
default Optional<Index.Analyzer> getAnalyzerFor(ColumnMetadata column,
341+
Operator operator,
342+
Function<Index, Optional<Index.Analyzer>> analyzerGetter)
330343
{
331344
for (Index index : listIndexes())
332345
{
333346
if (index.supportsExpression(column, operator))
334347
{
335-
Optional<Index.Analyzer> analyzer = index.getAnalyzer();
348+
Optional<Index.Analyzer> analyzer = analyzerGetter.apply(index);
336349
if (analyzer.isPresent())
337350
return analyzer;
338351
}

src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -674,26 +674,36 @@ public AbstractType<?> customExpressionValueType()
674674
}
675675

676676
@Override
677-
public Optional<Analyzer> getAnalyzer()
677+
public Optional<Analyzer> getIndexAnalyzer()
678678
{
679-
if (!indexContext.isAnalyzed())
680-
return Optional.empty();
679+
return indexContext.isAnalyzed()
680+
? Optional.of(value -> analyze(indexContext.getAnalyzerFactory(), value))
681+
: Optional.empty();
682+
}
681683

682-
return Optional.of(value -> {
683-
List<ByteBuffer> tokens = new ArrayList<>();
684-
AbstractAnalyzer analyzer = indexContext.getQueryAnalyzerFactory().create();
685-
try
686-
{
687-
analyzer.reset(value);
688-
while (analyzer.hasNext())
689-
tokens.add(analyzer.next());
690-
}
691-
finally
692-
{
693-
analyzer.end();
694-
}
695-
return tokens;
696-
});
684+
@Override
685+
public Optional<Analyzer> getQueryAnalyzer()
686+
{
687+
return indexContext.isAnalyzed()
688+
? Optional.of(value -> analyze(indexContext.getQueryAnalyzerFactory(), value))
689+
: Optional.empty();
690+
}
691+
692+
private static List<ByteBuffer> analyze(AbstractAnalyzer.AnalyzerFactory factory, ByteBuffer value)
693+
{
694+
List<ByteBuffer> tokens = new ArrayList<>();
695+
AbstractAnalyzer analyzer = factory.create();
696+
try
697+
{
698+
analyzer.reset(value.duplicate());
699+
while (analyzer.hasNext())
700+
tokens.add(analyzer.next());
701+
}
702+
finally
703+
{
704+
analyzer.end();
705+
}
706+
return tokens;
697707
}
698708

699709
@Override

src/java/org/apache/cassandra/index/sai/plan/Operation.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,8 @@ static Node buildExpression(QueryController controller, RowFilter.Expression exp
309309
ListSerializer.readValue(expression.getIndexValue(),
310310
ByteBufferAccessor.instance,
311311
offset),
312-
expression.analyzer(),
312+
expression.indexAnalyzer(),
313+
expression.queryAnalyzer(),
313314
expression.annOptions())));
314315
offset += TypeSizes.INT_SIZE + ByteBufferAccessor.instance.getInt(expression.getIndexValue(), offset);
315316
}

test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.junit.Assert;
3535
import org.junit.BeforeClass;
3636

37+
import org.apache.cassandra.cql3.CQLTester;
3738
import org.apache.cassandra.db.marshal.*;
3839
import org.apache.cassandra.distributed.Cluster;
3940
import org.apache.cassandra.distributed.api.Feature;
@@ -210,4 +211,38 @@ void assertCannotStartDueToConfigurationException(Cluster cluster)
210211
Assert.assertEquals(ConfigurationException.class.getName(), tr.getClass().getName());
211212
}
212213
}
214+
215+
/**
216+
* Runs the given function before and after a flush of sstables. This is useful for checking that behavior is
217+
* the same whether data is in memtables or sstables.
218+
*
219+
* @param cluster the tested cluster
220+
* @param keyspace the keyspace to flush
221+
* @param runnable the test to run
222+
*/
223+
public static void beforeAndAfterFlush(Cluster cluster, String keyspace, CQLTester.CheckedFunction runnable) throws Throwable
224+
{
225+
try
226+
{
227+
runnable.apply();
228+
}
229+
catch (Throwable t)
230+
{
231+
throw new AssertionError("Test failed before flush:\n" + t, t);
232+
}
233+
234+
for (int i = 1; i <= cluster.size(); i++)
235+
{
236+
cluster.get(i).flush(keyspace);
237+
238+
try
239+
{
240+
runnable.apply();
241+
}
242+
catch (Throwable t)
243+
{
244+
throw new AssertionError("Test failed after flushing node " + i + ":\n" + t, t);
245+
}
246+
}
247+
}
213248
}

0 commit comments

Comments
 (0)