Skip to content

Commit fc5bb81

Browse files
adelapenadriftx
authored andcommitted
CNDB-13075: Reuse the analyzed tokens of the right operand of an analyzed expression (#1620)
Memoize the analyzed tokens of the right operand of an analyzed expression. Also add some refactoring and simplifications around how analysis is dealt with in `RowFilter` and `Operator`.
1 parent f24d46e commit fc5bb81

File tree

12 files changed

+290
-343
lines changed

12 files changed

+290
-343
lines changed

src/java/org/apache/cassandra/cql3/Operator.java

Lines changed: 55 additions & 186 deletions
Large diffs are not rendered by default.

src/java/org/apache/cassandra/cql3/conditions/ColumnCondition.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ else if (otherValue == null)
257257
// the condition value is not null, so only NEQ can return true
258258
return operator == Operator.NEQ;
259259
}
260-
return operator.isSatisfiedBy(type, otherValue, value, null, null); // We don't use any analyzers in LWT, see CNDB-11658
260+
return operator.isSatisfiedBy(type, otherValue, value); // We don't use any analyzers in LWT, see CNDB-11658
261261
}
262262
}
263263

src/java/org/apache/cassandra/cql3/conditions/ColumnConditions.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ public Set<ColumnMetadata> getAnalyzedColumns(IndexRegistry indexRegistry)
9393

9494
for (ColumnCondition condition : this)
9595
{
96-
if (indexRegistry.getIndexAnalyzerFor(condition.column, condition.operator).isPresent())
96+
if (indexRegistry.getAnalyzerFor(condition.column, condition.operator, null).isPresent())
9797
{
9898
analyzedColumns.add(condition.column);
9999
}

src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1225,7 +1225,7 @@ public void addToRowFilter(RowFilter.Builder filter, IndexRegistry indexRegistry
12251225
{
12261226
var index = findSupportingIndex(indexRegistry);
12271227
var valueBytes = value.bindAndGet(options);
1228-
var terms = index.getQueryAnalyzer().get().analyze(valueBytes);
1228+
var terms = index.getAnalyzer(valueBytes).get().queriedTokens();
12291229
if (terms.isEmpty())
12301230
throw invalidRequest("BM25 query must contain at least one term (perhaps your analyzer is discarding tokens you didn't expect)");
12311231
filter.add(columnDef, Operator.BM25, valueBytes);

src/java/org/apache/cassandra/db/filter/RowFilter.java

Lines changed: 65 additions & 88 deletions
Large diffs are not rendered by default.

src/java/org/apache/cassandra/index/Index.java

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -486,23 +486,12 @@ default RowFilter.CustomExpression customExpressionFor(TableMetadata metadata, B
486486
}
487487

488488
/**
489-
* Returns the write-time {@link Analyzer} for this index, if any. If the index doesn't transform the column values,
489+
* Returns the {@link Analyzer} for this index, if any. If the index doesn't transform the column values,
490490
* this method will return an empty optional.
491491
*
492-
* @return the write-time transforming column value analyzer for the index, if any
492+
* @return the transforming column value analyzer for the index, if any
493493
*/
494-
default Optional<Analyzer> getIndexAnalyzer()
495-
{
496-
return Optional.empty();
497-
}
498-
499-
/**
500-
* Returns the query-time {@link Analyzer} for this index, if any. If the index doesn't transform the column values,
501-
* this method will return an empty optional.
502-
*
503-
* @return the query-time transforming column value analyzer for the index, if any
504-
*/
505-
default Optional<Analyzer> getQueryAnalyzer()
494+
default Optional<Analyzer> getAnalyzer(ByteBuffer queriedValue)
506495
{
507496
return Optional.empty();
508497
}
@@ -514,10 +503,11 @@ default Optional<Analyzer> getQueryAnalyzer()
514503
* index. It can be used to perform the same transformation on values that the index does when indexing. That way,
515504
* the CQL operator can replicate the index behaviour when filtering results.
516505
*/
517-
@FunctionalInterface
518506
interface Analyzer
519507
{
520-
List<ByteBuffer> analyze(ByteBuffer value);
508+
List<ByteBuffer> indexedTokens(ByteBuffer indexedValue);
509+
510+
List<ByteBuffer> queriedTokens();
521511
}
522512

523513
/**

src/java/org/apache/cassandra/index/IndexRegistry.java

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
package org.apache.cassandra.index;
2222

23+
import java.nio.ByteBuffer;
2324
import java.util.Collection;
2425
import java.util.Collections;
2526
import java.util.Optional;
@@ -315,25 +316,13 @@ default void registerIndex(Index index)
315316
Index getIndex(IndexMetadata indexMetadata);
316317
Collection<Index> listIndexes();
317318

318-
default Optional<Index.Analyzer> getIndexAnalyzerFor(ColumnMetadata column, Operator operator)
319-
{
320-
return getAnalyzerFor(column, operator, Index::getIndexAnalyzer);
321-
}
322-
323-
default Optional<Index.Analyzer> getQueryAnalyzerFor(ColumnMetadata column, Operator operator)
324-
{
325-
return getAnalyzerFor(column, operator, Index::getQueryAnalyzer);
326-
}
327-
328-
default Optional<Index.Analyzer> getAnalyzerFor(ColumnMetadata column,
329-
Operator operator,
330-
Function<Index, Optional<Index.Analyzer>> analyzerGetter)
319+
default Optional<Index.Analyzer> getAnalyzerFor(ColumnMetadata column, Operator operator, ByteBuffer value)
331320
{
332321
for (Index index : listIndexes())
333322
{
334323
if (index.supportsExpression(column, operator))
335324
{
336-
Optional<Index.Analyzer> analyzer = analyzerGetter.apply(index);
325+
Optional<Index.Analyzer> analyzer = index.getAnalyzer(value);
337326
if (analyzer.isPresent())
338327
return analyzer;
339328
}

src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -681,23 +681,34 @@ public AbstractType<?> customExpressionValueType()
681681
}
682682

683683
@Override
684-
public Optional<Analyzer> getIndexAnalyzer()
684+
public Optional<Analyzer> getAnalyzer(ByteBuffer queriedValue)
685685
{
686-
return indexContext.isAnalyzed()
687-
? Optional.of(value -> analyze(indexContext.getAnalyzerFactory(), value))
688-
: Optional.empty();
689-
}
686+
if (!indexContext.isAnalyzed())
687+
return Optional.empty();
690688

691-
@Override
692-
public Optional<Analyzer> getQueryAnalyzer()
693-
{
694-
return indexContext.isAnalyzed()
695-
? Optional.of(value -> analyze(indexContext.getQueryAnalyzerFactory(), value))
696-
: Optional.empty();
689+
// memoize the analyzed queried value, so we don't have to re-analyze it for every evaluated column value
690+
List<ByteBuffer> queriedTokens = analyze(indexContext.getQueryAnalyzerFactory(), queriedValue);
691+
692+
return Optional.of(new Analyzer() {
693+
@Override
694+
public List<ByteBuffer> indexedTokens(ByteBuffer value)
695+
{
696+
return analyze(indexContext.getAnalyzerFactory(), value);
697+
}
698+
699+
@Override
700+
public List<ByteBuffer> queriedTokens()
701+
{
702+
return queriedTokens;
703+
}
704+
});
697705
}
698706

699707
private static List<ByteBuffer> analyze(AbstractAnalyzer.AnalyzerFactory factory, ByteBuffer value)
700708
{
709+
if (value == null)
710+
return null;
711+
701712
List<ByteBuffer> tokens = new ArrayList<>();
702713
AbstractAnalyzer analyzer = factory.create();
703714
try

src/java/org/apache/cassandra/index/sai/plan/Operation.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,8 +310,7 @@ static Node buildExpression(QueryController controller, RowFilter.Expression exp
310310
ListSerializer.readValue(expression.getIndexValue(),
311311
ByteBufferAccessor.instance,
312312
offset),
313-
expression.indexAnalyzer(),
314-
expression.queryAnalyzer(),
313+
expression.analyzer(),
315314
expression.annOptions())));
316315
offset += TypeSizes.INT_SIZE + ByteBufferAccessor.instance.getInt(expression.getIndexValue(), offset);
317316
}

src/java/org/apache/cassandra/index/sai/utils/PrimaryKeyWithSortKey.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,13 @@ public boolean isIndexDataValid(Row row, long nowInSecs)
6969
var cell = row.getCell(context.getDefinition());
7070
if (!cell.isLive(nowInSecs))
7171
return false;
72-
assert cell instanceof CellWithSourceTable : "Expected CellWithSource, got " + cell.getClass();
72+
// Check if the row is wrapped and if not, skip the source table check
73+
if (!(cell instanceof CellWithSourceTable))
74+
{
75+
// If the cell is not wrapped, we can't validate the source table,
76+
// so we just check if the index data matches the live data
77+
return isIndexDataEqualToLiveData(cell.buffer());
78+
}
7379
return sourceTable.equals(((CellWithSourceTable<?>) cell).sourceTable())
7480
&& isIndexDataEqualToLiveData(cell.buffer());
7581
}

0 commit comments

Comments
 (0)