Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion src/java/org/apache/cassandra/cql3/Operator.java
Original file line number Diff line number Diff line change
Expand Up @@ -490,9 +490,17 @@ public boolean isSatisfiedBy(AbstractType<?> type,
{
assert indexAnalyzer != null && queryAnalyzer != null : ": operation can only be computed by an indexed column with a configured analyzer";

List<ByteBuffer> leftTokens = indexAnalyzer.analyze(leftOperand);
List<ByteBuffer> rightTokens = queryAnalyzer.analyze(rightOperand);
return isSatisfiedBy(type, leftOperand, rightTokens, indexAnalyzer);
}

@Override
public boolean isSatisfiedBy(AbstractType<?> type,
ByteBuffer leftValue,
List<ByteBuffer> rightTokens,
Index.Analyzer indexAnalyzer)
{
List<ByteBuffer> leftTokens = indexAnalyzer.analyze(leftValue);
Iterator<ByteBuffer> it = rightTokens.iterator();

do
Expand Down Expand Up @@ -641,6 +649,22 @@ public abstract boolean isSatisfiedBy(AbstractType<?> type,
ByteBuffer rightOperand,
@Nullable Index.Analyzer indexAnalyzer,
@Nullable Index.Analyzer queryAnalyzer);
/**
* Whether 2 analyzable values satisfy this operator (given the type they should be compared with).
*
* @param type the type of the values to compare.
* @param leftOperand the left operand of the comparison.
* @param rightTokens the right operand of the comparison decomposed as analyzed tokens.
* @param indexAnalyzer an index-provided function to transform the left-side compared value before comparison,
* it shouldn't be {@code null}.
*/
public boolean isSatisfiedBy(AbstractType<?> type,
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This new method can be used in a future improvement patch to cache the tokens of the right, fixed part of the operation in the calling RowFilter.Expression. That way we wouldn't need to analyze that fixed part once and again for every evaluated row. I'm leaving that improvement out of this ticket so we can focus on fixing the correctness bug as soon as possible.

ByteBuffer leftOperand,
List<ByteBuffer> rightTokens,
Index.Analyzer indexAnalyzer)
{
throw new UnsupportedOperationException();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record - Test coverage complains we don't cover this.....

}

public int serializedSize()
{
Expand Down
48 changes: 26 additions & 22 deletions src/java/org/apache/cassandra/db/filter/RowFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -1260,22 +1260,28 @@ public boolean isSatisfiedBy(TableMetadata metadata, DecoratedKey partitionKey,
private boolean contains(TableMetadata metadata, DecoratedKey partitionKey, Row row)
{
assert column.type.isCollection();
CollectionType<?> type = (CollectionType<?>)column.type;
assert (indexAnalyzer == null) == (queryAnalyzer == null);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the assertion test coverage complains about

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is something that shouldn't happen given how the expressions are built (see here). I can only reproduce this by convoluted synthetic means which probably don't give too much value.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my opinion, the point of assertions is to test invariants that are not currently reachable but might become reachable in the future but that would break this code.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my opinion, the point of assertions is to test invariants that are not currently reachable but might become reachable in the future but that would break this code.

Exactly, as mentioned in Slack I marked them mostly for history.
If later someone wants to check the coverage for an old PR - they see only the percentage and they cannot get to the details in Sonar anymore.


CollectionType<?> type = (CollectionType<?>) column.type;
List<ByteBuffer> analyzedValues = queryAnalyzer == null ? null : queryAnalyzer.analyze(value);

if (column.isComplex())
{
ComplexColumnData complexData = row.getComplexColumnData(column);
if (complexData != null)
{
AbstractType<?> elementType = type.kind == CollectionType.Kind.SET ? type.nameComparator() : type.valueComparator();
for (Cell<?> cell : complexData)
{
if (type.kind == CollectionType.Kind.SET)
ByteBuffer elementValue = type.kind == CollectionType.Kind.SET ? cell.path().get(0) : cell.buffer();
if (analyzedValues == null)
{
if (type.nameComparator().compare(cell.path().get(0), value) == 0)
if (elementType.compare(elementValue, value) == 0)
return true;
}
else
{
if (type.valueComparator().compare(cell.buffer(), value) == 0)
if (Operator.ANALYZER_MATCHES.isSatisfiedBy(elementType, elementValue, analyzedValues, indexAnalyzer))
return true;
}
}
Expand All @@ -1285,37 +1291,35 @@ private boolean contains(TableMetadata metadata, DecoratedKey partitionKey, Row
else
{
ByteBuffer foundValue = getValue(metadata, partitionKey, row);
if (foundValue == null)
return false;

switch (type.kind)
{
case LIST:
ListType<?> listType = (ListType<?>)type;
return listType.compose(foundValue).contains(listType.getElementsType().compose(value));
case SET:
SetType<?> setType = (SetType<?>)type;
return setType.compose(foundValue).contains(setType.getElementsType().compose(value));
case MAP:
MapType<?,?> mapType = (MapType<?, ?>)type;
return mapType.compose(foundValue).containsValue(mapType.getValuesType().compose(value));
}
throw new AssertionError();
return foundValue != null && Operator.CONTAINS.isSatisfiedBy(type, foundValue, value, indexAnalyzer, queryAnalyzer);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is so much better!

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test coverage complains we cover 3 of 4 conditions only

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we cannot do a similar logical simplification in the column.isComplex() case?

}
}

private boolean containsKey(TableMetadata metadata, DecoratedKey partitionKey, Row row)
{
assert column.type.isCollection() && column.type instanceof MapType;
MapType<?, ?> mapType = (MapType<?, ?>)column.type;
MapType<?, ?> mapType = (MapType<?, ?>) column.type;
if (column.isComplex())
{
if (queryAnalyzer != null)
{
assert indexAnalyzer != null;
List<ByteBuffer> values = queryAnalyzer.analyze(value);
for (Cell<?> cell : row.getComplexColumnData(column))
{
AbstractType<?> elementType = mapType.nameComparator();
ByteBuffer elementValue = cell.path().get(0);
if (Operator.ANALYZER_MATCHES.isSatisfiedBy(elementType, elementValue, values, indexAnalyzer))
return true;
}
return false;
}
return row.getCell(column, CellPath.create(value)) != null;
}
else
{
ByteBuffer foundValue = getValue(metadata, partitionKey, row);
return foundValue != null && mapType.getSerializer().getSerializedValue(foundValue, value, mapType.getKeysType()) != null;
return foundValue != null && Operator.CONTAINS_KEY.isSatisfiedBy(mapType, foundValue, value, indexAnalyzer, queryAnalyzer);
}
}

Expand Down
165 changes: 165 additions & 0 deletions test/unit/org/apache/cassandra/index/sai/cql/LuceneAnalyzerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,171 @@ public void testClientWarningOnNGram()
" \"filters\":[{\"name\":\"porterstem\"}]}'}");
}

@Test
public void testAnalyzerOnSet() throws Throwable
{
createTable("CREATE TABLE %s (id text PRIMARY KEY, genres set<text>)");
execute("INSERT INTO %s (id, genres) VALUES ('1', {'Horror', 'comedy'})");

assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'Horror' ALLOW FILTERING"), row("1"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'Horror' ALLOW FILTERING"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'horror' ALLOW FILTERING"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'horror' ALLOW FILTERING"), row("1"));

createIndex("CREATE CUSTOM INDEX ON %s(genres) USING 'StorageAttachedIndex' WITH OPTIONS = { 'index_analyzer':'STANDARD'}");

beforeAndAfterFlush(() -> {
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'horror'"), row("1"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'horror'"));
});
}

@Test
public void testAnalyzerOnSetWithDistinctQueryAnalyzer() throws Throwable
{
createTable("CREATE TABLE %s (k int, c int, v set<text>, PRIMARY KEY(k, c))");
createIndex("CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex' WITH OPTIONS = {" +
"'index_analyzer': '{" +
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
" \"filters\" : [ { \"name\" : \"lowercase\", \"args\": {} }, " +
" { \"name\" : \"edgengram\", \"args\": { \"minGramSize\":\"1\", \"maxGramSize\":\"30\" } }]," +
" \"charFilters\" : []}', " +
"'query_analyzer': '{" +
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
" \"filters\" : [ {\"name\" : \"lowercase\",\"args\": {}} ]}'}");

execute("INSERT INTO %s (k, c, v) VALUES (0, 1, {'astra quick fox', 'astra quick foxes', 'astra4', 'astra5 -1@a#', 'lazy dog'})");
execute("INSERT INTO %s (k, c, v) VALUES (0, 2, {'astra quick fox'})");
execute("INSERT INTO %s (k, c, v) VALUES (0, 3, {'astra quick foxes'})");
execute("INSERT INTO %s (k, c, v) VALUES (0, 4, {'astra4'})");
execute("INSERT INTO %s (k, c, v) VALUES (0, 5, {'astra5 -1@a#'})");
execute("INSERT INTO %s (k, c, v) VALUES (0, 6, {'lazy dog'})");

beforeAndAfterFlush(() -> {
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'ast'"), row(1), row(2), row(3), row(4), row(5));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra'"), row(1), row(2), row(3), row(4), row(5));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra4'"), row(1), row(4));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra5'"), row(1), row(5));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra9'"));

assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'ast'"), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra'"), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra4'"), row(2), row(3), row(5), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra5'"), row(2), row(3), row(4), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra9'"), row(1), row(2), row(3), row(4), row(5), row(6));
});
}

@Test
public void testAnalyzerOnList() throws Throwable
{
createTable("CREATE TABLE %s (id text PRIMARY KEY, genres list<text>)");
execute("INSERT INTO %s (id, genres) VALUES ('1', ['Horror', 'comedy'])");

assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'Horror' ALLOW FILTERING"), row("1"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'Horror' ALLOW FILTERING"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'horror' ALLOW FILTERING"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'horror' ALLOW FILTERING"), row("1"));

createIndex("CREATE CUSTOM INDEX ON %s(genres) USING 'StorageAttachedIndex' WITH OPTIONS = { 'index_analyzer':'STANDARD'}");

beforeAndAfterFlush(() -> {
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'horror'"), row("1"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'horror'"));
});
}

@Test
public void testAnalyzerOnListWithDistinctQueryAnalyzer() throws Throwable
{
createTable("CREATE TABLE %s (k int, c int, v list<text>, PRIMARY KEY(k, c))");
createIndex("CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex' WITH OPTIONS = {" +
"'index_analyzer': '{" +
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
" \"filters\" : [ { \"name\" : \"lowercase\", \"args\": {} }, " +
" { \"name\" : \"edgengram\", \"args\": { \"minGramSize\":\"1\", \"maxGramSize\":\"30\" } }]," +
" \"charFilters\" : []}', " +
"'query_analyzer': '{" +
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
" \"filters\" : [ {\"name\" : \"lowercase\",\"args\": {}} ]}'}");

execute("INSERT INTO %s (k, c, v) VALUES (0, 1, ['astra quick fox', 'astra quick foxes', 'astra4', 'astra5 -1@a#', 'lazy dog'])");
execute("INSERT INTO %s (k, c, v) VALUES (0, 2, ['astra quick fox'])");
execute("INSERT INTO %s (k, c, v) VALUES (0, 3, ['astra quick foxes'])");
execute("INSERT INTO %s (k, c, v) VALUES (0, 4, ['astra4'])");
execute("INSERT INTO %s (k, c, v) VALUES (0, 5, ['astra5 -1@a#'])");
execute("INSERT INTO %s (k, c, v) VALUES (0, 6, ['lazy dog'])");

beforeAndAfterFlush(() -> {
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'ast'"), row(1), row(2), row(3), row(4), row(5));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra'"), row(1), row(2), row(3), row(4), row(5));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra4'"), row(1), row(4));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra5'"), row(1), row(5));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra9'"));

assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'ast'"), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra'"), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra4'"), row(2), row(3), row(5), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra5'"), row(2), row(3), row(4), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra9'"), row(1), row(2), row(3), row(4), row(5), row(6));
});
}

@Test
public void testAnalyzerOnMapKeys() throws Throwable
{
createTable("CREATE TABLE %s (id text PRIMARY KEY, genres map<text, int>)");
execute("INSERT INTO %s (id, genres) VALUES ('1', {'Horror' : 1, 'comedy' : 2})");

assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS KEY 'Horror' ALLOW FILTERING"), row("1"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS KEY 'Horror' ALLOW FILTERING"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS KEY 'horror' ALLOW FILTERING"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS KEY 'horror' ALLOW FILTERING"), row("1"));

createIndex("CREATE CUSTOM INDEX ON %s(KEYS(genres)) USING 'StorageAttachedIndex' WITH OPTIONS = { 'index_analyzer':'STANDARD'}");

beforeAndAfterFlush(() -> {
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS KEY 'horror'"), row("1"));
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS KEY 'horror'"));
});
}

@Test
public void testAnalyzerOnMapKeysWithDistinctQueryAnalyzer() throws Throwable
{
createTable("CREATE TABLE %s (k int, c int, v map<text, int>, PRIMARY KEY(k, c))");
createIndex("CREATE CUSTOM INDEX ON %s(KEYS(v)) USING 'StorageAttachedIndex' WITH OPTIONS = {" +
"'index_analyzer': '{" +
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
" \"filters\" : [ { \"name\" : \"lowercase\", \"args\": {} }, " +
" { \"name\" : \"edgengram\", \"args\": { \"minGramSize\":\"1\", \"maxGramSize\":\"30\" } }]," +
" \"charFilters\" : []}', " +
"'query_analyzer': '{" +
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
" \"filters\" : [ {\"name\" : \"lowercase\",\"args\": {}} ]}'}");

execute("INSERT INTO %s (k, c, v) VALUES (0, 1, {'astra quick fox':0, 'astra quick foxes':0, 'astra4':0, 'astra5 -1@a#':0, 'lazy dog':0})");
execute("INSERT INTO %s (k, c, v) VALUES (0, 2, {'astra quick fox':0})");
execute("INSERT INTO %s (k, c, v) VALUES (0, 3, {'astra quick foxes':0})");
execute("INSERT INTO %s (k, c, v) VALUES (0, 4, {'astra4':0})");
execute("INSERT INTO %s (k, c, v) VALUES (0, 5, {'astra5 -1@a#':0})");
execute("INSERT INTO %s (k, c, v) VALUES (0, 6, {'lazy dog':0})");

beforeAndAfterFlush(() -> {
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS KEY 'ast'"), row(1), row(2), row(3), row(4), row(5));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS KEY 'astra'"), row(1), row(2), row(3), row(4), row(5));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS KEY 'astra4'"), row(1), row(4));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS KEY 'astra5'"), row(1), row(5));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS KEY 'astra9'"));

assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS KEY 'ast'"), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS KEY 'astra'"), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS KEY 'astra4'"), row(2), row(3), row(5), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS KEY 'astra5'"), row(2), row(3), row(4), row(6));
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS KEY 'astra9'"), row(1), row(2), row(3), row(4), row(5), row(6));
});
}

private void assertClientWarningOnNGram(String indexOptions)
{
createIndexFromOptions(indexOptions);
Expand Down