Skip to content

Commit 44d9a62

Browse files
adelapenadjatnieks
authored and committed
CNDB-13022: Fix analyzers for collections (#1583)
Fix `RowFilter` application of analyzers to queries on collections. `RowFilter.SimpleExpression#isSatisfiedBy` for `CONTAINS` and `CONTAINS_KEY` operators should extract the collection values and check them with `Operator.ANALYZER_MATCHES` rather than with equality.
1 parent 60a1245 commit 44d9a62

File tree

3 files changed

+216
-23
lines changed

3 files changed

+216
-23
lines changed

src/java/org/apache/cassandra/cql3/Operator.java

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -490,9 +490,17 @@ public boolean isSatisfiedBy(AbstractType<?> type,
490490
{
491491
assert indexAnalyzer != null && queryAnalyzer != null : ": operation can only be computed by an indexed column with a configured analyzer";
492492

493-
List<ByteBuffer> leftTokens = indexAnalyzer.analyze(leftOperand);
494493
List<ByteBuffer> rightTokens = queryAnalyzer.analyze(rightOperand);
494+
return isSatisfiedBy(type, leftOperand, rightTokens, indexAnalyzer);
495+
}
495496

497+
@Override
498+
public boolean isSatisfiedBy(AbstractType<?> type,
499+
ByteBuffer leftValue,
500+
List<ByteBuffer> rightTokens,
501+
Index.Analyzer indexAnalyzer)
502+
{
503+
List<ByteBuffer> leftTokens = indexAnalyzer.analyze(leftValue);
496504
Iterator<ByteBuffer> it = rightTokens.iterator();
497505

498506
do
@@ -640,6 +648,22 @@ public abstract boolean isSatisfiedBy(AbstractType<?> type,
640648
ByteBuffer rightOperand,
641649
@Nullable Index.Analyzer indexAnalyzer,
642650
@Nullable Index.Analyzer queryAnalyzer);
651+
/**
652+
* Whether 2 analyzable values satisfy this operator (given the type they should be compared with).
653+
*
654+
* @param type the type of the values to compare.
655+
* @param leftOperand the left operand of the comparison.
656+
* @param rightTokens the right operand of the comparison decomposed as analyzed tokens.
657+
* @param indexAnalyzer an index-provided function to transform the left-side compared value before comparison,
658+
* it shouldn't be {@code null}.
659+
*/
660+
public boolean isSatisfiedBy(AbstractType<?> type,
661+
ByteBuffer leftOperand,
662+
List<ByteBuffer> rightTokens,
663+
Index.Analyzer indexAnalyzer)
664+
{
665+
throw new UnsupportedOperationException();
666+
}
643667

644668
public int serializedSize()
645669
{

src/java/org/apache/cassandra/db/filter/RowFilter.java

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1372,22 +1372,28 @@ public boolean isSatisfiedBy(TableMetadata metadata, DecoratedKey partitionKey,
13721372
private boolean contains(TableMetadata metadata, DecoratedKey partitionKey, Row row)
13731373
{
13741374
assert column.type.isCollection();
1375-
CollectionType<?> type = (CollectionType<?>)column.type;
1375+
assert (indexAnalyzer == null) == (queryAnalyzer == null);
1376+
1377+
CollectionType<?> type = (CollectionType<?>) column.type;
1378+
List<ByteBuffer> analyzedValues = queryAnalyzer == null ? null : queryAnalyzer.analyze(value);
1379+
13761380
if (column.isComplex())
13771381
{
13781382
ComplexColumnData complexData = row.getComplexColumnData(column);
13791383
if (complexData != null)
13801384
{
1385+
AbstractType<?> elementType = type.kind == CollectionType.Kind.SET ? type.nameComparator() : type.valueComparator();
13811386
for (Cell<?> cell : complexData)
13821387
{
1383-
if (type.kind == CollectionType.Kind.SET)
1388+
ByteBuffer elementValue = type.kind == CollectionType.Kind.SET ? cell.path().get(0) : cell.buffer();
1389+
if (analyzedValues == null)
13841390
{
1385-
if (type.nameComparator().compare(cell.path().get(0), value) == 0)
1391+
if (elementType.compare(elementValue, value) == 0)
13861392
return true;
13871393
}
13881394
else
13891395
{
1390-
if (type.valueComparator().compare(cell.buffer(), value) == 0)
1396+
if (Operator.ANALYZER_MATCHES.isSatisfiedBy(elementType, elementValue, analyzedValues, indexAnalyzer))
13911397
return true;
13921398
}
13931399
}
@@ -1397,37 +1403,35 @@ private boolean contains(TableMetadata metadata, DecoratedKey partitionKey, Row
13971403
else
13981404
{
13991405
ByteBuffer foundValue = getValue(metadata, partitionKey, row);
1400-
if (foundValue == null)
1401-
return false;
1402-
1403-
switch (type.kind)
1404-
{
1405-
case LIST:
1406-
ListType<?> listType = (ListType<?>)type;
1407-
return listType.compose(foundValue).contains(listType.getElementsType().compose(value));
1408-
case SET:
1409-
SetType<?> setType = (SetType<?>)type;
1410-
return setType.compose(foundValue).contains(setType.getElementsType().compose(value));
1411-
case MAP:
1412-
MapType<?,?> mapType = (MapType<?, ?>)type;
1413-
return mapType.compose(foundValue).containsValue(mapType.getValuesType().compose(value));
1414-
}
1415-
throw new AssertionError();
1406+
return foundValue != null && Operator.CONTAINS.isSatisfiedBy(type, foundValue, value, indexAnalyzer, queryAnalyzer);
14161407
}
14171408
}
14181409

14191410
private boolean containsKey(TableMetadata metadata, DecoratedKey partitionKey, Row row)
14201411
{
14211412
assert column.type.isCollection() && column.type instanceof MapType;
1422-
MapType<?, ?> mapType = (MapType<?, ?>)column.type;
1413+
MapType<?, ?> mapType = (MapType<?, ?>) column.type;
14231414
if (column.isComplex())
14241415
{
1416+
if (queryAnalyzer != null)
1417+
{
1418+
assert indexAnalyzer != null;
1419+
List<ByteBuffer> values = queryAnalyzer.analyze(value);
1420+
for (Cell<?> cell : row.getComplexColumnData(column))
1421+
{
1422+
AbstractType<?> elementType = mapType.nameComparator();
1423+
ByteBuffer elementValue = cell.path().get(0);
1424+
if (Operator.ANALYZER_MATCHES.isSatisfiedBy(elementType, elementValue, values, indexAnalyzer))
1425+
return true;
1426+
}
1427+
return false;
1428+
}
14251429
return row.getCell(column, CellPath.create(value)) != null;
14261430
}
14271431
else
14281432
{
14291433
ByteBuffer foundValue = getValue(metadata, partitionKey, row);
1430-
return foundValue != null && mapType.getSerializer().getSerializedValue(foundValue, value, mapType.getKeysType()) != null;
1434+
return foundValue != null && Operator.CONTAINS_KEY.isSatisfiedBy(mapType, foundValue, value, indexAnalyzer, queryAnalyzer);
14311435
}
14321436
}
14331437

test/unit/org/apache/cassandra/index/sai/cql/LuceneAnalyzerTest.java

Lines changed: 165 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -862,6 +862,171 @@ public void testClientWarningOnNGram()
862862
" \"filters\":[{\"name\":\"porterstem\"}]}'}");
863863
}
864864

865+
@Test
866+
public void testAnalyzerOnSet() throws Throwable
867+
{
868+
createTable("CREATE TABLE %s (id text PRIMARY KEY, genres set<text>)");
869+
execute("INSERT INTO %s (id, genres) VALUES ('1', {'Horror', 'comedy'})");
870+
871+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'Horror' ALLOW FILTERING"), row("1"));
872+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'Horror' ALLOW FILTERING"));
873+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'horror' ALLOW FILTERING"));
874+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'horror' ALLOW FILTERING"), row("1"));
875+
876+
createIndex("CREATE CUSTOM INDEX ON %s(genres) USING 'StorageAttachedIndex' WITH OPTIONS = { 'index_analyzer':'STANDARD'}");
877+
878+
beforeAndAfterFlush(() -> {
879+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'horror'"), row("1"));
880+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'horror'"));
881+
});
882+
}
883+
884+
@Test
885+
public void testAnalyzerOnSetWithDistinctQueryAnalyzer() throws Throwable
886+
{
887+
createTable("CREATE TABLE %s (k int, c int, v set<text>, PRIMARY KEY(k, c))");
888+
createIndex("CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex' WITH OPTIONS = {" +
889+
"'index_analyzer': '{" +
890+
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
891+
" \"filters\" : [ { \"name\" : \"lowercase\", \"args\": {} }, " +
892+
" { \"name\" : \"edgengram\", \"args\": { \"minGramSize\":\"1\", \"maxGramSize\":\"30\" } }]," +
893+
" \"charFilters\" : []}', " +
894+
"'query_analyzer': '{" +
895+
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
896+
" \"filters\" : [ {\"name\" : \"lowercase\",\"args\": {}} ]}'}");
897+
898+
execute("INSERT INTO %s (k, c, v) VALUES (0, 1, {'astra quick fox', 'astra quick foxes', 'astra4', 'astra5 -1@a#', 'lazy dog'})");
899+
execute("INSERT INTO %s (k, c, v) VALUES (0, 2, {'astra quick fox'})");
900+
execute("INSERT INTO %s (k, c, v) VALUES (0, 3, {'astra quick foxes'})");
901+
execute("INSERT INTO %s (k, c, v) VALUES (0, 4, {'astra4'})");
902+
execute("INSERT INTO %s (k, c, v) VALUES (0, 5, {'astra5 -1@a#'})");
903+
execute("INSERT INTO %s (k, c, v) VALUES (0, 6, {'lazy dog'})");
904+
905+
beforeAndAfterFlush(() -> {
906+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'ast'"), row(1), row(2), row(3), row(4), row(5));
907+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra'"), row(1), row(2), row(3), row(4), row(5));
908+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra4'"), row(1), row(4));
909+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra5'"), row(1), row(5));
910+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra9'"));
911+
912+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'ast'"), row(6));
913+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra'"), row(6));
914+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra4'"), row(2), row(3), row(5), row(6));
915+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra5'"), row(2), row(3), row(4), row(6));
916+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra9'"), row(1), row(2), row(3), row(4), row(5), row(6));
917+
});
918+
}
919+
920+
@Test
921+
public void testAnalyzerOnList() throws Throwable
922+
{
923+
createTable("CREATE TABLE %s (id text PRIMARY KEY, genres list<text>)");
924+
execute("INSERT INTO %s (id, genres) VALUES ('1', ['Horror', 'comedy'])");
925+
926+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'Horror' ALLOW FILTERING"), row("1"));
927+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'Horror' ALLOW FILTERING"));
928+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'horror' ALLOW FILTERING"));
929+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'horror' ALLOW FILTERING"), row("1"));
930+
931+
createIndex("CREATE CUSTOM INDEX ON %s(genres) USING 'StorageAttachedIndex' WITH OPTIONS = { 'index_analyzer':'STANDARD'}");
932+
933+
beforeAndAfterFlush(() -> {
934+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS 'horror'"), row("1"));
935+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS 'horror'"));
936+
});
937+
}
938+
939+
@Test
940+
public void testAnalyzerOnListWithDistinctQueryAnalyzer() throws Throwable
941+
{
942+
createTable("CREATE TABLE %s (k int, c int, v list<text>, PRIMARY KEY(k, c))");
943+
createIndex("CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex' WITH OPTIONS = {" +
944+
"'index_analyzer': '{" +
945+
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
946+
" \"filters\" : [ { \"name\" : \"lowercase\", \"args\": {} }, " +
947+
" { \"name\" : \"edgengram\", \"args\": { \"minGramSize\":\"1\", \"maxGramSize\":\"30\" } }]," +
948+
" \"charFilters\" : []}', " +
949+
"'query_analyzer': '{" +
950+
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
951+
" \"filters\" : [ {\"name\" : \"lowercase\",\"args\": {}} ]}'}");
952+
953+
execute("INSERT INTO %s (k, c, v) VALUES (0, 1, ['astra quick fox', 'astra quick foxes', 'astra4', 'astra5 -1@a#', 'lazy dog'])");
954+
execute("INSERT INTO %s (k, c, v) VALUES (0, 2, ['astra quick fox'])");
955+
execute("INSERT INTO %s (k, c, v) VALUES (0, 3, ['astra quick foxes'])");
956+
execute("INSERT INTO %s (k, c, v) VALUES (0, 4, ['astra4'])");
957+
execute("INSERT INTO %s (k, c, v) VALUES (0, 5, ['astra5 -1@a#'])");
958+
execute("INSERT INTO %s (k, c, v) VALUES (0, 6, ['lazy dog'])");
959+
960+
beforeAndAfterFlush(() -> {
961+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'ast'"), row(1), row(2), row(3), row(4), row(5));
962+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra'"), row(1), row(2), row(3), row(4), row(5));
963+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra4'"), row(1), row(4));
964+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra5'"), row(1), row(5));
965+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS 'astra9'"));
966+
967+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'ast'"), row(6));
968+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra'"), row(6));
969+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra4'"), row(2), row(3), row(5), row(6));
970+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra5'"), row(2), row(3), row(4), row(6));
971+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS 'astra9'"), row(1), row(2), row(3), row(4), row(5), row(6));
972+
});
973+
}
974+
975+
@Test
976+
public void testAnalyzerOnMapKeys() throws Throwable
977+
{
978+
createTable("CREATE TABLE %s (id text PRIMARY KEY, genres map<text, int>)");
979+
execute("INSERT INTO %s (id, genres) VALUES ('1', {'Horror' : 1, 'comedy' : 2})");
980+
981+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS KEY 'Horror' ALLOW FILTERING"), row("1"));
982+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS KEY 'Horror' ALLOW FILTERING"));
983+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS KEY 'horror' ALLOW FILTERING"));
984+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS KEY 'horror' ALLOW FILTERING"), row("1"));
985+
986+
createIndex("CREATE CUSTOM INDEX ON %s(KEYS(genres)) USING 'StorageAttachedIndex' WITH OPTIONS = { 'index_analyzer':'STANDARD'}");
987+
988+
beforeAndAfterFlush(() -> {
989+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres CONTAINS KEY 'horror'"), row("1"));
990+
assertRowsNet(executeNet("SELECT id FROM %s WHERE genres NOT CONTAINS KEY 'horror'"));
991+
});
992+
}
993+
994+
@Test
995+
public void testAnalyzerOnMapKeysWithDistinctQueryAnalyzer() throws Throwable
996+
{
997+
createTable("CREATE TABLE %s (k int, c int, v map<text, int>, PRIMARY KEY(k, c))");
998+
createIndex("CREATE CUSTOM INDEX ON %s(KEYS(v)) USING 'StorageAttachedIndex' WITH OPTIONS = {" +
999+
"'index_analyzer': '{" +
1000+
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
1001+
" \"filters\" : [ { \"name\" : \"lowercase\", \"args\": {} }, " +
1002+
" { \"name\" : \"edgengram\", \"args\": { \"minGramSize\":\"1\", \"maxGramSize\":\"30\" } }]," +
1003+
" \"charFilters\" : []}', " +
1004+
"'query_analyzer': '{" +
1005+
" \"tokenizer\" : { \"name\" : \"whitespace\", \"args\" : {} }," +
1006+
" \"filters\" : [ {\"name\" : \"lowercase\",\"args\": {}} ]}'}");
1007+
1008+
execute("INSERT INTO %s (k, c, v) VALUES (0, 1, {'astra quick fox':0, 'astra quick foxes':0, 'astra4':0, 'astra5 -1@a#':0, 'lazy dog':0})");
1009+
execute("INSERT INTO %s (k, c, v) VALUES (0, 2, {'astra quick fox':0})");
1010+
execute("INSERT INTO %s (k, c, v) VALUES (0, 3, {'astra quick foxes':0})");
1011+
execute("INSERT INTO %s (k, c, v) VALUES (0, 4, {'astra4':0})");
1012+
execute("INSERT INTO %s (k, c, v) VALUES (0, 5, {'astra5 -1@a#':0})");
1013+
execute("INSERT INTO %s (k, c, v) VALUES (0, 6, {'lazy dog':0})");
1014+
1015+
beforeAndAfterFlush(() -> {
1016+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS KEY 'ast'"), row(1), row(2), row(3), row(4), row(5));
1017+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS KEY 'astra'"), row(1), row(2), row(3), row(4), row(5));
1018+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS KEY 'astra4'"), row(1), row(4));
1019+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS KEY 'astra5'"), row(1), row(5));
1020+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v CONTAINS KEY 'astra9'"));
1021+
1022+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS KEY 'ast'"), row(6));
1023+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS KEY 'astra'"), row(6));
1024+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS KEY 'astra4'"), row(2), row(3), row(5), row(6));
1025+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS KEY 'astra5'"), row(2), row(3), row(4), row(6));
1026+
assertRowsNet(executeNet("SELECT c FROM %s WHERE v NOT CONTAINS KEY 'astra9'"), row(1), row(2), row(3), row(4), row(5), row(6));
1027+
});
1028+
}
1029+
8651030
private void assertClientWarningOnNGram(String indexOptions)
8661031
{
8671032
createIndexFromOptions(indexOptions);

0 commit comments

Comments
 (0)