Skip to content

Commit 1226594

Browse files
authored
CNDB-13408 Improve BM25 test with better logic (#1963)
The code that generates collection values for the BM25 test is difficult to follow. This commit refactors that generation code into separate, simpler methods. It also removes the unused last column from the dataset.
1 parent ee54a0a commit 1226594

File tree

1 file changed

+47
-41
lines changed

1 file changed

+47
-41
lines changed

test/unit/org/apache/cassandra/index/sai/cql/BM25Test.java

Lines changed: 47 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,26 +1067,26 @@ private void assertRejectsBM25OnNonRegularColumn(String query, String columnType
10671067

10681068
public final static Object[][] DATASET =
10691069
{
1070-
{ 0, "Climate", 5, "Climate change is a pressing issue. Climate patterns are shifting globally. Scientists study climate data daily.", 1 },
1071-
{ 1, "Technology", 3, "Technology is advancing. New technology in AI and robotics is groundbreaking.", 1 },
1072-
{ 2, "Economy", 4, "The economy is recovering. Economy experts are optimistic. However, the global economy still faces risks.", 1 },
1073-
{ 3, "Health", 3, "Health is wealth. Health policies need to be improved to ensure better public health outcomes.", 1 },
1074-
{ 4, "Education", 2, "Education is the foundation of success. Online education is booming.", 4 },
1075-
{ 5, "Climate", 4, "Climate and health are closely linked. Climate affects air quality and health outcomes.", 2 },
1076-
{ 6, "Education", 3, "Technology and education go hand in hand. EdTech is revolutionizing education through technology.", 3 },
1077-
{ 7, "Economy", 3, "The global economy is influenced by technology. Fintech is a key part of the economy today.", 2 },
1078-
{ 8, "Health", 3, "Education and health programs must be prioritized. Health education is vital in schools.", 2 },
1079-
{ 9, "Mixed", 3, "Technology, economy, and education are pillars of development.", 2 },
1080-
{ 10, "Climate", 5, "Climate climate climate. It's everywhere. Climate drives political and economic decisions.", 1 },
1081-
{ 11, "Health", 2, "Health concerns rise with climate issues. Health organizations are sounding the alarm.", 2 },
1082-
{ 12, "Economy", 3, "The economy is fluctuating. Uncertainty looms over the economy.", 1 },
1083-
{ 13, "Health", 3, "Cutting-edge technology is transforming healthcare. Healthtech merges health and technology.", 1 },
1084-
{ 14, "Education", 2, "Education reforms are underway. Education experts suggest holistic changes.", 1 },
1085-
{ 15, "Climate", 4, "Climate affects the economy and health. Climate events cost billions annually.", 1 },
1086-
{ 16, "Technology", 3, "Technology is the backbone of the modern economy. Without technology, economic growth stagnates.", 2 },
1087-
{ 17, "Health", 2, "Health is discussed less than economy or climate or technology, but health matters deeply.", 1 },
1088-
{ 18, "Climate", 5, "Climate change, climate policies, climate research—climate is the buzzword of our time.", 2 },
1089-
{ 19, "Mixed", 3, "Investments in education and technology will shape the future of the global economy.", 1 }
1070+
{ 0, "Climate", 5, "Climate change is a pressing issue. Climate patterns are shifting globally. Scientists study climate data daily." },
1071+
{ 1, "Technology", 3, "Technology is advancing. New technology in AI and robotics is groundbreaking." },
1072+
{ 2, "Economy", 4, "The economy is recovering. Economy experts are optimistic. However, the global economy still faces risks." },
1073+
{ 3, "Health", 3, "Health is wealth. Health policies need to be improved to ensure better public health outcomes." },
1074+
{ 4, "Education", 2, "Education is the foundation of success. Online education is booming." },
1075+
{ 5, "Climate", 4, "Climate and health are closely linked. Climate affects air quality and health outcomes." },
1076+
{ 6, "Education", 3, "Technology and education go hand in hand. EdTech is revolutionizing education through technology." },
1077+
{ 7, "Economy", 3, "The global economy is influenced by technology. Fintech is a key part of the economy today." },
1078+
{ 8, "Health", 3, "Education and health programs must be prioritized. Health education is vital in schools." },
1079+
{ 9, "Mixed", 3, "Technology, economy, and education are pillars of development." },
1080+
{ 10, "Climate", 5, "Climate climate climate. It's everywhere. Climate drives political and economic decisions." },
1081+
{ 11, "Health", 2, "Health concerns rise with climate issues. Health organizations are sounding the alarm." },
1082+
{ 12, "Economy", 3, "The economy is fluctuating. Uncertainty looms over the economy." },
1083+
{ 13, "Health", 3, "Cutting-edge technology is transforming healthcare. Healthtech merges health and technology." },
1084+
{ 14, "Education", 2, "Education reforms are underway. Education experts suggest holistic changes." },
1085+
{ 15, "Climate", 4, "Climate affects the economy and health. Climate events cost billions annually." },
1086+
{ 16, "Technology", 3, "Technology is the backbone of the modern economy. Without technology, economic growth stagnates." },
1087+
{ 17, "Health", 2, "Health is discussed less than economy or climate or technology, but health matters deeply." },
1088+
{ 18, "Climate", 5, "Climate change, climate policies, climate research—climate is the buzzword of our time." },
1089+
{ 19, "Mixed", 3, "Investments in education and technology will shape the future of the global economy." }
10901090
};
10911091

10921092
private void analyzeDataset(String term)
@@ -1129,36 +1129,42 @@ private void insertPrimitiveData(int start, int end)
11291129

11301130
public static void insertCollectionData(SAITester tester)
11311131
{
1132-
int setsize = 1;
11331132
for (int row = 0; row < DATASET.length; row++)
1134-
{
1135-
var set = new HashSet<String>();
1136-
for (int j = 0; j < setsize; j++)
1137-
set.add((String) DATASET[row - j][3]);
1138-
if (setsize >= 3)
1139-
setsize -= 2;
1140-
else
1141-
setsize++;
1142-
var map = new HashMap<Integer, String>();
1143-
var map_text = new HashMap<String, String>();
1144-
for (int j = 0; j <= row && j < 3; j++)
1145-
{
1146-
map.putIfAbsent((Integer) DATASET[row - j][2], (String) DATASET[row - j][1]);
1147-
map_text.putIfAbsent((String) DATASET[row - j][1], (String) DATASET[row - j][3]);
1148-
}
1149-
11501133
tester.execute(
11511134
"INSERT INTO %s (id, category, score, body, bodyset, map_category, map_body) " +
11521135
"VALUES (?, ?, ?, ?, ?, ?, ?)",
11531136
DATASET[row][0],
11541137
DATASET[row][1],
11551138
DATASET[row][2],
11561139
DATASET[row][3],
1157-
set,
1158-
map,
1159-
map_text
1140+
generateSetColumnValue(row),
1141+
generateMapValueIntKey(row),
1142+
generateMapValueTextKey(row)
11601143
);
1161-
}
1144+
}
1145+
1146+
private static HashSet<String> generateSetColumnValue(int row)
1147+
{
1148+
HashSet<String> setColumnValue = new HashSet<>();
1149+
for (int i = 0; i < row % 3 + 1; i++)
1150+
setColumnValue.add((String) DATASET[row - i][3]);
1151+
return setColumnValue;
1152+
}
1153+
1154+
private static HashMap<Integer, String> generateMapValueIntKey(int row)
1155+
{
1156+
HashMap<Integer, String> mapIntKey = new HashMap<>();
1157+
for (int j = 0; j <= row && j < 3; j++)
1158+
mapIntKey.putIfAbsent((Integer) DATASET[row - j][2], (String) DATASET[row - j][1]);
1159+
return mapIntKey;
1160+
}
1161+
1162+
private static HashMap<String, String> generateMapValueTextKey(int row)
1163+
{
1164+
HashMap<String, String> mapTextKey = new HashMap<>();
1165+
for (int j = 0; j <= row && j < 3; j++)
1166+
mapTextKey.putIfAbsent((String) DATASET[row - j][1], (String) DATASET[row - j][3]);
1167+
return mapTextKey;
11621168
}
11631169

11641170
private void executeQuery(List<Integer> expected, String query, Object... values) throws Throwable

0 commit comments

Comments
 (0)