Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion apiary.apib
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ The repository path is relative to source root.
+ repository - repository path with native path separators (of the machine
running the service) starting with path separator for which to return type

## Search [/search{?full,def,symbol,path,hist,type,projects,maxresults,start}]
## Search [/search{?full,def,symbol,path,hist,type,projects,maxresults,start,sort}]

## return search results [GET]

Expand All @@ -657,6 +657,15 @@ The repository path is relative to source root.
+ projects (optional, string) - projects to search in
+ maxresults (optional, string) - maximum number of documents whose hits will be returned (default 1000)
+ start (optional, string) - start index from which to return results
+ sort: relevancy (optional, enum[string])
+ Enum
+ relevancy
+ fullpath
+ lastmodtime
+ Description: Sort order for results. Possible values:
- `relevancy`: Sort by Lucene score (most relevant first).
- `fullpath`: Sort by file path (alphabetical).
- `lastmodtime`: Sort by last modification date (newest first).

+ Response 200 (application/json)
+ Body
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.util.Version;
import org.opengrok.indexer.analysis.AbstractAnalyzer;
Expand All @@ -66,6 +70,7 @@
import org.opengrok.indexer.util.Statistics;
import org.opengrok.indexer.util.TandemPath;
import org.opengrok.indexer.web.Prefix;
import org.opengrok.indexer.web.SortOrder;

/**
* This is an encapsulation of the details on how to search in the index database.
Expand Down Expand Up @@ -114,6 +119,10 @@ public class SearchEngine {
* Holds value of property type.
*/
private String type;
/**
* Holds value of property sort.
*/
private SortOrder sortOrder;
/**
* Holds value of property indexDatabase.
*/
Expand All @@ -132,7 +141,7 @@ public class SearchEngine {
int cachePages = RuntimeEnvironment.getInstance().getCachePages();
int totalHits = 0;
private ScoreDoc[] hits;
private TopScoreDocCollector collector;
private TopDocsCollector<?> collector;
private IndexSearcher searcher;
boolean allCollected;
private final ArrayList<SuperIndexSearcher> searcherList = new ArrayList<>();
Expand Down Expand Up @@ -181,6 +190,10 @@ private void searchSingleDatabase(boolean paging) throws IOException {
SuperIndexSearcher superIndexSearcher = RuntimeEnvironment.getInstance().getSuperIndexSearcher("");
searcherList.add(superIndexSearcher);
searcher = superIndexSearcher;
// If a field-based sort is requested, collect all hits (disable paging optimization)
if (sortOrder != SortOrder.RELEVANCY) {
paging = false;
}
searchIndex(superIndexSearcher, paging);
}

Expand All @@ -205,16 +218,33 @@ private void searchMultiDatabase(List<Project> projectList, boolean paging) thro
}

private void searchIndex(IndexSearcher searcher, boolean paging) throws IOException {
collector = TopScoreDocCollector.create(hitsPerPage * cachePages, Short.MAX_VALUE);
Statistics stat = new Statistics();
Sort luceneSort = null;
if (getSortOrder() == SortOrder.LASTMODIFIED) {
luceneSort = new Sort(new SortField(QueryBuilder.DATE, SortField.Type.STRING, true));
} else if (getSortOrder() == SortOrder.BY_PATH) {
luceneSort = new Sort(new SortField(QueryBuilder.FULLPATH, SortField.Type.STRING));
}
if (luceneSort == null) {
collector = TopScoreDocCollector.create(hitsPerPage * cachePages, Short.MAX_VALUE);
} else {
collector = TopFieldCollector.create(luceneSort, hitsPerPage * cachePages, Short.MAX_VALUE);
}
searcher.search(query, collector);
totalHits = collector.getTotalHits();
Statistics stat = new Statistics();
stat.report(LOGGER, Level.FINEST, "search via SearchEngine done",
"search.latency", new String[]{"category", "engine",
"outcome", totalHits > 0 ? "success" : "empty"});
if (!paging && totalHits > 0) {
collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE);
searcher.search(query, collector);
if (luceneSort == null) {
if (!paging && totalHits > 0) {
collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE);
searcher.search(query, collector);
}
} else {
if (!paging && totalHits > 0) {
collector = TopFieldCollector.create(luceneSort, totalHits, Short.MAX_VALUE);
searcher.search(query, collector);
}
}
hits = collector.topDocs().scoreDocs;
StoredFields storedFields = searcher.storedFields();
Expand Down Expand Up @@ -645,4 +675,22 @@ public String getType() {
public void setType(String fileType) {
this.type = fileType;
}

/**
* Getter for property sort.
*
* @return Value of property sortOrder.
*/
public SortOrder getSortOrder() {
return this.sortOrder;
}

/**
* Setter for property sort.
*
* @param sortOrder New value of property sortOrder.
*/
public void setSortOrder(SortOrder sortOrder) {
this.sortOrder = sortOrder;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@
package org.opengrok.indexer.search;

import java.io.File;
import java.net.URL;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.TreeSet;

import org.junit.jupiter.api.AfterAll;
Expand All @@ -36,7 +40,9 @@
import org.opengrok.indexer.util.TestRepository;

import org.opengrok.indexer.history.RepositoryFactory;
import org.opengrok.indexer.web.SortOrder;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
Expand All @@ -55,7 +61,9 @@ class SearchEngineTest {
@BeforeAll
static void setUpClass() throws Exception {
repository = new TestRepository();
repository.create(HistoryGuru.class.getResource("/repositories"));
URL url = HistoryGuru.class.getResource("/repositories");
repository.createEmpty();
repository.copyDirectoryWithUniqueModifiedTime(Path.of(url.toURI()), Path.of(repository.getSourceRoot()));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add assertNotNull for the url before the toURI() conversion like it is done in some other test classes to avoid code smells reported e.g. by SonarQube.


RuntimeEnvironment env = RuntimeEnvironment.getInstance();
env.setSourceRoot(repository.getSourceRoot());
Expand Down Expand Up @@ -148,6 +156,69 @@ void testGetQuery() throws Exception {
instance.getQuery());
}

@Test
void testSortOrderLastModified() {
SearchEngine instance = new SearchEngine();
instance.setFile("main.c");
instance.setFreetext("arguments");
instance.setSortOrder(SortOrder.LASTMODIFIED);
int hitsCount = instance.search();
List<Hit> hits = new ArrayList<>();
instance.results(0, hitsCount, hits);
assertTrue(hits.size() > 1, "Should return at least 2 hits for RELEVANCY sort to check order");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not check the exact count ? If count is different than the array length, it does not make sense to proceed with comparison.


List<String> results = new ArrayList<>();
for (Hit hit : hits) {
results.add(hit.getPath() + "@" + hit.getLineno());
}
Comment on lines +170 to +173
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer something like this over iterating:

        String[] results = hits.stream().
                map(hit -> hit.getPath() + "@" + hit.getLineno()).
                toArray(String[]::new);

Then for the assertArrayEquals call below toArray is not necessary.

final String[] expectedResults = {
"/teamware/main.c@5",
"/rcs_test/main.c@5",
"/mercurial/main.c@5",
"/git/main.c@5",
"/cvs_test/cvsrepo/main.c@7",
"/bazaar/main.c@5"
};

assertArrayEquals(expectedResults, results.toArray());

instance.destroy();
}

@Test
void testSortOrderByPath() {
SearchEngine instance = new SearchEngine();
instance.setFile("main.c OR header.h");
instance.setFreetext("arguments OR stdio");
instance.setSortOrder(SortOrder.BY_PATH);
int hitsCount = instance.search();
List<Hit> hits = new ArrayList<>();
instance.results(0, hitsCount, hits);
assertTrue(hits.size() > 1, "Should return at least 2 hits for RELEVANCY sort to check order");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto


List<String> results = new ArrayList<>();
for (Hit hit : hits) {
results.add(hit.getPath() + "@" + hit.getLineno());
}
Comment on lines +199 to +202
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

final String[] expectedResults = {
"/bazaar/header.h@2",
"/bazaar/main.c@5",
"/cvs_test/cvsrepo/main.c@7",
"/git/header.h@2",
"/git/main.c@5",
"/mercurial/header.h@2",
"/mercurial/main.c@5",
"/rcs_test/header.h@2",
"/rcs_test/main.c@5",
"/teamware/header.h@2",
"/teamware/main.c@5"
};

assertArrayEquals(expectedResults, results.toArray());

instance.destroy();
}

/* see https://github.com/oracle/opengrok/issues/2030
@Test
void testSearch() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

Expand Down Expand Up @@ -133,6 +134,54 @@ public void copyDirectory(Path src, Path dest) throws IOException {
}
}

/**
* Assumes the destination directory exists.
* @param src source directory
* @param dest destination directory
* @throws IOException on error
*/
public void copyDirectoryWithUniqueModifiedTime(Path src, Path dest) throws IOException {
// Create a deterministic order of paths for creation time, so last modified time indexing is stable in tests
// note we cannot use Files.copy(sourceFile, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES)
// as the original creation time is the user checkout and not different accross files
Comment on lines +144 to +146
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps move this comment to the method's javadoc so that it becomes more clear what it does.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, there is a typo in "accross".

List<Path> allPaths;
try (Stream<Path> stream = Files.walk(src)) {
allPaths = stream.filter(p -> !p.equals(src)).sorted().toList();
}
// Set base time to now, and go ahead in time for each subsequent path by 1 minute
java.time.Instant baseTime = java.time.Instant.now();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to import java.time.Instant; and then use the Instant standalone.

for (int i = 0; i < allPaths.size(); i++) {
Path sourcePath = allPaths.get(i);
Path destRelativePath = getDestinationRelativePath(src, sourcePath);
Path destPath = dest.resolve(destRelativePath);
var fileTime = java.nio.file.attribute.FileTime.from(baseTime.plusSeconds(i * 60L));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto for the java.nio.file.attribute.FileTime import.

if (Files.isDirectory(sourcePath)) {
if (!Files.exists(destPath)) {
Files.createDirectories(destPath);
}
try {
Files.setLastModifiedTime(destPath, fileTime);
Files.setAttribute(destPath, "basic:creationTime", fileTime);
} catch (Exception ignored) {
// Not all filesystems support creationTime
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the creation time necessary for the SearchEngine tests or for tests in general ? Also, I'd prefer having any exceptions from the last modified time to be propagated because otherwise some of the tests would fail anyway and it would not be clear why.

}
} else {
// Ensure parent directory exists before copying file
Path parentDir = destPath.getParent();
if (parentDir != null && !Files.exists(parentDir)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no need to check whether the destination exists for Files.createDirectories().

Also, I am pretty sure @ginoaugustine would come with a way how to use Optional for this nullable variable.

Files.createDirectories(parentDir);
}
Files.copy(sourcePath, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES);
Files.setLastModifiedTime(destPath, fileTime);
try {
Files.setAttribute(destPath, "basic:creationTime", fileTime);
} catch (Exception ignored) {
// Not all filesystems support creationTime
}
}
}
}

private Path getDestinationRelativePath(Path sourceDirectory, Path sourceFile) {
// possibly strip zip filesystem for the startsWith method to work
var relativePath = Path.of(sourceDirectory.relativize(sourceFile).toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.opengrok.indexer.search.Hit;
import org.opengrok.indexer.search.SearchEngine;
import org.opengrok.indexer.web.QueryParameters;
import org.opengrok.indexer.web.SortOrder;
import org.opengrok.web.PageConfig;
import org.opengrok.web.api.v1.filter.CorsEnable;
import org.opengrok.web.api.v1.suggester.provider.service.SuggesterService;
Expand All @@ -58,6 +59,7 @@ public class SearchController {
public static final String PATH = "search";

private static final int MAX_RESULTS = 1000;
private static final String DEFAULT_SORT_ORDER = "relevancy";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should probably say this needs to be equal to SortOrder.RELEVANCY.name. Maybe add a test for it.

Cannot use it directly as as @DefaultValue needs a constant and besides the name field is currently private.


private final SuggesterService suggester;

Expand All @@ -81,9 +83,10 @@ public SearchResult search(
@QueryParam("projects") final List<String> projects,
@QueryParam("maxresults") // Akin to QueryParameters.COUNT_PARAM
@DefaultValue(MAX_RESULTS + "") final int maxResults,
@QueryParam(QueryParameters.START_PARAM) @DefaultValue(0 + "") final int startDocIndex
@QueryParam(QueryParameters.START_PARAM) @DefaultValue(0 + "") final int startDocIndex,
@QueryParam(QueryParameters.SORT_PARAM) @DefaultValue(DEFAULT_SORT_ORDER) final String sort
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to add a test that ensures the default sort order (perhaps it would be sufficient to call just getSortOrder() on default SearchEngine construction and check its value rather than having a full blown test that checks the data returned from search which might change across Lucene upgrades)

) {
try (SearchEngineWrapper engine = new SearchEngineWrapper(full, def, symbol, path, hist, type)) {
try (SearchEngineWrapper engine = new SearchEngineWrapper(full, def, symbol, path, hist, type, SortOrder.get(sort))) {

if (!engine.isValid()) {
throw new WebApplicationException("Invalid request", Response.Status.BAD_REQUEST);
Expand Down Expand Up @@ -119,14 +122,16 @@ private SearchEngineWrapper(
final String symbol,
final String path,
final String hist,
final String type
final String type,
final SortOrder sortOrder
) {
engine.setFreetext(full);
engine.setDefinition(def);
engine.setSymbol(symbol);
engine.setFile(path);
engine.setHistory(hist);
engine.setType(type);
engine.setSortOrder(sortOrder);
}

public List<Hit> search(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public class IncomingFilter implements ContainerRequestFilter, ConfigurationChan
/**
* Endpoint paths that are exempted from this filter.
* @see SearchController#search(HttpServletRequest, String, String, String, String, String, String,
* java.util.List, int, int)
* java.util.List, int, int, String)
* @see SuggesterController#getSuggestions(org.opengrok.web.api.v1.suggester.model.SuggesterQueryData)
* @see SuggesterController#getConfig()
*/
Expand Down
Loading