Skip to content
This repository was archived by the owner on Feb 22, 2024. It is now read-only.

Commit f487aa0

Browse files
committed
Case sensitivity and stemming language added as configurable query parameters
1 parent be81083 commit f487aa0

File tree

3 files changed

+29
-21
lines changed

3 files changed

+29
-21
lines changed

src/main/java/eu/fusepool/p3/dictionarymatcher/DictionaryAnnotator.java

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import opennlp.tools.tokenize.TokenizerME;
1313
import opennlp.tools.tokenize.TokenizerModel;
1414
import opennlp.tools.util.Span;
15+
import org.apache.commons.lang.StringUtils;
1516
import org.arabidopsis.ahocorasick.AhoCorasick;
1617
import org.arabidopsis.ahocorasick.SearchResult;
1718
import org.tartarus.snowball.SnowballStemmer;
@@ -73,29 +74,29 @@ public DictionaryAnnotator(DictionaryStore _dictionary, String _stemmingLanguage
7374
}
7475

7576
// if no stemming language is provided (null, empty or blank), set the stemming language to "none"
76-
if (stemmingLanguage == null || stemmingLanguage.isEmpty()) {
77-
stemmingLanguage = "None";
77+
if (StringUtils.isBlank(stemmingLanguage)) {
78+
stemmingLanguage = "none";
7879
}
7980
// create a mapping between the language and the name of the class
8081
// responsible for the stemming of the current language
8182
languages = new HashMap<>();
82-
languages.put("None", "");
83-
languages.put("Danish", "danishStemmer");
84-
languages.put("Dutch", "dutchStemmer");
85-
languages.put("English", "englishStemmer");
86-
languages.put("Finnish", "finnishStemmer");
87-
languages.put("French", "frenchStemmer");
88-
languages.put("German", "germanStemmer");
89-
languages.put("Hungarian", "hungarianStemmer");
90-
languages.put("Italian", "italianStemmer");
91-
languages.put("Norwegian", "norwegianStemmer");
92-
//languages.put("english2", "porterStemmer");
93-
languages.put("Portuguese", "portugueseStemmer");
94-
languages.put("Romanian", "romanianStemmer");
95-
languages.put("Russian", "russianStemmer");
96-
languages.put("Spanish", "spanishStemmer");
97-
languages.put("Swedish", "swedishStemmer");
98-
languages.put("Turkish", "turkishStemmer");
83+
languages.put("none", "");
84+
languages.put("danish", "danishStemmer");
85+
languages.put("dutch", "dutchStemmer");
86+
languages.put("english", "englishStemmer");
87+
languages.put("finnish", "finnishStemmer");
88+
languages.put("french", "frenchStemmer");
89+
languages.put("german", "germanStemmer");
90+
languages.put("hungarian", "hungarianStemmer");
91+
languages.put("italian", "italianStemmer");
92+
languages.put("norwegian", "norwegianStemmer");
93+
languages.put("english2", "porterStemmer");
94+
languages.put("portuguese", "portugueseStemmer");
95+
languages.put("romanian", "romanianStemmer");
96+
languages.put("russian", "russianStemmer");
97+
languages.put("spanish", "spanishStemmer");
98+
languages.put("swedish", "swedishStemmer");
99+
languages.put("turkish", "turkishStemmer");
99100

100101
originalDictionary = new DictionaryStore();
101102
processedDictionary = new DictionaryStore();

src/main/java/eu/fusepool/p3/transformer/dictionarymatcher/Arguments.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ public interface Arguments extends ArgumentsWithHelp {
1010
description = "The port on which the proxy shall listen")
1111
public int getPort();
1212

13-
@CommandLine(longName = "enableCors", shortName = {"C"},
13+
@CommandLine(longName = "enableCors", shortName = {"C"}, required = false,
1414
description = "Enable a liberal CORS policy",
1515
isSwitch = true)
1616
public boolean enableCors();

src/main/java/eu/fusepool/p3/transformer/dictionarymatcher/DictionaryMatcherTransformer.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,19 @@ public DictionaryMatcherTransformer(String queryString) {
7272
throw new TransformerException(HttpServletResponse.SC_BAD_REQUEST, "ERROR: Badly formatted query string: \"" + queryString + "\" \nUsage: http://<transformer>/?taxonomy=<taxonomy_URI>");
7373
}
7474

75+
// get taxonomy URI
7576
String taxonomy = queryParams.get("taxonomy");
7677

7778
if (StringUtils.isEmpty(taxonomy)) {
7879
throw new TransformerException(HttpServletResponse.SC_BAD_REQUEST, "ERROR: Taxonomy URI was not provided! \nUsage: http://<transformer>/?taxonomy=<taxonomy_URI>");
7980
}
8081

82+
// get case sensitivity
83+
boolean caseSensitivity = queryParams.get("casesensitive") != null;
84+
85+
// get stemming language
86+
String stemmingLanguage = queryParams.get("stemming");
87+
8188
// create a new dictionaryAnnotator if it does not exist
8289
if (dictionaryAnnotator == null) {
8390
long start, end;
@@ -90,7 +97,7 @@ public DictionaryMatcherTransformer(String queryString) {
9097
System.out.print(" (" + dictionary.GetSize() + ") and creating transformer ...");
9198

9299
// create the dictionary annotator instance
93-
dictionaryAnnotator = new DictionaryAnnotator(dictionary, "English", false, 0, false); // TODO get settings from query string
100+
dictionaryAnnotator = new DictionaryAnnotator(dictionary, stemmingLanguage, caseSensitivity, 0, false);
94101

95102
end = System.currentTimeMillis();
96103
System.out.println(" done [" + Double.toString((double) (end - start) / 1000) + " sec] .");

0 commit comments

Comments
 (0)