diff --git a/tests/test_en_durations.py b/tests/test_en_durations.py
new file mode 100644
index 0000000..e6aadcb
--- /dev/null
+++ b/tests/test_en_durations.py
@@ -0,0 +1,69 @@
+import pytest
+import spacy
+from spacy.language import Language
+
+# Label the timexy pipe is configured with; each entity must carry it.
+label = "timexy_label"
+# Language code passed to spacy.blank().
+lang = "en"
+
+
+@pytest.fixture()
+def nlp() -> Language:
+    """Return a blank pipeline for ``lang`` with the timexy pipe added."""
+    nlp = spacy.blank(lang)
+    nlp.add_pipe("timexy", config={"label": label})
+    return nlp
+
+
+def test_digit_years(nlp: Language) -> None:
+    """Digit durations such as "1 year" and "2 years" are labelled."""
+    doc = nlp("I will try that in 1 year and 2 years")
+
+    # Pin the count so the label loop cannot pass on an empty doc.ents.
+    assert len(doc.ents) == 2
+    for e in doc.ents:
+        assert e.label_ == label
+
+
+def test_word_years(nlp: Language) -> None:
+    """Number-word durations such as "one year" are labelled."""
+    doc = nlp("I will try that in one year and two years")
+
+    # Pin the count so the label loop cannot pass on an empty doc.ents.
+    assert len(doc.ents) == 2
+    for e in doc.ents:
+        assert e.label_ == label
+
+
+def test_digit_hyphen_years(nlp: Language) -> None:
+    """A hyphenated digit duration ("1-year") yields one entity."""
+    doc = nlp("1-year repeat is ideal.")
+
+    assert len(doc.ents) == 1
+    assert doc.ents[0].label_ == label
+
+
+def test_word_hyphen_years(nlp: Language) -> None:
+    """A hyphenated number-word duration ("one-year") yields one entity."""
+    doc = nlp("one-year repeat is ideal.")
+
+    assert len(doc.ents) == 1
+    assert doc.ents[0].label_ == label
+
+
+def test_digit_hours(nlp: Language) -> None:
+    """An hour range ("48-72 hours") yields a single entity."""
+    doc = nlp("repeat in 48-72 hours")
+
+    assert len(doc.ents) == 1
+    assert doc.ents[0].label_ == label
+
+
+def test_word_hours(nlp: Language) -> None:
+    """Number-word hour durations ("one hour", "two hours") are labelled."""
+    doc = nlp("I will try that in one hour and two hours")
+
+    assert len(doc.ents) == 2
+    for e in doc.ents:
+        assert e.label_ == label
diff --git a/timexy/languages/en.py b/timexy/languages/en.py
index f01101b..d1f5851 100644
--- a/timexy/languages/en.py
+++ b/timexy/languages/en.py
@@ -8,6 +8,7 @@
         "M": ["month", "months"],
         "W": ["week", "weeks"],
         "D": ["day", "days"],
+        "H": ["hour", "hours"],
     },
     num_words=[
         "zero",
diff --git a/timexy/timexy.py b/timexy/timexy.py
index cedb15d..493432d 100644
--- a/timexy/timexy.py
+++ b/timexy/timexy.py
@@ -75,7 +75,7 @@ def __init__(
             for val in vals:
                 self.matcher.add(
                     key,
-                    [[{"IS_DIGIT": True}, {"TEXT": val}]],
+                    [[{"IS_DIGIT": True}, {"TEXT": "-", "OP": "?"}, {"TEXT": val}]],
                 )
 
                 self.matcher.add(
@@ -83,6 +83,7 @@ def __init__(
                     [
                         [
                             {"LOWER": {"IN": self.timexy_lang.num_words}},
+                            {"TEXT": "-", "OP": "?"},
                             {"LOWER": val.lower()},
                         ]
                     ],