Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions tests/test_en_durations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import pytest
import spacy
from spacy.language import Language

# Entity label handed to the "timexy" pipe via its config; the tests below
# assert that every detected entity carries exactly this label.
label = "timexy_label"
# Language code used to create the blank spaCy pipeline in the `nlp` fixture.
lang = "en"


@pytest.fixture()
def nlp() -> Language:
    """Build a blank pipeline for ``lang`` with the "timexy" pipe attached.

    The pipe is configured with the module-level ``label``.
    """
    pipeline = spacy.blank(lang)
    pipe_config = {"label": label}
    pipeline.add_pipe("timexy", config=pipe_config)
    return pipeline


def test_digit_years(nlp: Language) -> None:
    """Digit-based year durations are tagged with the configured label."""
    doc = nlp("I will try that in 1 year and 2 years")

    # Guard against a vacuous pass: without it the loop below succeeds even
    # when the pipeline detects no entities at all.
    assert doc.ents, "expected at least one entity to be detected"
    for ent in doc.ents:
        assert ent.label_ == label


def test_word_years(nlp: Language) -> None:
    """Word-based year durations are tagged with the configured label."""
    doc = nlp("I will try that in one year and two years")

    # Guard against a vacuous pass: without it the loop below succeeds even
    # when the pipeline detects no entities at all.
    assert doc.ents, "expected at least one entity to be detected"
    for ent in doc.ents:
        assert ent.label_ == label


def test_digit_hyphen_years(nlp: Language) -> None:
    """A hyphenated digit duration ("1-year") yields exactly one labelled entity."""
    entities = nlp("1-year repeat is ideal.").ents

    assert len(entities) == 1
    only_entity = entities[0]
    assert only_entity.label_ == label


def test_word_hyphen_years(nlp: Language) -> None:
    """A hyphenated word duration ("one-year") yields exactly one labelled entity."""
    entities = nlp("one-year repeat is ideal.").ents

    assert len(entities) == 1
    only_entity = entities[0]
    assert only_entity.label_ == label


def test_digit_hours(nlp: Language) -> None:
    """The text "repeat in 48-72 hours" yields exactly one labelled entity."""
    entities = nlp("repeat in 48-72 hours").ents

    assert len(entities) == 1
    only_entity = entities[0]
    assert only_entity.label_ == label


def test_word_hours(nlp: Language) -> None:
    """Two word-based hour durations are each detected and labelled."""
    doc = nlp("I will try that in one hour and two hours")

    # Leftover debug print removed; on failure pytest already reports the
    # compared values of the failing assertion.
    assert len(doc.ents) == 2
    for ent in doc.ents:
        assert ent.label_ == label
1 change: 1 addition & 0 deletions timexy/languages/en.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"M": ["month", "months"],
"W": ["week", "weeks"],
"D": ["day", "days"],
"H": ["hour", "hours"],
},
num_words=[
"zero",
Expand Down
3 changes: 2 additions & 1 deletion timexy/timexy.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,15 @@ def __init__(
for val in vals:
self.matcher.add(
key,
[[{"IS_DIGIT": True}, {"TEXT": val}]],
[[{"IS_DIGIT": True}, {"TEXT": "-", "OP": "?"}, {"TEXT": val}]],
)

self.matcher.add(
key,
[
[
{"LOWER": {"IN": self.timexy_lang.num_words}},
{"TEXT": "-", "OP": "?"},
{"LOWER": val.lower()},
]
],
Expand Down