Skip to content

Commit

Permalink
Merge pull request #9 from jenojp/develop
Browse files: browse the repository at this point in the history
spaCy 3.3 support
  • Loading branch information
jenojp authored May 25, 2022
2 parents ce7edbb + 95c6a0a commit 8a30323
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 23 deletions.
32 changes: 16 additions & 16 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,34 +12,34 @@ trigger:

strategy:
matrix:
Python36Linux:
imageName: 'ubuntu-18.04'
python.version: '3.6'
Python37Linux:
imageName: 'ubuntu-18.04'
imageName: 'ubuntu-20.04'
python.version: '3.7'
Python38Linux:
imageName: 'ubuntu-18.04'
imageName: 'ubuntu-20.04'
python.version: '3.8'
Python36Mac:
imageName: 'macos-10.15'
python.version: '3.6'
Python39Linux:
imageName: 'ubuntu-20.04'
python.version: '3.9'
Python37Mac:
imageName: 'macos-10.15'
imageName: 'macos-11'
python.version: '3.7'
Python38Mac:
imageName: 'macos-10.15'
imageName: 'macos-11'
python.version: '3.8'
Python36Windows:
imageName: 'vs2017-win2016'
python.version: '3.6'
Python39Mac:
imageName: 'macos-11'
python.version: '3.9'
Python37Windows:
imageName: 'vs2017-win2016'
imageName: 'windows-2019'
python.version: '3.7'
Python38Windows:
imageName: 'vs2017-win2016'
imageName: 'windows-2019'
python.version: '3.8'
maxParallel: 4
Python39Windows:
imageName: 'windows-2019'
python.version: '3.9'
maxParallel: 9

pool:
vmImage: $(imageName)
Expand Down
2 changes: 2 additions & 0 deletions extractacy/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def __call__(self, doc):
in the pipeline, if available.
"""
matches = self.matcher(doc)
for match_id, start, end in matches:
print(self.nlp.vocab.strings[match_id], start, end)
for e in doc.ents:
if e.label_ not in self.ent_patterns.keys():
e._.value_extract = []
Expand Down
25 changes: 20 additions & 5 deletions extractacy/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ def build_docs():
"Discharge Date: 11/15/2008. Patient had temp reading of 102.6 degrees. Insurance claim sent to patient's account on file: 1112223. 12/31/2008: Payment received.",
[
("Discharge Date", ["11/15/2008"]),
("11/15/2008", []),
# ("11/15/2008", []),
("temp", ["102.6 degrees"]),
("102.6 degrees", []),
("account", ["1112223"]),
("1112223", []),
("12/31/2008", []),
# ("12/31/2008", []),
("Payment received", ["12/31/2008"]),
],
)
Expand Down Expand Up @@ -60,6 +60,7 @@ def build_docs():
(
"We believe 01/01/1980 is his date of birth but it could also be 01/02/1980",
[
("01/01/1980",[]),
("date of birth", ["01/01/1980", "01/02/1980"]),
("01/02/1980",[])
],
Expand All @@ -78,14 +79,17 @@ def build_docs():
docs.append(
(
"We believe 01/01/1980 is his date of birth",
[("date of birth", ["01/01/1980"])],
[
("01/01/1980", []),
("date of birth", ["01/01/1980"])
],
)
)
# test outside boundary
docs.append(
(
"Discharge date unknown. 12/12/1999 date of confirmation.",
[("Discharge date", []), ("12/12/1999 date", [])],
[("Discharge date", []), ("12/12/1999", [])],
)
)

Expand Down Expand Up @@ -164,9 +168,20 @@ def test():
docs = build_docs()
for d in docs:
doc = nlp(d[0])
print()
print()
print("sentences...")
for s in doc.sents:
print(s)
print("tokens....")
print([t.text for t in doc])
print("entities....")
for e in doc.ents:
print(e.text, e.start, e.end)
print("value extracts....")
for i, e in enumerate(doc.ents):
print(e.text, e._.value_extract)
print([t.text for t in doc])
for i, e in enumerate(doc.ents):
assert (e.text, e._.value_extract) == d[1][i]


Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name = 'extractacy',
version = 'v1.0.1',
version = 'v1.0.2',
url = 'https://github.com/jenojp/extractacy',
author = 'Jeno Pizarro',
author_email = '[email protected]',
Expand All @@ -24,7 +24,7 @@
packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
license="MIT",
install_requires=[
"spacy>=3.0.1,<3.2.0",
"spacy>=3.0.1,<4.0.0",
],
tests_require=[
"pytest",
Expand Down

0 comments on commit 8a30323

Please sign in to comment.