From 4eff1566530a29044d1dd87a9bb8bb4144113ebb Mon Sep 17 00:00:00 2001 From: Jeno Pizarro Date: Thu, 20 Jan 2022 07:45:42 -0500 Subject: [PATCH 1/3] Update setup.py --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b913a64..92c2d1e 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name = 'extractacy', - version = 'v1.0.1', + version = 'v1.0.2', url = 'https://github.com/jenojp/extractacy', author = 'Jeno Pizarro', author_email = 'jenopizzaro@gmail.com', @@ -24,7 +24,7 @@ packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), license="MIT", install_requires=[ - "spacy>=3.0.1,<3.2.0", + "spacy>=3.0.1,<3.3.0", ], tests_require=[ "pytest", From 53312c7af505c2caf7d6f7c5dc9328d2225a3f11 Mon Sep 17 00:00:00 2001 From: Jeno Pizarro Date: Wed, 25 May 2022 15:54:38 -0400 Subject: [PATCH 2/3] update to support > spacy 3.1 --- azure-pipelines.yml | 54 +++++++++++++++++++++---------------------- extractacy/extract.py | 2 ++ extractacy/test.py | 25 ++++++++++++++++---- setup.py | 2 +- 4 files changed, 50 insertions(+), 33 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 39b3636..44d21e1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -12,34 +12,34 @@ trigger: strategy: matrix: - Python36Linux: - imageName: 'ubuntu-18.04' - python.version: '3.6' Python37Linux: - imageName: 'ubuntu-18.04' - python.version: '3.7' - Python38Linux: - imageName: 'ubuntu-18.04' - python.version: '3.8' - Python36Mac: - imageName: 'macos-10.15' - python.version: '3.6' - Python37Mac: - imageName: 'macos-10.15' - python.version: '3.7' - Python38Mac: - imageName: 'macos-10.15' - python.version: '3.8' - Python36Windows: - imageName: 'vs2017-win2016' - python.version: '3.6' - Python37Windows: - imageName: 'vs2017-win2016' - python.version: '3.7' - Python38Windows: - imageName: 'vs2017-win2016' - python.version: '3.8' - maxParallel: 4 + imageName: 'ubuntu-20.04' + python.version: '3.7' + Python38Linux: + imageName: 'ubuntu-20.04' + python.version: '3.8' + Python39Linux: + imageName: 'ubuntu-20.04' + python.version: '3.9' + Python37Mac: + imageName: 'macos-11' + python.version: '3.7' + Python38Mac: + imageName: 'macos-11' + python.version: '3.8' + Python39Mac: + imageName: 'macos-11' + python.version: '3.9' + Python37Windows: + imageName: 'windows-2019' + python.version: '3.7' + Python38Windows: + imageName: 'windows-2019' + python.version: '3.8' + Python39Windows: + imageName: 'windows-2019' + python.version: '3.9' + maxParallel: 9 pool: vmImage: $(imageName) diff --git a/extractacy/extract.py b/extractacy/extract.py index c8c3431..c7c69f2 100644 --- a/extractacy/extract.py +++ b/extractacy/extract.py @@ -23,6 +23,8 @@ def __call__(self, doc): in the pipeline, if available. """ matches = self.matcher(doc) + for match_id, start, end in matches: + print(self.nlp.vocab.strings[match_id], start, end) for e in doc.ents: if e.label_ not in self.ent_patterns.keys(): e._.value_extract = [] diff --git a/extractacy/test.py b/extractacy/test.py index 18041b7..847b460 100644 --- a/extractacy/test.py +++ b/extractacy/test.py @@ -11,12 +11,12 @@ def build_docs(): "Discharge Date: 11/15/2008. Patient had temp reading of 102.6 degrees. Insurance claim sent to patient's account on file: 1112223. 12/31/2008: Payment received.", [ ("Discharge Date", ["11/15/2008"]), - ("11/15/2008", []), + # ("11/15/2008", []), ("temp", ["102.6 degrees"]), ("102.6 degrees", []), ("account", ["1112223"]), ("1112223", []), - ("12/31/2008", []), + # ("12/31/2008", []), ("Payment received", ["12/31/2008"]), ], ) @@ -60,6 +60,7 @@ def build_docs(): ( "We believe 01/01/1980 is his date of birth but it could also be 01/02/1980", [ + ("01/01/1980",[]), ("date of birth", ["01/01/1980", "01/02/1980"]), ("01/02/1980",[]) ], @@ -78,14 +79,17 @@ def build_docs(): docs.append( ( "We believe 01/01/1980 is his date of birth", - [("date of birth", ["01/01/1980"])], + [ + ("01/01/1980", []), + ("date of birth", ["01/01/1980"]) + ], ) ) # test outside boundary docs.append( ( "Discharge date unknown. 12/12/1999 date of confirmation.", - [("Discharge date", []), ("12/12/1999 date", [])], + [("Discharge date", []), ("12/12/1999", [])], ) ) @@ -164,9 +168,20 @@ def test(): docs = build_docs() for d in docs: doc = nlp(d[0]) + print() + print() + print("sentences...") + for s in doc.sents: + print(s) + print("tokens....") + print([t.text for t in doc]) + print("entities....") + for e in doc.ents: + print(e.text, e.start, e.end) + print("value extracts....") for i, e in enumerate(doc.ents): print(e.text, e._.value_extract) - print([t.text for t in doc]) + for i, e in enumerate(doc.ents): assert (e.text, e._.value_extract) == d[1][i] diff --git a/setup.py b/setup.py index 92c2d1e..8647409 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), license="MIT", install_requires=[ - "spacy>=3.0.1,<3.3.0", + "spacy>=3.0.1,<4.0.0", ], tests_require=[ "pytest", From 95c6a0aaf8059a132b5060f09dac76cbd0cd66d2 Mon Sep 17 00:00:00 2001 From: Jeno Pizarro Date: Wed, 25 May 2022 16:04:03 -0400 Subject: [PATCH 3/3] Update azure-pipelines.yml --- azure-pipelines.yml | 52 ++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 44d21e1..208adfe 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -13,32 +13,32 @@ trigger: strategy: matrix: Python37Linux: - imageName: 'ubuntu-20.04' - python.version: '3.7' - Python38Linux: - imageName: 'ubuntu-20.04' - python.version: '3.8' - Python39Linux: - imageName: 'ubuntu-20.04' - python.version: '3.9' - Python37Mac: - imageName: 'macos-11' - python.version: '3.7' - Python38Mac: - imageName: 'macos-11' - python.version: '3.8' - Python39Mac: - imageName: 'macos-11' - python.version: '3.9' - Python37Windows: - imageName: 'windows-2019' - python.version: '3.7' - Python38Windows: - imageName: 'windows-2019' - python.version: '3.8' - Python39Windows: - imageName: 'windows-2019' - python.version: '3.9' + imageName: 'ubuntu-20.04' + python.version: '3.7' + Python38Linux: + imageName: 'ubuntu-20.04' + python.version: '3.8' + Python39Linux: + imageName: 'ubuntu-20.04' + python.version: '3.9' + Python37Mac: + imageName: 'macos-11' + python.version: '3.7' + Python38Mac: + imageName: 'macos-11' + python.version: '3.8' + Python39Mac: + imageName: 'macos-11' + python.version: '3.9' + Python37Windows: + imageName: 'windows-2019' + python.version: '3.7' + Python38Windows: + imageName: 'windows-2019' + python.version: '3.8' + Python39Windows: + imageName: 'windows-2019' + python.version: '3.9' maxParallel: 9 pool: