From 0a2aef720c2a5818997db10cd916d839572382c3 Mon Sep 17 00:00:00 2001 From: Gabo Date: Fri, 1 Nov 2024 14:15:59 +0100 Subject: [PATCH] Remove unnecessary classes --- pyproject.toml | 4 ++-- requirements.txt | 2 +- .../TrainableEntityExtractor.py | 8 ++++---- .../data/ExtractionTask.py | 8 -------- .../data/LogSeverity.py | 6 ++++++ .../data/LogsMessage.py | 19 ------------------- .../data/Options.py | 9 --------- src/trainable_entity_extractor/data/Params.py | 10 ---------- .../data/ResultsMessage.py | 16 ---------------- .../extractors/ToTextExtractor.py | 4 ++-- .../PdfToMultiOptionExtractor.py | 4 ++-- src/trainable_entity_extractor/send_logs.py | 6 +++--- 12 files changed, 20 insertions(+), 76 deletions(-) delete mode 100644 src/trainable_entity_extractor/data/ExtractionTask.py create mode 100644 src/trainable_entity_extractor/data/LogSeverity.py delete mode 100644 src/trainable_entity_extractor/data/LogsMessage.py delete mode 100644 src/trainable_entity_extractor/data/Options.py delete mode 100644 src/trainable_entity_extractor/data/Params.py delete mode 100644 src/trainable_entity_extractor/data/ResultsMessage.py diff --git a/pyproject.toml b/pyproject.toml index 365d8f9..f07dbc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,12 @@ [project] name = "trainable-entity-extractor" -version = "2024.11.1.5003" +version = "2024.11.1.5004" description = "This tool is a trainable text/PDF to entity extractor" license = { file = "LICENSE" } authors = [{ name = "HURIDOCS" }] requires-python = ">= 3.11" dependencies = [ - "pdf-document-layout-analysis @ git+https://github.com/huridocs/pdf-document-layout-analysis@3b990f6dce978feebd59ae0c7de472e5e311387f", + "pdf-document-layout-analysis @ git+https://github.com/huridocs/pdf-document-layout-analysis@2f634aa02deb7d1cd53370ec18ee62a325943016", "python-Levenshtein==0.25.1", "tdda==2.0.9", "datasets==2.19.0", diff --git a/requirements.txt b/requirements.txt index cd5d4cc..2490221 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/huridocs/pdf-document-layout-analysis@949014eb06c1e5010f1fa4e89c8c48ad0ebebea4 +git+https://github.com/huridocs/pdf-document-layout-analysis@2f634aa02deb7d1cd53370ec18ee62a325943016 python-Levenshtein==0.25.1 tdda==2.0.9 datasets==2.19.0 diff --git a/src/trainable_entity_extractor/TrainableEntityExtractor.py b/src/trainable_entity_extractor/TrainableEntityExtractor.py index 01085c3..7c97b5e 100755 --- a/src/trainable_entity_extractor/TrainableEntityExtractor.py +++ b/src/trainable_entity_extractor/TrainableEntityExtractor.py @@ -1,7 +1,7 @@ from time import time from trainable_entity_extractor.data.ExtractionIdentifier import ExtractionIdentifier -from trainable_entity_extractor.data.LogsMessage import Severity +from trainable_entity_extractor.data.LogSeverity import LogSeverity from trainable_entity_extractor.data.PredictionSample import PredictionSample from trainable_entity_extractor.data.Suggestion import Suggestion from trainable_entity_extractor.extractors.ExtractorBase import ExtractorBase @@ -50,14 +50,14 @@ def train(self, extraction_data: ExtractionData) -> (bool, str): self.extraction_identifier.save_extractor_used(extractor_instance.get_name()) return extractor_instance.create_model(extraction_data) - send_logs(self.extraction_identifier, "Error creating extractor", Severity.error) + send_logs(self.extraction_identifier, "Error creating extractor", LogSeverity.error) return False, "Error creating extractor" def predict(self, prediction_samples: list[PredictionSample]) -> list[Suggestion]: extractor_name = self.extraction_identifier.get_extractor_used() if not extractor_name: - send_logs(self.extraction_identifier, f"No extractor available", Severity.error) + send_logs(self.extraction_identifier, f"No extractor available", LogSeverity.error) return [] for extractor in self.EXTRACTORS: @@ -71,5 +71,5 @@ def predict(self, prediction_samples: list[PredictionSample]) -> list[Suggestion send_logs(self.extraction_identifier, message) return suggestions - send_logs(self.extraction_identifier, f"No extractor available", Severity.error) + send_logs(self.extraction_identifier, f"No extractor available", LogSeverity.error) return [] diff --git a/src/trainable_entity_extractor/data/ExtractionTask.py b/src/trainable_entity_extractor/data/ExtractionTask.py deleted file mode 100644 index 42dd5c9..0000000 --- a/src/trainable_entity_extractor/data/ExtractionTask.py +++ /dev/null @@ -1,8 +0,0 @@ -from pydantic import BaseModel -from trainable_entity_extractor.data.Params import Params - - -class ExtractionTask(BaseModel): - tenant: str - task: str - params: Params diff --git a/src/trainable_entity_extractor/data/LogSeverity.py b/src/trainable_entity_extractor/data/LogSeverity.py new file mode 100644 index 0000000..42b40d8 --- /dev/null +++ b/src/trainable_entity_extractor/data/LogSeverity.py @@ -0,0 +1,6 @@ +from enum import Enum + + +class LogSeverity(str, Enum): + error = "error" + info = "info" diff --git a/src/trainable_entity_extractor/data/LogsMessage.py b/src/trainable_entity_extractor/data/LogsMessage.py deleted file mode 100644 index 7b1eaba..0000000 --- a/src/trainable_entity_extractor/data/LogsMessage.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -from enum import Enum - -from pydantic import BaseModel - - -class Severity(str, Enum): - error = "error" - info = "info" - - -class LogsMessage(BaseModel): - tenant: str - extraction_name: str - severity: Severity - message: str - - def dump(self): - return json.loads(self.model_dump_json()) diff --git a/src/trainable_entity_extractor/data/Options.py b/src/trainable_entity_extractor/data/Options.py deleted file mode 100644 index c8c93ff..0000000 --- a/src/trainable_entity_extractor/data/Options.py +++ /dev/null @@ -1,9 +0,0 @@ -from pydantic import BaseModel - -from trainable_entity_extractor.data.Option import Option - - -class Options(BaseModel): - tenant: str - extraction_id: str - options: list[Option] diff --git a/src/trainable_entity_extractor/data/Params.py b/src/trainable_entity_extractor/data/Params.py deleted file mode 100644 index b987c05..0000000 --- a/src/trainable_entity_extractor/data/Params.py +++ /dev/null @@ -1,10 +0,0 @@ -from pydantic import BaseModel - -from trainable_entity_extractor.data.Option import Option - - -class Params(BaseModel): - id: str - options: list[Option] = list() - multi_value: bool = False - metadata: dict[str, str] = dict() diff --git a/src/trainable_entity_extractor/data/ResultsMessage.py b/src/trainable_entity_extractor/data/ResultsMessage.py deleted file mode 100644 index 4aa84a1..0000000 --- a/src/trainable_entity_extractor/data/ResultsMessage.py +++ /dev/null @@ -1,16 +0,0 @@ -from typing import Optional - -from pydantic import BaseModel -from trainable_entity_extractor.data.Params import Params - - -class ResultsMessage(BaseModel): - tenant: str - task: str - params: Params - success: bool - error_message: str - data_url: Optional[str] = None - - def to_string(self): - return f"tenant: {self.tenant}, id: {self.params.id}, task: {self.task}, success: {self.success}, error_message: {self.error_message}" diff --git a/src/trainable_entity_extractor/extractors/ToTextExtractor.py b/src/trainable_entity_extractor/extractors/ToTextExtractor.py index 59f107c..738e703 100644 --- a/src/trainable_entity_extractor/extractors/ToTextExtractor.py +++ b/src/trainable_entity_extractor/extractors/ToTextExtractor.py @@ -1,7 +1,7 @@ from trainable_entity_extractor.config import config_logger from trainable_entity_extractor.data.ExtractionData import ExtractionData from trainable_entity_extractor.data.ExtractionIdentifier import ExtractionIdentifier -from trainable_entity_extractor.data.LogsMessage import Severity +from trainable_entity_extractor.data.LogSeverity import LogSeverity from trainable_entity_extractor.data.PredictionSample import PredictionSample from trainable_entity_extractor.data.Suggestion import Suggestion from trainable_entity_extractor.extractors.ExtractorBase import ExtractorBase @@ -99,7 +99,7 @@ def get_best_method(self, extraction_data: ExtractionData): performance = method_instance.performance(training_set, test_set) except Exception as e: message = f"Error checking {method_instance.get_name()}" - send_logs(self.extraction_identifier, message, Severity.error, e) + send_logs(self.extraction_identifier, message, LogSeverity.error, e) performance = 0 performance_log += f"{method_instance.get_name()}: {round(performance, 2)}%\n" send_logs(self.extraction_identifier, f"Performance {method_instance.get_name()}: {performance}%") diff --git a/src/trainable_entity_extractor/extractors/pdf_to_multi_option_extractor/PdfToMultiOptionExtractor.py b/src/trainable_entity_extractor/extractors/pdf_to_multi_option_extractor/PdfToMultiOptionExtractor.py index 027dfed..dee6ed2 100644 --- a/src/trainable_entity_extractor/extractors/pdf_to_multi_option_extractor/PdfToMultiOptionExtractor.py +++ b/src/trainable_entity_extractor/extractors/pdf_to_multi_option_extractor/PdfToMultiOptionExtractor.py @@ -4,7 +4,7 @@ from pathlib import Path from trainable_entity_extractor.data.ExtractionIdentifier import ExtractionIdentifier -from trainable_entity_extractor.data.LogsMessage import Severity +from trainable_entity_extractor.data.LogSeverity import LogSeverity from trainable_entity_extractor.data.Option import Option from trainable_entity_extractor.data.PredictionSample import PredictionSample from trainable_entity_extractor.data.Suggestion import Suggestion @@ -221,7 +221,7 @@ def get_method_performance( try: performance = method.get_performance(train_set, test_set) except Exception as e: - severity = Severity.error if method.REPORT_ERRORS else Severity.info + severity = LogSeverity.error if method.REPORT_ERRORS else LogSeverity.info send_logs(self.extraction_identifier, f"Error checking {method.get_name()}", severity, e) performance = 0 diff --git a/src/trainable_entity_extractor/send_logs.py b/src/trainable_entity_extractor/send_logs.py index 085c12b..4396a20 100644 --- a/src/trainable_entity_extractor/send_logs.py +++ b/src/trainable_entity_extractor/send_logs.py @@ -2,16 +2,16 @@ from trainable_entity_extractor.config import config_logger from trainable_entity_extractor.data.ExtractionIdentifier import ExtractionIdentifier -from trainable_entity_extractor.data.LogsMessage import Severity +from trainable_entity_extractor.data.LogSeverity import LogSeverity def send_logs( extraction_identifier: ExtractionIdentifier, message: str, - severity: Severity = Severity.info, + severity: LogSeverity = LogSeverity.info, exception: Exception = None, ): - if severity != Severity.error: + if severity != LogSeverity.error: config_logger.info(message + " for " + extraction_identifier.model_dump_json()) return