From 0a2aef720c2a5818997db10cd916d839572382c3 Mon Sep 17 00:00:00 2001
From: Gabo <gabriel.piles.glez@gmail.com>
Date: Fri, 1 Nov 2024 14:15:59 +0100
Subject: [PATCH] Remove unnecessary classes

---
 pyproject.toml                                |  4 ++--
 requirements.txt                              |  2 +-
 .../TrainableEntityExtractor.py               |  8 ++++----
 .../data/ExtractionTask.py                    |  8 --------
 .../data/LogSeverity.py                       |  6 ++++++
 .../data/LogsMessage.py                       | 19 -------------------
 .../data/Options.py                           |  9 ---------
 src/trainable_entity_extractor/data/Params.py | 10 ----------
 .../data/ResultsMessage.py                    | 16 ----------------
 .../extractors/ToTextExtractor.py             |  4 ++--
 .../PdfToMultiOptionExtractor.py              |  4 ++--
 src/trainable_entity_extractor/send_logs.py   |  6 +++---
 12 files changed, 20 insertions(+), 76 deletions(-)
 delete mode 100644 src/trainable_entity_extractor/data/ExtractionTask.py
 create mode 100644 src/trainable_entity_extractor/data/LogSeverity.py
 delete mode 100644 src/trainable_entity_extractor/data/LogsMessage.py
 delete mode 100644 src/trainable_entity_extractor/data/Options.py
 delete mode 100644 src/trainable_entity_extractor/data/Params.py
 delete mode 100644 src/trainable_entity_extractor/data/ResultsMessage.py

diff --git a/pyproject.toml b/pyproject.toml
index 365d8f9..f07dbc1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,12 +1,12 @@
 [project]
 name = "trainable-entity-extractor"
-version = "2024.11.1.5003"
+version = "2024.11.1.5004"
 description = "This tool is a trainable text/PDF to entity extractor"
 license = { file = "LICENSE" }
 authors = [{ name = "HURIDOCS" }]
 requires-python = ">= 3.11"
 dependencies = [
-    "pdf-document-layout-analysis @ git+https://github.com/huridocs/pdf-document-layout-analysis@3b990f6dce978feebd59ae0c7de472e5e311387f",
+    "pdf-document-layout-analysis @ git+https://github.com/huridocs/pdf-document-layout-analysis@2f634aa02deb7d1cd53370ec18ee62a325943016",
     "python-Levenshtein==0.25.1",
     "tdda==2.0.9",
     "datasets==2.19.0",
diff --git a/requirements.txt b/requirements.txt
index cd5d4cc..2490221 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/huridocs/pdf-document-layout-analysis@949014eb06c1e5010f1fa4e89c8c48ad0ebebea4
+git+https://github.com/huridocs/pdf-document-layout-analysis@2f634aa02deb7d1cd53370ec18ee62a325943016
 python-Levenshtein==0.25.1
 tdda==2.0.9
 datasets==2.19.0
diff --git a/src/trainable_entity_extractor/TrainableEntityExtractor.py b/src/trainable_entity_extractor/TrainableEntityExtractor.py
index 01085c3..7c97b5e 100755
--- a/src/trainable_entity_extractor/TrainableEntityExtractor.py
+++ b/src/trainable_entity_extractor/TrainableEntityExtractor.py
@@ -1,7 +1,7 @@
 from time import time
 
 from trainable_entity_extractor.data.ExtractionIdentifier import ExtractionIdentifier
-from trainable_entity_extractor.data.LogsMessage import Severity
+from trainable_entity_extractor.data.LogSeverity import LogSeverity
 from trainable_entity_extractor.data.PredictionSample import PredictionSample
 from trainable_entity_extractor.data.Suggestion import Suggestion
 from trainable_entity_extractor.extractors.ExtractorBase import ExtractorBase
@@ -50,14 +50,14 @@ def train(self, extraction_data: ExtractionData) -> (bool, str):
             self.extraction_identifier.save_extractor_used(extractor_instance.get_name())
             return extractor_instance.create_model(extraction_data)
 
-        send_logs(self.extraction_identifier, "Error creating extractor", Severity.error)
+        send_logs(self.extraction_identifier, "Error creating extractor", LogSeverity.error)
 
         return False, "Error creating extractor"
 
     def predict(self, prediction_samples: list[PredictionSample]) -> list[Suggestion]:
         extractor_name = self.extraction_identifier.get_extractor_used()
         if not extractor_name:
-            send_logs(self.extraction_identifier, f"No extractor available", Severity.error)
+            send_logs(self.extraction_identifier, f"No extractor available", LogSeverity.error)
             return []
 
         for extractor in self.EXTRACTORS:
@@ -71,5 +71,5 @@ def predict(self, prediction_samples: list[PredictionSample]) -> list[Suggestion
             send_logs(self.extraction_identifier, message)
             return suggestions
 
-        send_logs(self.extraction_identifier, f"No extractor available", Severity.error)
+        send_logs(self.extraction_identifier, f"No extractor available", LogSeverity.error)
         return []
diff --git a/src/trainable_entity_extractor/data/ExtractionTask.py b/src/trainable_entity_extractor/data/ExtractionTask.py
deleted file mode 100644
index 42dd5c9..0000000
--- a/src/trainable_entity_extractor/data/ExtractionTask.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from pydantic import BaseModel
-from trainable_entity_extractor.data.Params import Params
-
-
-class ExtractionTask(BaseModel):
-    tenant: str
-    task: str
-    params: Params
diff --git a/src/trainable_entity_extractor/data/LogSeverity.py b/src/trainable_entity_extractor/data/LogSeverity.py
new file mode 100644
index 0000000..42b40d8
--- /dev/null
+++ b/src/trainable_entity_extractor/data/LogSeverity.py
@@ -0,0 +1,6 @@
+from enum import Enum
+
+
+class LogSeverity(str, Enum):  # Severity passed to send_logs; members are also str instances.
+    error = "error"  # the only severity send_logs does not route to config_logger.info
+    info = "info"  # default severity of send_logs
diff --git a/src/trainable_entity_extractor/data/LogsMessage.py b/src/trainable_entity_extractor/data/LogsMessage.py
deleted file mode 100644
index 7b1eaba..0000000
--- a/src/trainable_entity_extractor/data/LogsMessage.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import json
-from enum import Enum
-
-from pydantic import BaseModel
-
-
-class Severity(str, Enum):
-    error = "error"
-    info = "info"
-
-
-class LogsMessage(BaseModel):
-    tenant: str
-    extraction_name: str
-    severity: Severity
-    message: str
-
-    def dump(self):
-        return json.loads(self.model_dump_json())
diff --git a/src/trainable_entity_extractor/data/Options.py b/src/trainable_entity_extractor/data/Options.py
deleted file mode 100644
index c8c93ff..0000000
--- a/src/trainable_entity_extractor/data/Options.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from pydantic import BaseModel
-
-from trainable_entity_extractor.data.Option import Option
-
-
-class Options(BaseModel):
-    tenant: str
-    extraction_id: str
-    options: list[Option]
diff --git a/src/trainable_entity_extractor/data/Params.py b/src/trainable_entity_extractor/data/Params.py
deleted file mode 100644
index b987c05..0000000
--- a/src/trainable_entity_extractor/data/Params.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from pydantic import BaseModel
-
-from trainable_entity_extractor.data.Option import Option
-
-
-class Params(BaseModel):
-    id: str
-    options: list[Option] = list()
-    multi_value: bool = False
-    metadata: dict[str, str] = dict()
diff --git a/src/trainable_entity_extractor/data/ResultsMessage.py b/src/trainable_entity_extractor/data/ResultsMessage.py
deleted file mode 100644
index 4aa84a1..0000000
--- a/src/trainable_entity_extractor/data/ResultsMessage.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from typing import Optional
-
-from pydantic import BaseModel
-from trainable_entity_extractor.data.Params import Params
-
-
-class ResultsMessage(BaseModel):
-    tenant: str
-    task: str
-    params: Params
-    success: bool
-    error_message: str
-    data_url: Optional[str] = None
-
-    def to_string(self):
-        return f"tenant: {self.tenant}, id: {self.params.id}, task: {self.task}, success: {self.success}, error_message: {self.error_message}"
diff --git a/src/trainable_entity_extractor/extractors/ToTextExtractor.py b/src/trainable_entity_extractor/extractors/ToTextExtractor.py
index 59f107c..738e703 100644
--- a/src/trainable_entity_extractor/extractors/ToTextExtractor.py
+++ b/src/trainable_entity_extractor/extractors/ToTextExtractor.py
@@ -1,7 +1,7 @@
 from trainable_entity_extractor.config import config_logger
 from trainable_entity_extractor.data.ExtractionData import ExtractionData
 from trainable_entity_extractor.data.ExtractionIdentifier import ExtractionIdentifier
-from trainable_entity_extractor.data.LogsMessage import Severity
+from trainable_entity_extractor.data.LogSeverity import LogSeverity
 from trainable_entity_extractor.data.PredictionSample import PredictionSample
 from trainable_entity_extractor.data.Suggestion import Suggestion
 from trainable_entity_extractor.extractors.ExtractorBase import ExtractorBase
@@ -99,7 +99,7 @@ def get_best_method(self, extraction_data: ExtractionData):
                 performance = method_instance.performance(training_set, test_set)
             except Exception as e:
                 message = f"Error checking {method_instance.get_name()}"
-                send_logs(self.extraction_identifier, message, Severity.error, e)
+                send_logs(self.extraction_identifier, message, LogSeverity.error, e)
                 performance = 0
             performance_log += f"{method_instance.get_name()}: {round(performance, 2)}%\n"
             send_logs(self.extraction_identifier, f"Performance {method_instance.get_name()}: {performance}%")
diff --git a/src/trainable_entity_extractor/extractors/pdf_to_multi_option_extractor/PdfToMultiOptionExtractor.py b/src/trainable_entity_extractor/extractors/pdf_to_multi_option_extractor/PdfToMultiOptionExtractor.py
index 027dfed..dee6ed2 100644
--- a/src/trainable_entity_extractor/extractors/pdf_to_multi_option_extractor/PdfToMultiOptionExtractor.py
+++ b/src/trainable_entity_extractor/extractors/pdf_to_multi_option_extractor/PdfToMultiOptionExtractor.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 
 from trainable_entity_extractor.data.ExtractionIdentifier import ExtractionIdentifier
-from trainable_entity_extractor.data.LogsMessage import Severity
+from trainable_entity_extractor.data.LogSeverity import LogSeverity
 from trainable_entity_extractor.data.Option import Option
 from trainable_entity_extractor.data.PredictionSample import PredictionSample
 from trainable_entity_extractor.data.Suggestion import Suggestion
@@ -221,7 +221,7 @@ def get_method_performance(
         try:
             performance = method.get_performance(train_set, test_set)
         except Exception as e:
-            severity = Severity.error if method.REPORT_ERRORS else Severity.info
+            severity = LogSeverity.error if method.REPORT_ERRORS else LogSeverity.info
             send_logs(self.extraction_identifier, f"Error checking {method.get_name()}", severity, e)
             performance = 0
 
diff --git a/src/trainable_entity_extractor/send_logs.py b/src/trainable_entity_extractor/send_logs.py
index 085c12b..4396a20 100644
--- a/src/trainable_entity_extractor/send_logs.py
+++ b/src/trainable_entity_extractor/send_logs.py
@@ -2,16 +2,16 @@
 
 from trainable_entity_extractor.config import config_logger
 from trainable_entity_extractor.data.ExtractionIdentifier import ExtractionIdentifier
-from trainable_entity_extractor.data.LogsMessage import Severity
+from trainable_entity_extractor.data.LogSeverity import LogSeverity
 
 
 def send_logs(
     extraction_identifier: ExtractionIdentifier,
     message: str,
-    severity: Severity = Severity.info,
+    severity: LogSeverity = LogSeverity.info,
     exception: Exception = None,
 ):
-    if severity != Severity.error:
+    if severity != LogSeverity.error:
         config_logger.info(message + " for " + extraction_identifier.model_dump_json())
         return