Skip to content

Commit

Permalink
Add near fast segment selector
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriel-piles committed Dec 3, 2024
1 parent a1e957b commit 04df2a2
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 176 deletions.
124 changes: 76 additions & 48 deletions performance_results/segment_selector.md
Original file line number Diff line number Diff line change
@@ -1,49 +1,77 @@
```
+----------------------------------------------------------------------------------------+
|method |dataset | precision| recall| seconds|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |date | 70.37| 27.54| 1|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |date | 97.1| 97.1| 6|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |decides | 66.67| 17.07| 1|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |decides | 84.62| 73.33| 42|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |first_paragraph_having_seen | 66.67| 2.9| 2|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |first_paragraph_having_seen | 100.0| 94.2| 18|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |plan_many_date | 51.35| 23.75| 3|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |plan_many_date | 97.56| 100.0| 12|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |plan_many_title | 90.48| 43.18| 3|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |plan_many_title | 86.67| 88.64| 11|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |president | 90.48| 27.94| 1|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |president | 90.28| 95.59| 6|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |rightdocs_titles | 0.0| 0.0| 0|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |rightdocs_titles | 96.97| 88.89| 3|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |secretary | 0.0| 0.0| 1|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |secretary | 97.01| 97.01| 6|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |semantic_president | 0.0| 0.0| 18|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |semantic_president | 98.96| 60.2| 210|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |signatories | 0.0| 0.0| 1|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |signatories | 97.58| 94.15| 7|
+------------------------+--------------------------------+----------+----------+--------+
|FastSegmentSelector |Average | 43.6| 14.24| 3|
+------------------------+--------------------------------+----------+----------+--------+
|SegmentSelector |Average | 94.67| 88.91| 32|
+----------------------------------------------------------------------------------------+
```
+------------------------------------------------------------------------------------------------------+
|method |dataset | samples| precision| recall| seconds|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |date | 137| 70.37| 27.54| 2|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |date | 137| 33.0| 95.65| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |date | 137| 97.1| 97.1| 6|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |decides | 137| 68.82| 17.07| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |decides | 137| 41.94| 47.2| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |decides | 137| 85.49| 73.87| 42|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |first_paragraph_having_seen | 137| 66.67| 2.9| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |first_paragraph_having_seen | 137| 33.33| 85.51| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |first_paragraph_having_seen | 137| 100.0| 94.2| 14|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |plan_many_date | 159| 51.35| 23.75| 3|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |plan_many_date | 159| 32.89| 93.75| 3|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |plan_many_date | 159| 97.56| 100.0| 10|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |plan_many_title | 159| 90.48| 43.18| 3|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |plan_many_title | 159| 31.31| 70.45| 3|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |plan_many_title | 159| 86.67| 88.64| 15|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |president | 137| 90.48| 27.94| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |president | 137| 30.9| 80.88| 4|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |president | 137| 90.28| 95.59| 11|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |president_empty_labels | 142| 90.48| 28.79| 3|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |president_empty_labels | 142| 30.29| 80.3| 2|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |president_empty_labels | 142| 90.0| 95.45| 23|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |rightdocs_titles | 142| 0.0| 0.0| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |rightdocs_titles | 142| 33.67| 93.06| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |rightdocs_titles | 142| 96.97| 88.89| 5|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |secretary | 137| 0.0| 0.0| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |secretary | 137| 27.37| 77.61| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |secretary | 137| 97.01| 97.01| 6|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |semantic_president | 250| 0.0| 0.0| 17|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |semantic_president | 250| 31.53| 9.81| 18|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |semantic_president | 250| 98.96| 60.2| 209|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |signatories | 137| 0.0| 0.0| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |signatories | 137| 50.48| 91.81| 1|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |signatories | 137| 97.58| 94.15| 7|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|FastSegmentSelector |Average | 1674| 48.06| 15.56| 3|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|NearFastSegmentSelector |Average | 1674| 34.25| 75.09| 3|
+----------------------------+--------------------------------+---------+----------+----------+--------+
|SegmentSelector |Average | 1674| 94.33| 89.55| 32|
+------------------------------------------------------------------------------------------------------+
```
7 changes: 6 additions & 1 deletion src/performance_segment_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
 from py_markdown_table.markdown_table import markdown_table
 from trainable_entity_extractor.data.ExtractionIdentifier import ExtractionIdentifier
 from trainable_entity_extractor.extractors.segment_selector.FastSegmentSelector import FastSegmentSelector
+from trainable_entity_extractor.extractors.segment_selector.NearFastSegmentSelector import NearFastSegmentSelector
 from trainable_entity_extractor.extractors.segment_selector.SegmentSelector import SegmentSelector
 from trainable_entity_extractor.extractors.segment_selector.SegmentSelectorBase import SegmentSelectorBase
 from trainable_entity_extractor.extractors.segment_selector.SegmentSelectorResults import SegmentSelectorResults
 from trainable_entity_extractor.extractors.segment_selector.get_data_for_performance import get_data_for_performance

 extraction_identifier = ExtractionIdentifier(run_name="benchmark", extraction_name="segment_selector")

-METHODS: list[SegmentSelectorBase] = [FastSegmentSelector(extraction_identifier), SegmentSelector(extraction_identifier)]
+METHODS: list[SegmentSelectorBase] = [
+FastSegmentSelector(extraction_identifier),
+NearFastSegmentSelector(extraction_identifier),
+SegmentSelector(extraction_identifier),
+]


 def get_train_test(pdfs_data):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def predictions_scores_to_segments(segments: list[PdfDataSegment], prediction_sc
 predicted_segments.append(segment)
 continue

-if len(prediction_scores) >= i + 1 and prediction_scores[i + 1] > 0.5:
+if len(prediction_scores) > i + 1 and prediction_scores[i + 1] > 0.5:
 predicted_segments.append(segment)
 continue

Expand Down

This file was deleted.

0 comments on commit 04df2a2

Please sign in to comment.