From 06a5c0cad99e7576883d359dddcbc8d54bb01d9e Mon Sep 17 00:00:00 2001 From: alanakbik Date: Sat, 11 Jan 2025 15:42:13 +0100 Subject: [PATCH] Mypy fix --- flair/datasets/sequence_labeling.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/flair/datasets/sequence_labeling.py b/flair/datasets/sequence_labeling.py index 479e9e71e5..80fc6d38ba 100644 --- a/flair/datasets/sequence_labeling.py +++ b/flair/datasets/sequence_labeling.py @@ -521,6 +521,12 @@ def __init__( self.default_whitespace_after = default_whitespace_after self.documents_as_sentences = documents_as_sentences + if documents_as_sentences and not document_separator_token: + log.error( + "document_as_sentences was set to True, but no document_separator_token was provided. Please set" + "a value for document_separator_token in order to enable the document_as_sentence functionality." + ) + # store either Sentence objects in memory, or only file offsets self.in_memory = in_memory @@ -834,7 +840,7 @@ def _remap_label(self, tag): def __line_completes_sentence(self, line: str) -> bool: - if self.documents_as_sentences: + if self.documents_as_sentences and self.document_separator_token: if line.startswith(self.document_separator_token): return True else: