From f767951150a9d64c301f54a24dfd56ad6d3ec7cd Mon Sep 17 00:00:00 2001 From: Felix <65565033+fexfl@users.noreply.github.com> Date: Thu, 26 Dec 2024 09:40:20 +0100 Subject: [PATCH] Removed batch print statements --- mailcom/parse.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mailcom/parse.py b/mailcom/parse.py index 12fc739..6f18192 100644 --- a/mailcom/parse.py +++ b/mailcom/parse.py @@ -230,9 +230,7 @@ def pseudonymize(self, text: str): ] pseudonymized_batches = [] for batch in batches: - print(batch) batch = self.concatenate(batch) - print(batch) batch = self.pseudonymize_email_addresses(batch) ner = self.get_ner(batch) ps_sent = " ".join(self.pseudonymize_ne(ner, batch)) if ner else batch @@ -272,7 +270,7 @@ def make_dir(path: str): pseudonymizer = Pseudonymize() pseudonymizer.init_spacy("fr") pseudonymizer.init_transformers() - pseudonymizer.set_sentence_batch_size(1000) + pseudonymizer.set_sentence_batch_size(2) for file in io.email_list: print("Parsing input file {}".format(file)) text = io.get_text(file)