From a5c733c09200b69cd2952fe0dd57ccb3566acaf0 Mon Sep 17 00:00:00 2001 From: Alexis de Lattre Date: Thu, 1 Aug 2024 21:43:09 +0000 Subject: [PATCH] [FIX] account_invoice_import_simple_pdf: add seek(0) to avoid error "empty file" Add try/except on pypdf text extraction --- .../wizard/account_invoice_import.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/account_invoice_import_simple_pdf/wizard/account_invoice_import.py b/account_invoice_import_simple_pdf/wizard/account_invoice_import.py index fd0f904e06..8d103e2cfa 100644 --- a/account_invoice_import_simple_pdf/wizard/account_invoice_import.py +++ b/account_invoice_import_simple_pdf/wizard/account_invoice_import.py @@ -71,16 +71,19 @@ def _simple_pdf_text_extraction_pymupdf(self, fileobj, test_info): @api.model def _simple_pdf_text_extraction_pypdf(self, fileobj, test_info): res = False - reader = pypdf.PdfReader(fileobj.name) - pages = [] - for pdf_page in reader.pages: - pages.append(pdf_page.extract_text()) - res = { - "all": "\n\n".join(pages), - "first": pages and pages[0] or "", - } - test_info["text_extraction"] = "pypdf %s" % pypdf.__version__ - logger.info("Text extraction made with pypdf %s", pypdf.__version__) + try: + reader = pypdf.PdfReader(fileobj.name) + pages = [] + for pdf_page in reader.pages: + pages.append(pdf_page.extract_text()) + res = { + "all": "\n\n".join(pages), + "first": pages and pages[0] or "", + } + test_info["text_extraction"] = "pypdf %s" % pypdf.__version__ + logger.info("Text extraction made with pypdf %s", pypdf.__version__) + except Exception as e: + logger.warning("Text extraction with pypdf failed. Error: %s", e) return res @api.model @@ -184,6 +187,7 @@ def simple_pdf_text_extraction(self, file_data, test_info): "wb", prefix="odoo-simple-pdf-", suffix=".pdf" ) as fileobj: fileobj.write(file_data) + fileobj.seek(0) # Extract text from PDF # Very interesting reading: # https://dida.do/blog/how-to-extract-text-from-pdf