From a5c733c09200b69cd2952fe0dd57ccb3566acaf0 Mon Sep 17 00:00:00 2001
From: Alexis de Lattre <alexis.delattre@akretion.com>
Date: Thu, 1 Aug 2024 21:43:09 +0000
Subject: [PATCH] [FIX] account_invoice_import_simple_pdf: add seek(0) to avoid
 error "empty file"

Wrap the pypdf text extraction in a try/except so that a pypdf failure is
logged as a warning instead of raising.
---
 .../wizard/account_invoice_import.py          | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/account_invoice_import_simple_pdf/wizard/account_invoice_import.py b/account_invoice_import_simple_pdf/wizard/account_invoice_import.py
index fd0f904e06..8d103e2cfa 100644
--- a/account_invoice_import_simple_pdf/wizard/account_invoice_import.py
+++ b/account_invoice_import_simple_pdf/wizard/account_invoice_import.py
@@ -71,16 +71,19 @@ def _simple_pdf_text_extraction_pymupdf(self, fileobj, test_info):
     @api.model
     def _simple_pdf_text_extraction_pypdf(self, fileobj, test_info):
         res = False
-        reader = pypdf.PdfReader(fileobj.name)
-        pages = []
-        for pdf_page in reader.pages:
-            pages.append(pdf_page.extract_text())
-            res = {
-                "all": "\n\n".join(pages),
-                "first": pages and pages[0] or "",
-            }
-        test_info["text_extraction"] = "pypdf %s" % pypdf.__version__
-        logger.info("Text extraction made with pypdf %s", pypdf.__version__)
+        try:
+            reader = pypdf.PdfReader(fileobj.name)
+            pages = []
+            for pdf_page in reader.pages:
+                pages.append(pdf_page.extract_text())
+                res = {
+                    "all": "\n\n".join(pages),
+                    "first": pages and pages[0] or "",
+                }
+            test_info["text_extraction"] = "pypdf %s" % pypdf.__version__
+            logger.info("Text extraction made with pypdf %s", pypdf.__version__)
+        except Exception as e:
+            logger.warning("Text extraction with pypdf failed. Error: %s", e)
         return res
 
     @api.model
@@ -184,6 +187,7 @@ def simple_pdf_text_extraction(self, file_data, test_info):
             "wb", prefix="odoo-simple-pdf-", suffix=".pdf"
         ) as fileobj:
             fileobj.write(file_data)
+            fileobj.seek(0)
             # Extract text from PDF
             # Very interesting reading:
             # https://dida.do/blog/how-to-extract-text-from-pdf