Skip to content

Commit

Permalink
[FIX] account_invoice_import_simple_pdf: add seek(0) to avoid error "empty file"
Browse files Browse the repository at this point in the history

Add try/except on pypdf text extraction
  • Loading branch information
alexis-via committed Aug 1, 2024
1 parent 1bddcdf commit a5c733c
Showing 1 changed file with 14 additions and 10 deletions.
24 changes: 14 additions & 10 deletions account_invoice_import_simple_pdf/wizard/account_invoice_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,19 @@ def _simple_pdf_text_extraction_pymupdf(self, fileobj, test_info):
@api.model
def _simple_pdf_text_extraction_pypdf(self, fileobj, test_info):
res = False
reader = pypdf.PdfReader(fileobj.name)
pages = []
for pdf_page in reader.pages:
pages.append(pdf_page.extract_text())
res = {
"all": "\n\n".join(pages),
"first": pages and pages[0] or "",
}
test_info["text_extraction"] = "pypdf %s" % pypdf.__version__
logger.info("Text extraction made with pypdf %s", pypdf.__version__)
try:
reader = pypdf.PdfReader(fileobj.name)
pages = []
for pdf_page in reader.pages:
pages.append(pdf_page.extract_text())
res = {
"all": "\n\n".join(pages),
"first": pages and pages[0] or "",
}
test_info["text_extraction"] = "pypdf %s" % pypdf.__version__
logger.info("Text extraction made with pypdf %s", pypdf.__version__)
except Exception as e:
logger.warning("Text extraction with pypdf failed. Error: %s", e)

Check warning on line 86 in account_invoice_import_simple_pdf/wizard/account_invoice_import.py

View check run for this annotation

Codecov / codecov/patch

account_invoice_import_simple_pdf/wizard/account_invoice_import.py#L85-L86

Added lines #L85 - L86 were not covered by tests
return res

@api.model
Expand Down Expand Up @@ -184,6 +187,7 @@ def simple_pdf_text_extraction(self, file_data, test_info):
"wb", prefix="odoo-simple-pdf-", suffix=".pdf"
) as fileobj:
fileobj.write(file_data)
fileobj.seek(0)
# Extract text from PDF
# Very interesting reading:
# https://dida.do/blog/how-to-extract-text-from-pdf
Expand Down

0 comments on commit a5c733c

Please sign in to comment.