Skip to content

Commit

Permalink
Fix docx upload with content type (SEA-1476).
Browse files Browse the repository at this point in the history
  • Loading branch information
cyrillkuettel committed Aug 29, 2024
1 parent 1948fe9 commit 7e9c87c
Show file tree
Hide file tree
Showing 19 changed files with 189 additions and 268 deletions.
1 change: 0 additions & 1 deletion src/privatim/cli/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def add_user(
click.echo("First name and last name are required.")
return


user = User(email=email, first_name=first_name, last_name=last_name)
user.generate_profile_picture(dbsession)
user.set_password(password)
Expand Down
4 changes: 4 additions & 0 deletions src/privatim/flash.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from typing import TYPE_CHECKING

from privatim.i18n import translate

if TYPE_CHECKING:
from pyramid.interfaces import IRequest
from typing import Literal
Expand All @@ -21,6 +24,7 @@ def __init__(self, request: 'IRequest') -> None:
def add(self, message: str, typ: 'MessageType' = 'info') -> None:
    """ Queue a translated flash message on the request's session.

    The 'error' type is stored as 'danger'; every other type is stored
    unchanged.
    """
    level = 'danger' if typ == 'error' else typ
    self._request.session.flash(
        {'type': level, 'message': translate(message)}
    )

def pop(self) -> list[dict[str, str]]:
Expand Down
9 changes: 7 additions & 2 deletions src/privatim/forms/fields/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,12 @@ def create(self) -> SearchableFile | None:
assert self.file is not None
self.file.seek(0)
assert self.filename is not None
return SearchableFile(filename=self.filename, content=self.file.read())

return SearchableFile(
filename=self.filename,
content=self.file.read(),
content_type=self.data['mimetype'] if self.data else None,
)

def populate_obj(self, obj: object, name: str) -> None:

Expand Down Expand Up @@ -528,7 +533,7 @@ def process_data(self, value: SearchableFile | None) -> None:
self.data = {
'filename': value.filename,
'size': size,
'mimetype': value.content_type
'mimetype': value.file.content_type
}
else:
super().process_data(value)
Expand Down
9 changes: 9 additions & 0 deletions src/privatim/forms/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@
r'^(?=.{8,})(?=.*[a-z])(?=.*[A-Z])(?=.*[\d])(?=.*[\W]).*$'
)

DEFAULT_DOCX_MIME = ('application/vnd.openxmlformats-officedocument'
'.wordprocessingml.document')
word_mimetypes = {
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
'application/vnd.ms-word.document.macroEnabled.12',
'application/vnd.ms-word.template.macroEnabled.12',
}


class FileSizeLimit:
""" Makes sure an uploaded file is not bigger than the given number of
Expand Down
Binary file modified src/privatim/locale/de/LC_MESSAGES/privatim.mo
Binary file not shown.
22 changes: 13 additions & 9 deletions src/privatim/locale/de/LC_MESSAGES/privatim.po
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE 1.0\n"
"POT-Creation-Date: 2024-08-28 10:51+0200\n"
"POT-Creation-Date: 2024-08-29 15:52+0200\n"
"PO-Revision-Date: 2024-05-21 21:20+0200\n"
"Last-Translator: cyrill <[email protected]>\n"
"Language-Team: German <[email protected]>\n"
Expand Down Expand Up @@ -75,7 +75,7 @@ msgstr "Löschen"
msgid "Comments"
msgstr "Kommentare"

#: src/privatim/layouts/macros.pt
#: src/privatim/layouts/macros.pt src/privatim/views/consultations.py
#: src/privatim/views/templates/search_results.pt
msgid "Deleted User"
msgstr "Gelöschter Benutzer"
Expand Down Expand Up @@ -391,7 +391,7 @@ msgid ""
"Password must have minimal length of 8 characters, contain one upper case "
"letter, one lower case letter, one digit and one special character."
msgstr ""
"Das Passwort muss mindestens 8 Zeichen lang sein und einen Großbuchstaben, "
"Das Passwort muss mindestens 8 Zeichen lang sein und einen Grossbuchstaben, "
"einen Kleinbuchstaben, eine Ziffer und ein Sonderzeichen enthalten."

#: src/privatim/views/comment.py
Expand Down Expand Up @@ -483,8 +483,9 @@ msgid "Save"
msgstr "Speichern"

#: src/privatim/views/templates/consultations.pt
msgid "Unknown Creator"
msgstr "Unbekannter Ersteller"
#: src/privatim/views/templates/activities.pt
msgid "by"
msgstr "von "

#: src/privatim/views/templates/meeting.pt
msgid "Back to Meetings"
Expand Down Expand Up @@ -624,10 +625,6 @@ msgstr "Sitzung geplant"
msgid "Comment Added"
msgstr "Kommentar hinzugefügt"

#: src/privatim/views/templates/activities.pt
msgid "by"
msgstr "von "

#: src/privatim/views/templates/activities.pt
#: src/privatim/forms/agenda_item_form.py
msgid "Date:"
Expand Down Expand Up @@ -1028,3 +1025,10 @@ msgstr "Gremium:"
#: src/privatim/reporting/template/report.pt
msgid "Attendees:"
msgstr "Teilnehmende:"

#, python-format
#~ msgid "Successfully deleted user: ${full_name}."
#~ msgstr "Benutzer ${full_name} erfolgreich gelöscht."

#~ msgid "Unknown Creator"
#~ msgstr "Unbekannter Ersteller"
Binary file modified src/privatim/locale/fr/LC_MESSAGES/privatim.mo
Binary file not shown.
25 changes: 15 additions & 10 deletions src/privatim/locale/fr/LC_MESSAGES/privatim.po
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE 1.0\n"
"POT-Creation-Date: 2024-08-28 10:51+0200\n"
"POT-Creation-Date: 2024-08-29 15:52+0200\n"
"PO-Revision-Date: 2024-04-11 15:53+0200\n"
"Last-Translator: cyrill <[email protected]>\n"
"Language-Team: French <[email protected]>\n"
Expand Down Expand Up @@ -72,7 +72,7 @@ msgstr "Supprimer"
msgid "Comments"
msgstr "Commentaires"

#: src/privatim/layouts/macros.pt
#: src/privatim/layouts/macros.pt src/privatim/views/consultations.py
#: src/privatim/views/templates/search_results.pt
msgid "Deleted User"
msgstr "Utilisateur supprimé"
Expand Down Expand Up @@ -294,8 +294,9 @@ msgstr "Liste des personnes"
msgid ""
"Successfully added user ${first_name} ${last_name}.An email has been sent to "
"the requested account with further information."
msgstr "L'utilisateur ${first_name} ${last_name} a été ajouté avec succès. "
"Un e-mail a été envoyé au compte demandé avec des informations complémentaires. "
msgstr ""
"L'utilisateur ${first_name} ${last_name} a été ajouté avec succès. Un e-mail "
"a été envoyé au compte demandé avec des informations complémentaires. "

#: src/privatim/views/people.py src/privatim/views/templates/people.pt
msgid "Add User"
Expand Down Expand Up @@ -481,8 +482,9 @@ msgid "Save"
msgstr "Sauver"

#: src/privatim/views/templates/consultations.pt
msgid "Unknown Creator"
msgstr "Créateur inconnu"
#: src/privatim/views/templates/activities.pt
msgid "by"
msgstr "de "

#: src/privatim/views/templates/meeting.pt
msgid "Back to Meetings"
Expand Down Expand Up @@ -621,10 +623,6 @@ msgstr "Réunion prévue"
msgid "Comment Added"
msgstr "Commentaire ajouté"

#: src/privatim/views/templates/activities.pt
msgid "by"
msgstr "de "

#: src/privatim/views/templates/activities.pt
#: src/privatim/forms/agenda_item_form.py
msgid "Date:"
Expand Down Expand Up @@ -1023,3 +1021,10 @@ msgstr "Comité:"
#: src/privatim/reporting/template/report.pt
msgid "Attendees:"
msgstr "Participants:"

#, python-format
#~ msgid "Successfully deleted user: ${full_name}."
#~ msgstr "Utilisateur supprimé avec succès: ${full_name}."

#~ msgid "Unknown Creator"
#~ msgstr "Créateur inconnu"
11 changes: 4 additions & 7 deletions src/privatim/locale/privatim.pot
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE 1.0\n"
"POT-Creation-Date: 2024-08-28 10:51+0200\n"
"POT-Creation-Date: 2024-08-29 15:52+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <[email protected]>\n"
Expand Down Expand Up @@ -74,7 +74,7 @@ msgstr ""
msgid "Comments"
msgstr ""

#: ./src/privatim/layouts/macros.pt
#: ./src/privatim/layouts/macros.pt ./src/privatim/views/consultations.py
#: ./src/privatim/views/templates/search_results.pt
msgid "Deleted User"
msgstr ""
Expand Down Expand Up @@ -471,7 +471,8 @@ msgid "Save"
msgstr ""

#: ./src/privatim/views/templates/consultations.pt
msgid "Unknown Creator"
#: ./src/privatim/views/templates/activities.pt
msgid "by"
msgstr ""

#: ./src/privatim/views/templates/meeting.pt
Expand Down Expand Up @@ -612,10 +613,6 @@ msgstr ""
msgid "Comment Added"
msgstr ""

#: ./src/privatim/views/templates/activities.pt
msgid "by"
msgstr ""

#: ./src/privatim/views/templates/activities.pt
#: ./src/privatim/forms/agenda_item_form.py
msgid "Date:"
Expand Down
86 changes: 66 additions & 20 deletions src/privatim/models/file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import uuid
from io import BytesIO
import logging

import magic
from sqlalchemy.dialects.postgresql import TSVECTOR
from sqlalchemy_file import File
from sqlalchemy.orm import (
Expand All @@ -11,14 +13,18 @@
declared_attr,
)

from privatim.forms.validators import word_mimetypes, DEFAULT_DOCX_MIME
from privatim.models.soft_delete import SoftDeleteMixin
from privatim.models.utils import extract_pdf_info, word_count, get_docx_text
from privatim.orm.uuid_type import UUIDStr as UUIDStrType
from privatim.orm.abstract import AbstractFile
from sqlalchemy import Text, Integer, ForeignKey, Computed, Index


from typing import TYPE_CHECKING
logger = logging.getLogger('privatim.models.file')


from typing import TYPE_CHECKING # noqa:E402
if TYPE_CHECKING:
from privatim.models import Consultation

Expand Down Expand Up @@ -71,6 +77,13 @@ class SearchableFile(AbstractFile, SoftDeleteMixin):
nullable=True
)

@property
def content_type(self) -> str:
    """ Content type of the attached file, or '' if no file is set. """
    return self.file.content_type if self.file else ''

@declared_attr # type:ignore[arg-type]
def __table_args__(cls) -> tuple[Index, ...]:
return (
Expand All @@ -92,44 +105,77 @@ def __init__(
self.id = str(uuid.uuid4())
self.filename = filename

content_type = self.maybe_handle_octet_stream(
content, content_type, filename
)

if content_type is None:
content_type = self.get_content_type(filename)
content_type = self.get_content_type(content)

if content_type == 'application/pdf':
pages, extract = extract_pdf_info(BytesIO(content))
self.extract = (extract or '').strip()
self.pages_count = pages
self.word_count = word_count(extract)
elif content_type.startswith(
'application/vnd.openxmlformats-officedocument.wordprocessingml'
):
docx_text = get_docx_text(BytesIO(content))
self.extract = (docx_text or '').strip()
elif content_type in word_mimetypes:
self.extract = (get_docx_text(BytesIO(content)) or '').strip()
elif content_type == 'text/plain':
self.extract = content.decode('utf-8').strip()
self.pages_count = None # Not applicable for text files
self.word_count = word_count(content.decode('utf-8'))
elif content_type == 'application/octet-stream':
self.extract = content.decode('utf-8').strip()
self.pages_count = None # Not applicable for text files
self.word_count = word_count(content.decode('utf-8'))
else:
raise ValueError(f'Unsupported file type: {self.content_type}')
logger.info(f'Unsupported file type: {content_type}')
raise ValueError(f'Unsupported file type: {content_type}')

self.file = File(
content=content,
filename=filename,
content_type=content_type,
)

def maybe_handle_octet_stream(
self,
content: bytes,
content_type: str | None,
filename: str
) -> str | None:
""" Tries to determine the actual file if the content type is
advertised by the request is 'application/octet-stream'. """
if content_type is None:
return None

if content_type and content_type == 'application/octet-stream':
logger.info(
f'Got octet-stream from form file upload.'
f'Filename' f'={filename}'
)
# Saw this happen with a docx if uploading in field 'additional
# file'
content_type = self.get_content_type(content)

# Fallback to filename guess:
if content_type == 'application/octet-stream':
extension = filename.lower().split('.')[-1]
if extension == 'pdf':
content_type = 'application/pdf'
elif extension in ['docx', 'doc', 'docm']:
content_type = DEFAULT_DOCX_MIME
elif extension == 'txt':
content_type = 'text/plain'
else:
raise ValueError(f'Unsupported file type: {extension}')
return content_type

@staticmethod
def get_content_type(filename: str) -> str:
def get_content_type(content: bytes) -> str:
"""
Determine the content type based on the file extension.
Determine the content type of a file using libmagic.
"""
extension = filename.lower().split('.')[-1]
if extension == 'pdf':
return 'application/pdf'
elif extension in ['docx', 'doc', 'docm']:
return ('application/vnd.openxmlformats-officedocument'
'.multiprocessing.document')
elif extension == 'txt':
return 'text/plain'
else:
raise ValueError(f'Unsupported file type: {extension}')

mime = magic.Magic(mime=True)
file_type = mime.from_buffer(content)
return file_type
4 changes: 0 additions & 4 deletions src/privatim/orm/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ def __init__(self, filename: str, content: bytes) -> None:
def content(self) -> bytes:
return self.file.file.read()

@property
def content_type(self) -> str:
return self.file.content_type

def __acl__(self) -> list['ACL']:
return [
(Allow, Authenticated, ['view']),
Expand Down
5 changes: 2 additions & 3 deletions src/privatim/views/consultations.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,8 @@ def consultations_view(request: 'IRequest') -> 'RenderData':
'creator_pic_id': _cons.creator.picture.id if _cons.creator else
None,
'title': Markup(_cons.title),
'creator': _cons.creator,
'has_creator': _cons.creator is not None,
'fullname': _cons.creator.fullname if _cons.creator else None,
'display_name': _cons.creator.fullname if _cons.creator
else _('Deleted User'),
'description': Markup(_cons.description),
'created': _cons.created
} for _cons in session.scalars(stmt).unique().all()
Expand Down
3 changes: 2 additions & 1 deletion src/privatim/views/password_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def mail_retrieval(email: str, request: 'IRequest') -> None:
'action_url': request.route_url(
'password_change',
_query={'token': token_obj.token}
)
),
'product_name': 'privatim',
},
tag='password-reset',
)
Expand Down
Loading

0 comments on commit 7e9c87c

Please sign in to comment.