diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 64918a8..6a16ea3 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -23,12 +23,22 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + - name: Shutdown Ubuntu MySQL (SUDO) + run: sudo service mysql stop + - name: set up mysql + uses: mirromutth/mysql-action@v1.1 + with: + mysql version: '9.0.1' + mysql database: 'email_donations' + mysql user: 'donor' + mysql password: ${{ secrets.MYSQL_PASSWORD }} - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install -r requirements.txt - name: run base tests env: + MYSQL_PASSWORD: ${{ secrets.MYSQL_PASSWORD }} FLASK_SECRET_KEY: ${{ secrets.FLASK_SECRET_KEY }} run: | cd src/app diff --git a/Dockerfile b/Dockerfile index 14486ef..481fe14 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,6 +5,4 @@ COPY ./src/app /app COPY ./requirements.txt /app RUN pip install --upgrade pip -RUN pip install -r requirements.txt - -ENV FLASK_SECRET_KEY "sdfk" \ No newline at end of file +RUN pip install -r requirements.txt \ No newline at end of file diff --git a/README.md b/README.md index 004d322..35c6778 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,10 @@ # donation-webserver Webserver for email donation + +# Install [mysqlclient](https://github.com/PyMySQL/mysqlclient) dependencies + +``` +sudo apt-get install python3-dev default-libmysqlclient-dev build-essential pkg-config + + +``` diff --git a/docker-compose.yml b/docker-compose.yml index 34e1c54..1ad3732 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,6 +9,24 @@ services: - ./src/nginx/keys/:/etc/nginx/ssl/:ro webapp: build: . 
+ env_file: + - flask.env command: gunicorn --bind 0.0.0.0:8000 wsgi:app expose: - "8000" + db: + image: mysql:9.0.1 + restart: always + env_file: + - db.env + command: --pid-file /var/lib/mysql/mysqld.pid + ports: + - '3306:3306' + expose: + - '3306' + volumes: + - my-db:/var/lib/mysql + +volumes: + my-db: + diff --git a/requirements.txt b/requirements.txt index eaa1397..95a0c35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,9 @@ flask flask-sqlalchemy sqlalchemy +sqlalchemy-utils +cryptography +mysqlclient gunicorn pytest pytest-cov \ No newline at end of file diff --git a/src/app/website/__init__.py b/src/app/website/__init__.py index 0ce11af..1ea8f3c 100644 --- a/src/app/website/__init__.py +++ b/src/app/website/__init__.py @@ -1,10 +1,10 @@ from flask import Flask from flask_sqlalchemy import SQLAlchemy from sqlalchemy.orm import DeclarativeBase -from os import path - -DB_NAME = "email_donations.db" +import os +DB_NAME = "email_donations" +PASSWD = os.getenv("MYSQL_PASSWORD") class Base(DeclarativeBase): pass @@ -17,7 +17,8 @@ def create_app(): app = Flask(__name__) app.config.from_prefixed_env() # reads the key from FLASK_SECRET_KEY env var - app.config["SQLALCHEMY_DATABASE_URI"] = f"sqlite:///{DB_NAME}" + # app.config['SQLALCHEMY_DATABASE_URI'] = f'mysql://donor:{PASSWD}@localhost/{DB_NAME}' + app.config['SQLALCHEMY_DATABASE_URI'] = f'mysql://donor:{PASSWD}@127.0.0.1/{DB_NAME}' db.init_app(app) from .views import views @@ -35,8 +36,3 @@ def create_app(): return app - -def create_database(app): - if not path.exists("website/" + DB_NAME): - db.create_all(app=app) - print("Created Database!") diff --git a/src/app/website/donate.py b/src/app/website/donate.py index 7719ce2..f4c6d86 100644 --- a/src/app/website/donate.py +++ b/src/app/website/donate.py @@ -1,11 +1,11 @@ from flask import Blueprint, render_template, request, flash, redirect, url_for from werkzeug.security import generate_password_hash +from sqlalchemy import func, cast, VARBINARY 
from .models import RawData from . import db donate = Blueprint("donate", __name__) - @donate.route("/donation", methods=["GET", "POST"]) def donation(): if request.method == "GET": @@ -18,14 +18,18 @@ def donation(): else: # at the moment we are generating the hash checksum for the raw text new_submission = RawData( - donation=text, - checksum=generate_password_hash(text, method="pbkdf2:sha256"), + donation=text + # checksum=generate_password_hash(text, method="pbkdf2:sha256"), ) # add to db db.session.add(new_submission) # make commit to db db.session.commit() flash("Text input received", category="success") + # results = db.session.query(RawData).filter_by( + # donation='text').all() + # for result in results: + # print(f"ID: {result.donor_id}, Donation: {result.donation}") # redirect to homepage return redirect(url_for("views.home")) diff --git a/src/app/website/models.py b/src/app/website/models.py index 6af6f8b..933c474 100644 --- a/src/app/website/models.py +++ b/src/app/website/models.py @@ -1,35 +1,66 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship -from sqlalchemy import Integer, String, DateTime, ForeignKey - -# import hashlib -from sqlalchemy.sql import func +from sqlalchemy import Integer, String, DateTime, ForeignKey, LargeBinary, type_coerce, Unicode, BLOB +from sqlalchemy_utils import StringEncryptedType +from sqlalchemy_utils.types.encrypted.encrypted_type import AesGcmEngine, AesEngine +from sqlalchemy.dialects.mysql import VARBINARY, CHAR +from sqlalchemy.types import TypeDecorator +from sqlalchemy.sql import func, cast import datetime +import cryptography from typing import List from . 
import db +secret_key = "1234" + +class EncType(TypeDecorator): + impl = LargeBinary + + def bind_expression(self, bindvalue): + return func.aes_encrypt( + type_coerce(bindvalue, CHAR()), func.unhex(func.sha2(secret_key, 512)), + ) + + def column_expression(self, col): + return cast( + func.aes_decrypt(col, func.unhex(func.sha2(secret_key, 512)),), + CHAR(charset="utf8"), + ) + + # the raw data model class RawData(db.Model): # the submission id donor_id: Mapped[int] = mapped_column(primary_key=True) # should this be the donated data as zip? - donation: Mapped[str] = mapped_column(String, nullable=True) + # use all the emails as string and encrypt + # but somewhere we need to store the blob of the zip file + # here we could encrypt the whole column as it is never touched again + # if emails contain large attachments, could this overflow the database? + # donation: Mapped[str] = mapped_column(StringEncryptedType( + # VARBINARY(5000), + # secret_key, + # AesGcmEngine, + # AesEngine, + # 'pkcs5', + # length=5000), nullable=True) + donation: Mapped[str] = mapped_column(EncType, nullable=True) # the hash checksum of the donation zip file, for example SHA-256 - # could also be SHA-3 - # Compute SHA-256 hash - # sha256_hash = hashlib.sha256(data).hexdigest() - checksum: Mapped[str] = mapped_column(String, nullable=True) + checksum: Mapped[str] = mapped_column(String(128), nullable=True) # Now the metadata # the date of the donation date: Mapped[datetime.datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), nullable=False ) # the email of the donor - email: Mapped[str] = mapped_column(String, nullable=True) + # email goes into different model for newsletter + # email: Mapped[str] = mapped_column(String(500), nullable=True) + # donor consent form + consent: Mapped[bool] = mapped_column(Integer, nullable=True) # the age group of the donor in categories age: Mapped[int] = mapped_column(Integer, nullable=True) # the region of the donor in categories - 
region: Mapped[int] = mapped_column(String, nullable=True) + region: Mapped[int] = mapped_column(String(500), nullable=True) # the gender of the donor in categories gender: Mapped[int] = mapped_column(Integer, nullable=True) # if the emails are in the mother tongue of the donor @@ -44,14 +75,29 @@ class ProcessedData(db.Model): # the submission id id: Mapped[int] = mapped_column(Integer, primary_key=True) # the raw email text - raw_email: Mapped[str] = mapped_column(String, nullable=False) + raw_email: Mapped[str] = mapped_column(String(5000), nullable=False) # the processed pseudonymized email text - processed_email: Mapped[str] = mapped_column(String, nullable=False) + processed_email: Mapped[str] = mapped_column(String(5000), nullable=False) # the date of the processing date: Mapped[datetime.datetime] = mapped_column( DateTime(timezone=True), default=func.now(), nullable=False ) + # date the email was sent + date_sent: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), default=func.now(), nullable=False + ) + # if attachments were included + attachments: Mapped[bool] = mapped_column(Integer, nullable=False) + # type of the attachments + attachment_type: Mapped[str] = mapped_column(String(50), nullable=False) # the language of the email - language: Mapped[str] = mapped_column(String, nullable=False) + language: Mapped[str] = mapped_column(String(50), nullable=False) # the original donation id, one to many relationship donation_id: Mapped[int] = mapped_column(ForeignKey("raw_data.donor_id")) + +class InformantList(db.Model): + # the submission id + id: Mapped[int] = mapped_column(Integer, primary_key=True) + # the informant email + # should this be encrypted? + informant_email: Mapped[str] = mapped_column(String(500), nullable=False)