Skip to content

Commit

Permalink
[WIP] Migrate to PostgreSQL (related to #267) (#280)
Browse files Browse the repository at this point in the history
* feat!: change the code to support postgresql instead of sqlite

* increase corpus name size

* feat: automatically create PostgreSQL database when running manage.py db-create

* fix: fix psycopg2-binary version

* feat: use switch for DBMS-specific calls

* fix: keep naming schema from SQLite development database

* fix: adapt validation to new field size

* fix: use dynamic list length instead of hard-coded value

* fix: fix incorrect number of tokens in new corpus

* feat: add separate PostgreSQL test config

* refactor: use id attribute instead of hard-coded ID

* feat: create test database if it doesn't exist

* fix: PostgreSQL does not advance the autoincrement sequence when a fixed id is given

* fix: PostgreSQL's unique constraint message is different from SQLite's

* fix: in PostgreSQL, all selected non-aggregated fields must be part of the GROUP BY clause

* ci: add PostgreSQL tests to CI

* fix: close connection to PostgreSQL database in tearDown

* fix: create database if it does not exist

---------

Co-authored-by: François Ferry <[email protected]>
Co-authored-by: Carine Dengler <[email protected]>
Co-authored-by: Thibault Clérice <[email protected]>
  • Loading branch information
4 people authored Jan 9, 2024
1 parent 2b1b858 commit a2624cd
Show file tree
Hide file tree
Showing 15 changed files with 140 additions and 46 deletions.
21 changes: 20 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,24 @@ jobs:
test:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [3.8]

test_dbms: [postgresql, sqlite]
services:
postgres:
image: postgres:14-alpine
env:
POSTGRES_USER: pyrrha
POSTGRES_PASSWORD: pyrrha
POSTGRES_DB: data-test
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -28,6 +43,10 @@ jobs:
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Set up TEST_DATABASE_URL
run: |
echo "TEST_DATABASE_URL=postgresql://pyrrha:pyrrha@localhost:5432/data-test" >> $GITHUB_ENV
if: matrix.test_dbms == 'postgresql'
- name: Test with nose
run: |
nosetests ./tests --with-coverage --cover-package=app --cover-xml --verbose
Expand Down
3 changes: 3 additions & 0 deletions app/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
WordToken
)
from app.utils.forms import create_input_format_convertion
from sqlalchemy_utils import database_exists, create_database

app = None

Expand Down Expand Up @@ -76,6 +77,8 @@ def db_create():
""" Creates a local database
"""
with app.app_context():
if not database_exists(db.engine.url):
create_database(db.engine.url)
db.create_all()

Role.add_default_roles()
Expand Down
6 changes: 5 additions & 1 deletion app/main/views/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,11 @@ def error():
except (sqlalchemy.exc.StatementError, sqlalchemy.exc.IntegrityError) as e:
db.session.rollback()
flash("The corpus cannot be registered. Check your data", category="error")
# The driver's wording for a duplicate corpus name depends on the DBMS in use.
# Both needles are written in lower case because they are searched for in the
# lower-cased driver message; an upper-case "UNIQUE" could never match it.
if db.engine.dialect.name == "postgresql":
    unique_constraint = 'duplicate key value violates unique constraint "corpus_name_key"'
else:
    unique_constraint = "unique constraint failed: corpus.name"
if unique_constraint in str(e.orig).lower():
    flash("You have already a corpus going by the name {}".format(request.form.get("name")),
          category="error")
return error()
Expand Down
8 changes: 4 additions & 4 deletions app/models/control_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,8 @@ class AllowedLemma(db.Model):
:param corpus: ID of the corpus this AllowedLemma is related to
"""
id = db.Column(db.Integer, primary_key=True, autoincrement=True)
label = db.Column(db.String(64), nullable=False)
label_uniform = db.Column(db.String(64))
label = db.Column(db.String(128), nullable=False)
label_uniform = db.Column(db.String(128))
control_list = db.Column(db.Integer, db.ForeignKey('control_lists.id'))

__table_args__ = (
Expand Down Expand Up @@ -354,7 +354,7 @@ class AllowedPOS(db.Model):
:param corpus: ID of the corpus this AllowedPOS is related to
"""
id = db.Column(db.Integer, primary_key=True, autoincrement=True)
label = db.Column(db.String(64))
label = db.Column(db.String(128))
control_list = db.Column(db.Integer, db.ForeignKey('control_lists.id'))

@staticmethod
Expand Down Expand Up @@ -402,7 +402,7 @@ class AllowedMorph(db.Model):
:param control_list: ID of the ControlLists this AllowedMorph is related to
"""
id = db.Column(db.Integer, primary_key=True, autoincrement=True)
label = db.Column(db.String(64))
label = db.Column(db.String(128))
readable = db.Column(db.String(256))
control_list = db.Column(db.Integer, db.ForeignKey('control_lists.id'))

Expand Down
45 changes: 26 additions & 19 deletions app/models/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class Corpus(db.Model):
:type name: str
"""
id = db.Column(db.Integer, primary_key=True, autoincrement=True)
name = db.Column(db.String(64), unique=True)
name = db.Column(db.String(256), unique=True)
context_left = db.Column(db.SmallInteger, default=3)
context_right = db.Column(db.SmallInteger, default=3)
control_lists_id = db.Column(db.Integer, db.ForeignKey('control_lists.id'), nullable=False)
Expand Down Expand Up @@ -132,14 +132,19 @@ def static_has_access(corpus_id, user):
return True

def changes_per_day(self):
    """ Count the change records of this corpus for each calendar day

    :return: List of ``(count, day)`` tuples, ``day`` being a ``YYYY-MM-DD`` string built by the database
    :raises NotImplementedError: If the database dialect is neither PostgreSQL nor SQLite
    """
    dialect = db.engine.dialect.name
    # Formatting a timestamp as a day is DBMS-specific, hence one expression per dialect
    if dialect == "postgresql":
        created_on = db.func.to_char(ChangeRecord.created_on, "yyyy-mm-dd")
    elif dialect == "sqlite":
        created_on = db.func.strftime("%Y-%m-%d", ChangeRecord.created_on)
    else:
        # Fail with an explicit message instead of an UnboundLocalError further down
        raise NotImplementedError(
            "changes_per_day does not support the {} dialect".format(dialect)
        )
    rows = db.session.query(
        db.func.count(ChangeRecord.id),
        created_on
    ).filter(
        ChangeRecord.corpus == self.id
    ).group_by(
        # Same expression as the selected column, as PostgreSQL requires
        created_on
    ).all()
    return [tuple(row) for row in rows]

Expand Down Expand Up @@ -620,11 +625,11 @@ class WordToken(db.Model):
id = db.Column(db.Integer, primary_key=True, autoincrement=True)
corpus = db.Column(db.Integer, db.ForeignKey('corpus.id', ondelete='CASCADE'))
order_id = db.Column(db.Integer) # Id in the corpus
form = db.Column(db.String(64))
lemma = db.Column(db.String(64))
label_uniform = db.Column(db.String(64))
POS = db.Column(db.String(64))
morph = db.Column(db.String(64))
form = db.Column(db.String(128))
lemma = db.Column(db.String(128))
label_uniform = db.Column(db.String(128))
POS = db.Column(db.String(128))
morph = db.Column(db.String(128))
left_context = db.Column(db.String(512))
right_context = db.Column(db.String(512))

Expand Down Expand Up @@ -956,7 +961,7 @@ def get_like(filter_id, form, group_by, type_like="lemma", allowed_list=False):
)
)
if group_by is True:
return query.group_by(retrieve_fields[0])
return query.group_by(*retrieve_fields)
return query

@staticmethod
Expand Down Expand Up @@ -1085,7 +1090,7 @@ def add_batch(corpus_id, word_tokens_dict, context_left=None, context_right=None
order_id=i+1 # Asked by JB Camps...
)
for k in ("form",):
validate_length(k, wt[k], {"form": 64})
validate_length(k, wt[k], {"form": 128})
tokens.append(wt)

db.session.bulk_insert_mappings(WordToken, tokens)
Expand Down Expand Up @@ -1357,8 +1362,8 @@ class CorpusCustomDictionary(db.Model):

id = db.Column(db.Integer, primary_key=True, autoincrement=True)
corpus = db.Column(db.Integer, db.ForeignKey('corpus.id'), nullable=False)
label = db.Column(db.String(64), nullable=False)
secondary_label = db.Column(db.String(64))
label = db.Column(db.String(128), nullable=False)
secondary_label = db.Column(db.String(128))
category = db.Column(db.String(10), nullable=False)

search_index = db.Index("ccd-search", "corpus", "label", "secondary_label", "category")
Expand Down Expand Up @@ -1449,6 +1454,8 @@ def get_like(corpus_id, form, group_by, category="lemma"):
)
)
if group_by is True:
if db.engine.dialect.name == "postgresql":
return query.group_by(*retrieve_fields)
return query.group_by(retrieve_fields[0])

return query
Expand All @@ -1460,13 +1467,13 @@ class ChangeRecord(db.Model):
corpus = db.Column(db.Integer, db.ForeignKey('corpus.id'))
word_token_id = db.Column(db.Integer, db.ForeignKey('word_token.id'))
user_id = db.Column(db.Integer, db.ForeignKey(User.id))
form = db.Column(db.String(64))
lemma = db.Column(db.String(64))
POS = db.Column(db.String(64))
morph = db.Column(db.String(64), nullable=True)
lemma_new = db.Column(db.String(64))
POS_new = db.Column(db.String(64))
morph_new = db.Column(db.String(64))
form = db.Column(db.String(128))
lemma = db.Column(db.String(128))
POS = db.Column(db.String(128))
morph = db.Column(db.String(128), nullable=True)
lemma_new = db.Column(db.String(128))
POS_new = db.Column(db.String(128))
morph_new = db.Column(db.String(128))
created_on = db.Column(db.DateTime, server_default=db.func.now())
word_token = db.relationship('WordToken', lazy='select', viewonly=True)
user = db.relationship(User, lazy='select', viewonly=True)
Expand Down
1 change: 0 additions & 1 deletion app/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ def add_default_users():
:return:
"""
default_user = User(
id=1,
first_name="admin",
last_name="admin",
email="[email protected]",
Expand Down
Empty file modified chromedriver.sh
100644 → 100755
Empty file.
19 changes: 13 additions & 6 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ class DevelopmentConfig(Config):
DEBUG = True
ASSETS_DEBUG = True
SQLALCHEMY_DATABASE_URI = os.environ.get('DEV_DATABASE_URL') or \
'sqlite:///' + os.path.join(basedir, 'data-dev.sqlite')
#SQLALCHEMY_DATABASE_URI = "postgres://postgres:[email protected]:5432/postgres"
"postgresql://user:pwd@localhost:5432/data-dev"
print('THIS APP IS IN DEBUG MODE. YOU SHOULD NOT SEE THIS IN PRODUCTION.')

# Email
Expand All @@ -75,11 +74,10 @@ class DevelopmentConfig(Config):
BABEL_TRANSLATION_DIRECTORIES = os.path.join(os.path.dirname(__file__), "translations")


class TestConfig(Config):
class BaseTestConfig(Config):
"""Test configuration base class."""
DEBUG = True
ASSETS_DEBUG = True
SQLALCHEMY_DATABASE_URI = os.environ.get('TEST_DATABASE_URL') or \
'sqlite:///' + os.path.join(basedir, 'data-test.sqlite')
print('THIS APP IS IN DEBUG MODE. YOU SHOULD NOT SEE THIS IN PRODUCTION.')

# Disable CSRF for login purpose
Expand All @@ -101,9 +99,18 @@ class TestConfig(Config):
EMAIL_SENDER = '{app_name} Admin <{email}>'.format(app_name=Config.APP_NAME, email=MAIL_USERNAME)


class SQLiteTestConfig(BaseTestConfig):
    """Test configuration using SQLite.

    ``TEST_DATABASE_URL`` takes precedence when it is set and non-empty;
    otherwise the ``data-test.sqlite`` file under ``basedir`` is used.
    """
    SQLALCHEMY_DATABASE_URI = (
        os.getenv('TEST_DATABASE_URL')
        or 'sqlite:///' + os.path.join(basedir, 'data-test.sqlite')
    )


class PostgreSQLTestConfig(BaseTestConfig):
    """Test configuration using PostgreSQL.

    ``TEST_DATABASE_URL`` takes precedence when it is set and non-empty;
    otherwise the URL ``postgresql:///data-test`` is used.
    """
    SQLALCHEMY_DATABASE_URI = (
        os.getenv('TEST_DATABASE_URL')
        or 'postgresql:///data-test'
    )


# The "test" entry is picked once, at import time, from the TEST_DBMS environment
# variable (case-insensitive). SQLite is used when the variable is unset or holds
# anything other than "postgresql".
_use_postgresql_tests = os.environ.get("TEST_DBMS", "sqlite").lower() == "postgresql"

config = {
    "dev": DevelopmentConfig,
    "prod": Config,
    "test": PostgreSQLTestConfig if _use_postgresql_tests else SQLiteTestConfig,
}
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
Flask==2.1.2
Jinja2==3.1.2
Werkzeug==2.1.2
psycopg2-binary==2.9.3

# DB
Flask-SQLAlchemy==2.5.1
SQLAlchemy==1.4.36
sqlalchemy-utils==0.38.2

# Forms
Flask-WTF==1.0.1
Expand Down Expand Up @@ -33,4 +35,4 @@ cov-core==1.15.0
nose==1.3.7
selenium>=3.141.0
Flask-Testing==0.8.1
mock==4.0.3
mock==4.0.3
4 changes: 2 additions & 2 deletions tests/db_fixtures/floovant.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
from app.models import ControlLists, ControlListsUser


FCL = ControlLists(id=2, name="Floovant")
Floovant = Corpus(
name="Floovant",
id=2,
control_lists_id=2
control_lists_id=FCL.id
)
FloovantColumns = [
Column(heading="Lemma", corpus_id=2),
Column(heading="POS", corpus_id=2),
Column(heading="Morph", corpus_id=2),
Column(heading="Similar", corpus_id=2),
]
FCL = ControlLists(id=2, name="Floovant")
FloovantTokens = [
WordToken(corpus=Floovant.id, form="SOIGNORS", lemma="seignor", left_context="", right_context="or escoutez que",
label_uniform="seignor", morph="NOMB.=p|GENRE=m|CAS=n"),
Expand Down
5 changes: 3 additions & 2 deletions tests/db_fixtures/wauchier.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
from app.models import Corpus, WordToken, AllowedLemma, AllowedPOS, AllowedMorph, Column
from app.models import ControlLists, ControlListsUser


WCL = ControlLists(id=1, name="Wauchier")
Wauchier = Corpus(
name="Wauchier",
id=1,
control_lists_id=1
control_lists_id=WCL.id
)
WauchierColumns = [
Column(heading="Lemma", corpus_id=1),
Column(heading="POS", corpus_id=1),
Column(heading="Morph", corpus_id=1),
Column(heading="Similar", corpus_id=1),
]
WCL = ControlLists(id=1, name="Wauchier")
WauchierTokens = [
WordToken(corpus=Wauchier.id, form="De", lemma="de", POS="PRE", left_context="", right_context="seint Martin mout", label_uniform="de", morph="None"),
WordToken(corpus=Wauchier.id, form="seint", lemma="saint", POS="ADJqua", left_context="De", right_context="Martin mout doit", label_uniform="saint", morph="None"),
Expand Down
8 changes: 6 additions & 2 deletions tests/test_cli/test_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from click.testing import CliRunner
from nose.tools import nottest
from sqlalchemy_utils import database_exists, create_database

from app import create_app, db
from app.cli import make_cli
Expand Down Expand Up @@ -42,6 +43,8 @@ def setUp(self):

# We create all cli to check that it does not overwrite anything
with self.app.app_context():
if not database_exists(db.engine.url):
create_database(db.engine.url)
db.create_all()
db.session.commit()
self.cli = make_cli()
Expand Down Expand Up @@ -321,8 +324,9 @@ def make_test(self, tests, context):
"Context should be right"
)

self.clear_db(self.app)
db.create_all()
self.clear_db(self.app)
with self.app.app_context():
db.create_all()

def test_corpus_from_dir(self):
""" Test that import from a directory works with autogenerated tests"""
Expand Down
Loading

0 comments on commit a2624cd

Please sign in to comment.