Skip to content

Commit

Permalink
Add tools to create mock database for testing
Browse files Browse the repository at this point in the history
  • Loading branch information
albireox committed Feb 12, 2024
1 parent e3a3124 commit ef71767
Show file tree
Hide file tree
Showing 2 changed files with 710 additions and 0 deletions.
100 changes: 100 additions & 0 deletions tests/scripts/create_test_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# @Author: José Sánchez-Gallego ([email protected])
# @Date: 2024-02-11
# @Filename: create_test_database.py
# @License: BSD 3-clause (http://www.opensource.org/licenses/BSD-3-Clause)

from __future__ import annotations

import pathlib
import subprocess

from sdssdb.peewee.sdss5db import catalogdb

from too import console, log
from too.tools import download_file


# Remote location from which the sample files are downloaded when missing locally.
BASE_URL = "https://data.sdss5.org/resources/target/mocks/samples/sdss5db_too_test"
# Local directory where the sample files are looked up and downloaded to; also
# used as the working directory when running psql.
CACHE_PATH = pathlib.Path("~/.cache/sdss/too/samples/sdss5db_too_test").expanduser()

# Name of the test database to connect to and populate.
DBNAME = "sdss5db_too_test"


def create_test_database(
    user: str | None = None,
    host: str | None = None,
    port: int | None = None,
    confirm: bool = True,
):
    """Recreates a simple copy of ``catalogdb`` for testing.

    This script populates a database called ``sdss5db_too_test`` with
    several of the ``catalogdb`` tables, with rows limited to the sample files.

    While in principle it should not be possible to affect the production database
    as the database names are different, please do not run this script at Utah or
    with a tunnel to the production database.

    This script requires the database to have been previously created.

    Parameters
    ----------
    user
        The user to connect to the database. If not provided, the default user
        will be used.
    host
        The host where the database is running. If not provided, the default
        host will be used.
    port
        The port where the database is running. If not provided, the default
        port will be used.
    confirm
        Accepted for interface compatibility; this function does not currently
        act on it.

    Raises
    ------
    ConnectionError
        If the connection to the test database cannot be established.
    RuntimeError
        If the ``catalog`` table already exists in the database.
    subprocess.CalledProcessError
        If the ``psql`` command exits with a non-zero status.

    """

    log.info("Verifying that the database exists.")

    if not catalogdb.database.connect(DBNAME, user=user, host=host, port=port):
        raise ConnectionError("Failed connecting to the database.")

    # Refuse to run on a database that already contains the catalog table.
    if catalogdb.Catalog.table_exists():
        raise RuntimeError(
            "The table catalog exists. "
            "This script can only run on an empty database."
        )

    files = [
        "catalog.csv.gz",
        "sdss_id_stacked.csv.gz",
        "catalog_to_sdss_dr13_photoobj_primary.csv.gz",
        "catalog_to_gaia_dr3_source.csv.gz",
        "catalog_to_twomass_psc.csv.gz",
        "sdss_dr13_photoobj.csv.gz",
        "gaia_dr3_source.csv.gz",
        "twomass_psc.csv.gz",
    ]

    # Only download the sample files that are not already in the local cache.
    for file in files:
        if (CACHE_PATH / file).exists():
            continue

        log.info(f"File {file!r} not found in cache. Downloading from {BASE_URL}.")
        download_file(
            f"{BASE_URL}/{file}",
            CACHE_PATH,
            transient_progress=True,
            console=console,
        )

    log.info("Proceeding with the population of the test database.")
    script_path = pathlib.Path(__file__).parent / "sdss5db_too_test.sql"

    # Build the command as an argument list (no shell) so that paths with spaces
    # or shell metacharacters are passed to psql verbatim.
    psql_command = ["psql", "-d", DBNAME, "-f", str(script_path)]

    # Target the same server that was verified above. Options are only added
    # when explicitly provided so psql's own defaults still apply otherwise.
    if user is not None:
        psql_command += ["-U", user]
    if host is not None:
        psql_command += ["-h", host]
    if port is not None:
        psql_command += ["-p", str(port)]

    # Run from the cache directory; presumably the SQL script refers to the
    # sample files with relative paths (verify against sdss5db_too_test.sql).
    subprocess.run(psql_command, cwd=CACHE_PATH, check=True)


if __name__ == "__main__":
    # Script entry point: populate the test database with default connection settings.
    create_test_database()
Loading

0 comments on commit ef71767

Please sign in to comment.