Skip to content

Commit

Permalink
Support customizing DATA_PATH (#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
my8100 authored Aug 3, 2019
1 parent 0d2cf56 commit bd1c71f
Show file tree
Hide file tree
Showing 10 changed files with 98 additions and 50 deletions.
75 changes: 45 additions & 30 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,16 @@ jobs:
use-git:
type: boolean
default: false
use-mysql:
set-data-path:
type: boolean
default: false
use-sqlite:
type: boolean
default: false
use-postgresql:
type: boolean
default: false
use-sqlite:
use-mysql:
type: boolean
default: false
steps:
Expand Down Expand Up @@ -60,6 +63,35 @@ jobs:
command: |
python3 -m venv venv
- when:
condition: <<parameters.set-data-path>>
steps:
# Appends a DATA_PATH assignment (taken from the job's $DATA_PATH
# environment variable) to the custom settings file.
- run:
    name: Set DATA_PATH
    command: |
      printf "\nDATA_PATH = '"$DATA_PATH"'\n" >> scrapydweb_settings_v8.py
- when:
condition: <<parameters.use-sqlite>>
steps:
- run:
name: Set DATABASE_URL to sqlite
command: |
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v8.py
- when:
condition: <<parameters.use-postgresql>>
steps:
- run:
name: Setup PSQL Databases
command: |
# https://discuss.circleci.com/t/multiple-postgres-databases-in-circleci-2-0/23089
# createdb: could not connect to database template1: FATAL: role "circleci" does not exist
# sudo apt install -y postgresql-client
# createdb -h localhost scrapydweb_apscheduler -O circleci
- run:
name: Set DATABASE_URL to postgresql
command: |
# postgres://[email protected]:5432
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v8.py
- when:
condition: <<parameters.use-mysql>>
steps:
Expand Down Expand Up @@ -90,31 +122,7 @@ jobs:
command: |
# mysql://user:[email protected]:3306
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v8.py
cat scrapydweb_settings_v8.py
- when:
condition: <<parameters.use-postgresql>>
steps:
- run:
name: Setup PSQL Databases
command: |
# https://discuss.circleci.com/t/multiple-postgres-databases-in-circleci-2-0/23089
# createdb: could not connect to database template1: FATAL: role "circleci" does not exist
# sudo apt install -y postgresql-client
# createdb -h localhost scrapydweb_apscheduler -O circleci
- run:
name: Set DATABASE_URL to postgresql
command: |
# postgres://[email protected]:5432
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v8.py
cat scrapydweb_settings_v8.py
- when:
condition: <<parameters.use-sqlite>>
steps:
- run:
name: Set DATABASE_URL to sqlite
command: |
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v8.py
cat scrapydweb_settings_v8.py
- run:
name: Install dependencies
command: |
Expand Down Expand Up @@ -160,13 +168,16 @@ jobs:
- run:
name: Generate report
command: |
touch scrapydweb_settings_v8.py
cat scrapydweb_settings_v8.py
echo $DATA_PATH
echo $DATABASE_URL
. venv/bin/activate
coverage report
coverage html
coverage xml
coveralls
ls -la
coveralls
- store_artifacts:
path: htmlcov
- store_artifacts:
Expand All @@ -184,6 +195,7 @@ jobs:
- image: circleci/python:2.7
environment:
SCRAPYDWEB_TESTMODE: True
DATA_PATH: '/home/circleci/repo/scrapydweb_data'
DATABASE_URL: 'sqlite:////home/circleci/repo/scrapydweb_database'
py27-postgresql:
<<: *test-template
Expand Down Expand Up @@ -224,6 +236,7 @@ jobs:
- image: circleci/python:3.6
environment:
SCRAPYDWEB_TESTMODE: True
DATA_PATH: '/home/circleci/repo/scrapydweb_data'
DATABASE_URL: 'sqlite:////home/circleci/repo/scrapydweb_database'
py37-git-postgresql:
<<: *test-template
Expand Down Expand Up @@ -266,6 +279,7 @@ workflows:
is-py27: true
- py27-sqlite:
is-py27: true
set-data-path: true
use-sqlite: true
- py27-postgresql:
is-py27: true
Expand All @@ -274,9 +288,10 @@ workflows:
is-py27: true
use-mysql: true

- py36-sqlite:
use-postgresql: true
- py37
- py36-sqlite:
set-data-path: true
use-sqlite: true
- py37-git-postgresql:
use-git: true
use-postgresql: true
Expand Down
8 changes: 6 additions & 2 deletions scrapydweb/default_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,13 +309,17 @@
# for getting more information about how ScrapydWeb works, especially while debugging.
VERBOSE = False

# The default is '', which means saving data of Jobs and Timer Tasks in the Python directory using SQLite.
# The default is '', which means saving all program data in the Python directory.
# e.g. 'C:/Users/username/scrapydweb_data' or '/home/username/scrapydweb_data'
DATA_PATH = ''

# The default is '', which means saving data of Jobs and Timer Tasks in DATA_PATH using SQLite.
# The data can also be saved in a MySQL or PostgreSQL backend to improve concurrency.
# To use MySQL backend, run command: pip install --upgrade pymysql
# To use PostgreSQL backend, run command: pip install --upgrade psycopg2
# e.g.
# 'mysql://username:password@127.0.0.1:3306'
# 'postgres://username:password@127.0.0.1:5432'
# 'sqlite:///c:/Users/username'
# 'sqlite:///C:/Users/username'
# 'sqlite:////home/username'
DATABASE_URL = ''
4 changes: 4 additions & 0 deletions scrapydweb/templates/scrapydweb/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ <h3>System</h3>
<ul class="collapse">
<li><div class="title"><h4>DEBUG = {{ DEBUG }}</h4></div></li>
<li><div class="title"><h4>VERBOSE = {{ VERBOSE }}</h4></div></li>
<li>
<div class="title"><h4>DATA_PATH</h4><i class="iconfont icon-right"></i></div>
<pre>{{ DATA_PATH }}</pre>
</li>
<li>
<div class="title"><h4>DATABASE</h4><i class="iconfont icon-right"></i></div>
<pre>{{ database_details }}</pre>
Expand Down
1 change: 1 addition & 0 deletions scrapydweb/utils/check_app_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ def check_assert(key, default, is_instance, allow_zero=True, non_empty=False, co
# logging.getLogger('apscheduler').setLevel(logging.DEBUG)
# else:
# logging.getLogger('apscheduler').setLevel(logging.WARNING)
check_assert('DATA_PATH', '', str)
check_assert('DATABASE_URL', '', str)
database_url = config.get('DATABASE_URL', '')
if database_url:
Expand Down
2 changes: 1 addition & 1 deletion scrapydweb/utils/setup_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def setup_database(database_url, database_path):
database_path = os.path.abspath(database_path)
database_path = re.sub(r'\\', '/', database_path)
database_path = re.sub(r'/$', '', database_path)
if not os.path.exists(database_path):
if not os.path.isdir(database_path):
os.mkdir(database_path)

if m_mysql or m_postgres:
Expand Down
16 changes: 13 additions & 3 deletions scrapydweb/vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from apscheduler.schedulers.base import STATE_PAUSED, STATE_RUNNING, STATE_STOPPED

from .default_settings import DATA_PATH as default_data_path
from .default_settings import DATABASE_URL as default_database_url
from .utils.setup_database import setup_database

Expand All @@ -18,14 +19,23 @@
try:
custom_settings_module = importlib.import_module(os.path.splitext(SCRAPYDWEB_SETTINGS_PY)[0])
except ImportError:
custom_data_path = ''
custom_database_url = ''
else:
custom_data_path = getattr(custom_settings_module, 'DATA_PATH', '')
custom_data_path = custom_data_path if isinstance(custom_data_path, str) else ''
custom_database_url = getattr(custom_settings_module, 'DATABASE_URL', '')
custom_database_url = custom_database_url if isinstance(custom_database_url, str) else ''

# For data path
# For data storage
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(ROOT_DIR, 'data')

# Resolve the directory that holds all program data.
# A value from the user's custom settings file takes precedence over the
# packaged default, mirroring how DATABASE_URL is resolved further down.
# When neither is set, fall back to the 'data' folder next to this module.
DATA_PATH = custom_data_path or default_data_path
if DATA_PATH:
    # Normalize so every derived path below is absolute.
    DATA_PATH = os.path.abspath(DATA_PATH)
else:
    DATA_PATH = os.path.join(ROOT_DIR, 'data')

DATABASE_PATH = os.path.join(DATA_PATH, 'database')
DEMO_PROJECTS_PATH = os.path.join(DATA_PATH, 'demo_projects')
DEPLOY_PATH = os.path.join(DATA_PATH, 'deploy')
Expand All @@ -47,7 +57,7 @@
TIMER_TASKS_HISTORY_LOG = os.path.join(HISTORY_LOG, 'timer_tasks_history.log')

# For database
DATABASE_URL = custom_database_url or default_database_url or 'sqlite:///' + DATA_PATH
DATABASE_URL = custom_database_url or default_database_url or 'sqlite:///' + DATABASE_PATH
results = setup_database(DATABASE_URL, DATABASE_PATH)
APSCHEDULER_DATABASE_URI, SQLALCHEMY_DATABASE_URI, SQLALCHEMY_BINDS, DATABASE_PATH = results

Expand Down
6 changes: 4 additions & 2 deletions scrapydweb/views/baseview.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
from ..__version__ import __version__ as SCRAPYDWEB_VERSION
from ..common import (get_now_string, get_response_from_view, handle_metadata,
handle_slash, json_dumps, session)
from ..vars import (ALLOWED_SCRAPYD_LOG_EXTENSIONS, APSCHEDULER_DATABASE_URI, DEMO_PROJECTS_PATH, DEPLOY_PATH,
EMAIL_TRIGGER_KEYS, PARSE_PATH, LEGAL_NAME_PATTERN, SCHEDULE_ADDITIONAL,
from ..vars import (ALLOWED_SCRAPYD_LOG_EXTENSIONS, APSCHEDULER_DATABASE_URI,
DATA_PATH, DEMO_PROJECTS_PATH, DEPLOY_PATH, PARSE_PATH,
EMAIL_TRIGGER_KEYS, LEGAL_NAME_PATTERN, SCHEDULE_ADDITIONAL,
SCHEDULE_PATH, STATE_PAUSED, STATE_RUNNING, STATS_PATH, STRICT_NAME_PATTERN)
from ..utils.scheduler import scheduler

Expand Down Expand Up @@ -52,6 +53,7 @@ def __init__(self, *args, **kwargs):
# System
self.DEBUG = app.config.get('DEBUG', False)
self.VERBOSE = app.config.get('VERBOSE', False)
self.DATA_PATH = DATA_PATH
self.APSCHEDULER_DATABASE_URI = APSCHEDULER_DATABASE_URI
self.SQLALCHEMY_DATABASE_URI = app.config['SQLALCHEMY_DATABASE_URI']
self.SQLALCHEMY_BINDS = app.config['SQLALCHEMY_BINDS']
Expand Down
1 change: 1 addition & 0 deletions scrapydweb/views/system/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def update_kwargs(self):
# System
self.kwargs['DEBUG'] = self.DEBUG
self.kwargs['VERBOSE'] = self.VERBOSE
self.kwargs['DATA_PATH'] = self.DATA_PATH
self.kwargs['database_details'] = self.json_dumps(dict(
APSCHEDULER_DATABASE_URI=self.hide_account(self.APSCHEDULER_DATABASE_URI),
SQLALCHEMY_DATABASE_URI=self.hide_account(self.SQLALCHEMY_DATABASE_URI),
Expand Down
12 changes: 0 additions & 12 deletions tests/test_database.py

This file was deleted.

23 changes: 23 additions & 0 deletions tests/test_system.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# coding: utf-8
import os
import re

from scrapydweb.vars import APSCHEDULER_DATABASE_URI, DATA_PATH, DATABASE_PATH, ROOT_DIR


def test_option_data_path(app):
    """Verify that the 'database' directory is created under the data path.

    When the DATA_PATH environment variable is set and the run is flagged as
    a CircleCI run (TEST_ON_CIRCLECI == 'true'), the built-in location
    ROOT_DIR/data/database must not exist. In every case, a 'database'
    directory must exist under the environment's DATA_PATH, or under the
    resolved DATA_PATH when the environment variable is empty or unset.
    """
    env_data_path = os.environ.get('DATA_PATH', '')
    on_circleci = os.environ.get('TEST_ON_CIRCLECI', 'False').lower() == 'true'

    if env_data_path and on_circleci:
        builtin_database_dir = os.path.join(ROOT_DIR, 'data', 'database')
        assert not os.path.isdir(builtin_database_dir)

    effective_root = env_data_path if env_data_path else DATA_PATH
    assert os.path.isdir(os.path.join(effective_root, 'database'))


def test_option_database_url(app):
    """Verify that every configured database URI starts with the expected URL.

    The expected URL is the DATABASE_URL environment variable, falling back
    to a SQLite URL built from DATABASE_PATH. The APScheduler URI, the
    SQLAlchemy main URI and every SQLAlchemy bind must start with it.
    For a SQLite URL, the directory it points at must exist; otherwise
    DATABASE_PATH itself must be an existing directory.
    """
    expected_prefix = os.environ.get('DATABASE_URL', 'sqlite:///' + DATABASE_PATH)

    assert APSCHEDULER_DATABASE_URI.startswith(expected_prefix)
    assert app.config['SQLALCHEMY_DATABASE_URI'].startswith(expected_prefix)
    assert all(bind_uri.startswith(expected_prefix)
               for bind_uri in app.config['SQLALCHEMY_BINDS'].values())

    sqlite_match = re.match(r'sqlite:///(.+)$', expected_prefix)
    if sqlite_match:
        database_dir = sqlite_match.group(1)
    else:
        database_dir = DATABASE_PATH
    assert os.path.isdir(database_dir)

0 comments on commit bd1c71f

Please sign in to comment.