Skip to content

Commit

Permalink
Merge pull request #20 from nansencenter/issue19-new-script-for-multi…
Browse files Browse the repository at this point in the history
…_folder

Issue19 new script for multi folder
  • Loading branch information
opsdep authored Nov 4, 2020
2 parents bfce77c + 3740320 commit cb875c5
Show file tree
Hide file tree
Showing 7 changed files with 299 additions and 6 deletions.
6 changes: 4 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ jobs:
'postgis/postgis:12-3.0'
- >
docker build .
-f Dockerfile_cli
-t "${IMAGE_NAME}"
--build-arg "BASE_IMAGE=${BASE_IMAGE_NAME}"
--target base
Expand All @@ -47,7 +48,7 @@ jobs:
-e "GEOSPAAS_DB_USER=$GEOSPAAS_DB_USER" -e "GEOSPAAS_DB_PASSWORD=$GEOSPAAS_DB_PASSWORD"
--entrypoint bash
"${IMAGE_NAME}"
-c "coverage run --source=./geospaas_processing /src/runtests.py && coveralls"
-c "coverage run --source=geospaas_processing /src/runtests.py && coveralls"
after_script:
- docker stop "$GEOSPAAS_DB_HOST"

Expand All @@ -66,6 +67,7 @@ jobs:
script:
- >
docker build .
-f Dockerfile_cli
--cache-from "${IMAGE_NAME}"
--build-arg "BASE_IMAGE=$BASE_IMAGE_NAME"
-t "${IMAGE_NAME}:${DOCKER_TMP_TAG}"
Expand All @@ -90,4 +92,4 @@ jobs:
file: 'dist/*'
file_glob: true
skip_cleanup: true
...
...
12 changes: 12 additions & 0 deletions Dockerfile_cli
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Image the CLI image is built upon; override it with `--build-arg BASE_IMAGE=...`.
ARG BASE_IMAGE=nansencenter/geospaas:2.0.0-slim
# The stage is named "base" so that a build can select it with `--target base`.
FROM ${BASE_IMAGE} AS base
# Extra Python packages installed on top of the base image
# (only django-celery-results is pinned to a version).
RUN pip install --no-cache-dir django-celery-results==1.2 redis graypy freezegun

# Copy the package sources into a temporary directory and install them from there.
WORKDIR /tmp/setup
COPY setup.py README.md ./
COPY geospaas_processing ./geospaas_processing
# Leave the temporary directory before deleting it once the installation is done.
RUN python setup.py install && cd /tmp && rm -rf /tmp/setup/

WORKDIR /

# Containers started from this image run the Python interpreter by default.
# NOTE(review): /venv is presumably created by the base image -- confirm.
ENTRYPOINT ["/venv/bin/python"]
File renamed without changes.
Empty file.
102 changes: 102 additions & 0 deletions geospaas_processing/cli/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""
Download files that are selected from the database using input criteria.
"""
import argparse
import json
import os
from datetime import datetime

import django
from dateutil.relativedelta import relativedelta
from dateutil.tz import tzutc
from django.contrib.gis.geos import GEOSGeometry

# Use the geospaas_processing settings unless DJANGO_SETTINGS_MODULE is already defined in the
# environment, then initialise Django.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'geospaas_processing.settings')
django.setup()
# NOTE(review): this import is deliberately placed after django.setup(); presumably the
# downloaders module needs Django to be configured when it is imported -- confirm.
import geospaas_processing.downloaders as downloaders


def main():
    """
    Entry point of the command line interface: turn the parsed arguments into keyword arguments
    for DownloadManager, instantiate it and call its download() method.
    """
    args = cli_parse_args()

    # Additional lookups come from the JSON query; nothing is deserialized when it is absent
    if args.query:
        dataset_filters = json.loads(args.query)
    else:
        dataset_filters = {}
    if args.geometry:
        dataset_filters['geographic_location__geometry__intersects'] = GEOSGeometry(args.geometry)

    time_start, time_end = find_designated_time(args.rel_time_flag, args.begin, args.end)

    manager_options = {
        # a trailing path separator is stripped from the download directory
        'download_directory': args.down_dir.rstrip(os.path.sep),
        'provider_settings_path': args.config_file,
        'max_downloads': int(args.safety_limit),
        'use_file_prefix': args.use_filename_prefix,
        'time_coverage_start__gte': time_start,
        'time_coverage_end__lte': time_end,
    }
    manager = downloaders.DownloadManager(**manager_options, **dataset_filters)
    manager.download()


def find_designated_time(rel_time_flag, begin, end):
    """Find the starting time and the ending time of the download window.

    Two cases are supported, selected by `rel_time_flag`:
    1) relative (flag is true): `begin` is a lag in hours (its sign is ignored) counted back
       from the current UTC time; the window ends at the current UTC time and `end` is not used.
    2) absolute (flag is false): `begin` and `end` are dates in the "YYYY-MM-DD" format, each
       interpreted as midnight UTC.

    Returns a tuple (designated_begin, designated_end) of timezone-aware datetimes in UTC.
    Raises ValueError if `begin` is not an integer (relative case) or if `begin` or `end` does
    not match the "YYYY-MM-DD" format (absolute case).
    """
    if rel_time_flag:
        # Ask for the current time directly in UTC. Stamping the UTC timezone on the naive
        # result of datetime.now() would mislabel local time as UTC on hosts whose local
        # timezone is not UTC. The clock is read only once so that both ends of the window
        # are derived from the same instant.
        designated_end = datetime.now(tz=tzutc())
        designated_begin = designated_end + relativedelta(hours=-abs(int(begin)))
    else:
        designated_begin = datetime.strptime(begin, "%Y-%m-%d").replace(tzinfo=tzutc())
        designated_end = datetime.strptime(end, "%Y-%m-%d").replace(tzinfo=tzutc())
    return designated_begin, designated_end


def cli_parse_args():
    """Create the argument parser of the download CLI and parse the command line with it.

    Returns the argparse.Namespace built from sys.argv. The flags 'rel_time_flag' and
    'use_filename_prefix' are booleans; all the other values are kept as strings (or None when
    an optional argument is not provided), 'safety_limit' defaulting to "400".
    Like any argparse parser, it exits the program (SystemExit) when the arguments are invalid
    or when help is requested.
    """
    parser = argparse.ArgumentParser(description='Process the arguments of entry_point')
    parser.add_argument(
        '-d', '--down_dir', required=True, type=str,
        # argparse applies %-formatting to help strings, so literal percent signs must be
        # doubled; a bare "%Y" makes the generation of the help message fail with a ValueError
        help="Absolute path for downloading files. If the path depends on the file date, usage "
        + "of %%Y, %%m and other placeholders interpretable by strftime is accepted")
    parser.add_argument(
        '-b', '--begin', required=True, type=str,
        help="Absolute starting date for download in the format YYYY-MM-DD or (if used together "
        + "with '-r') lag in hours relative to today")
    parser.add_argument(
        '-e', '--end', required=True, type=str,
        help="Absolute ending date for download in the format YYYY-MM-DD or (if used together "
        + "with '-r') has no influence.")
    parser.add_argument(
        '-r', '--rel_time_flag', required=False, action='store_true',
        help="The flag that distinguishes between the two cases of time calculation (1.time-lag "
        + "from now 2.Two different points in time) based on its ABSENCE or PRESENCE in the "
        + "arguments.")
    parser.add_argument(
        # kept as a string here; the conversion to an integer is left to the caller
        '-s', '--safety_limit', required=False, type=str, default="400",
        help="The upper limit (safety limit) of number of datasets that are going to be downloaded."
        + " If there total number of requested dataset for downloading exceeds this number, the "
        + "downloading process does not commence.")
    parser.add_argument(
        '-p', '--use_filename_prefix', action='store_true',
        help="The flag that distinguishes between the two cases of having files WITH or WITHOUT "
        + "file prefix when downloaded")
    parser.add_argument(
        '-g', '--geometry', required=False, type=str,
        help="The 'wkt' string of geometry which is acceptable by 'GEOSGeometry' of django")
    parser.add_argument(
        '-c', '--config_file', required=False, type=str,
        help="The absolute path to the config file that is needed for configuring the downloading "
        + "process. default is the same folder of the 'download.py' file")
    parser.add_argument(
        '-q', '--query', required=False, type=str,
        help="query exposed by user to confine the search result of database for downloading them. "
        + "It is a string which must be acceptable by json.loads() to for deserialization of one- "
        + "or multi-criteria limitation. "
        + "After deserialization, it must be a list of query that are readable by django filter."
        + "for example a dictionary of elements like "
        + "{\"dataseturi__uri__contains\":\"osisaf\", \"source__instrument__short_name"
        + "__icontains\":\"AMSR2\"}"
    )
    return parser.parse_args()

# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

setuptools.setup(
name="geospaas_processing",
version="0.0.4",
author="Adrien Perrin",
author_email="[email protected]",
version="0.1.0",
author=["Adrien Perrin", "Arash Azamifard"],
author_email=["[email protected]", "[email protected]"],
description="Processing tools for GeoSPaaS",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/nansencenter/django-geo-spaas-processing",
packages=["geospaas_processing"],
packages=["geospaas_processing", "geospaas_processing.cli"],
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
Expand Down
177 changes: 177 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
"""Unit tests for cli"""
import sys
import unittest
import unittest.mock as mock
from datetime import datetime

from dateutil.tz import tzutc
from django.contrib.gis.geos import GEOSGeometry
from freezegun import freeze_time

import geospaas_processing.cli.download as cli_download


class DownlaodingCLITestCase(unittest.TestCase):
    """Tests for the cli of downloading """

    # values given to the '-g' and '-q' options in the simulated command line
    GEOMETRY = "POLYGON ((-22 84, -22 74, 32 74, 32 84, -22 84))"
    QUERY = ('{"dataseturi__uri__contains": "osisaf", '
             '"source__instrument__short_name__icontains": "AMSR2"}')

    def setUp(self):
        """Simulate a full command line. 'sys.argv' is patched (instead of being overwritten) so
        that the real value is restored after each test and does not leak into other tests."""
        argv_patcher = mock.patch.object(sys, 'argv', [
            "",
            '-d', "/test_folder/%Y_nh_polstere",
            '-b', "200",
            '-e', "2020-08-22",
            '-r',
            '-s', "100",
            '-p',
            '-g', self.GEOMETRY,
            '-c', "/config_folder/config_file.yml",
            '-q', self.QUERY,
        ])
        argv_patcher.start()
        self.addCleanup(argv_patcher.stop)

    @staticmethod
    def _set_option_value(option, value):
        """Replace the value that follows 'option' in the simulated command line"""
        sys.argv[sys.argv.index(option) + 1] = value

    @staticmethod
    def _remove_from_argv(*items):
        """Remove the given elements from the simulated command line"""
        for item in items:
            sys.argv.remove(item)

    def _expected_kwargs(self, with_geometry=True, **overrides):
        """Keyword arguments DownloadManager is expected to receive for the command line of
        setUp() used with absolute dates ('-b 2019-10-22', no '-r'), updated with 'overrides'"""
        expected = {
            'download_directory': '/test_folder/%Y_nh_polstere',
            'max_downloads': 100,
            'provider_settings_path': '/config_folder/config_file.yml',
            'time_coverage_end__lte': datetime(2020, 8, 22, 0, 0, tzinfo=tzutc()),
            'time_coverage_start__gte': datetime(2019, 10, 22, 0, 0, tzinfo=tzutc()),
            'dataseturi__uri__contains': 'osisaf',
            'source__instrument__short_name__icontains': 'AMSR2',
            'use_file_prefix': True,
        }
        if with_geometry:
            expected['geographic_location__geometry__intersects'] = GEOSGeometry(self.GEOMETRY)
        expected.update(overrides)
        return expected

    def _assert_manager_kwargs(self, mock_download_manager_init, expected):
        """Check that DownloadManager was instantiated once, with the expected keyword arguments"""
        mock_download_manager_init.assert_called_once()
        self.assertEqual(mock_download_manager_init.call_args[1], expected)

    def test_extract_arg(self):
        """shall return the correct argument values based on the 'sys.argv' """
        arg = cli_download.cli_parse_args()
        self.assertEqual(arg.begin, '200')
        self.assertEqual(arg.config_file, '/config_folder/config_file.yml')
        self.assertEqual(arg.down_dir, '/test_folder/%Y_nh_polstere')
        self.assertEqual(arg.end, '2020-08-22')
        self.assertEqual(arg.geometry, self.GEOMETRY)
        self.assertEqual(arg.safety_limit, '100')
        self.assertEqual(arg.query, self.QUERY)
        # testing the flag enumeration
        self.assertTrue(arg.rel_time_flag)
        self.assertTrue(arg.use_filename_prefix)
        self._remove_from_argv('-r', '-p')
        arg = cli_download.cli_parse_args()
        self.assertFalse(arg.rel_time_flag)
        self.assertFalse(arg.use_filename_prefix)

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_correct_call_json_deserializer(self, mock_download_method, mock_download_manager_init):
        """'json.loads' shall deserialize the whole string that comes after '-q' """
        with mock.patch('json.loads') as mock_json:
            cli_download.main()
        mock_json.assert_called_once_with(self.QUERY)

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_lack_of_calling_json_deserializer_when_no_query_appears(
            self, mock_download_method, mock_download_manager_init):
        """'json.loads' should not be called when the '-q' option is not provided"""
        self._remove_from_argv('-q', self.QUERY)
        with mock.patch('json.loads') as mock_json:
            cli_download.main()
        mock_json.assert_not_called()

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_correct_call_DownloadManager_without_file_prefix(
            self, mock_download_method, mock_download_manager_init):
        """shall return the proper call for the case of lack of file prefix ('-p') in arguments"""
        self._remove_from_argv('-p', '-r')
        self._set_option_value('-b', '2019-10-22')
        cli_download.main()
        self._assert_manager_kwargs(
            mock_download_manager_init, self._expected_kwargs(use_file_prefix=False))

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_correct_call_DownloadManager_with_file_prefix(
            self, mock_download_method, mock_download_manager_init):
        """
        shall return the proper call for the case of file prefix ('-p') present in arguments,
        together with two definite time points
        """
        self._remove_from_argv('-r')
        self._set_option_value('-b', '2019-10-22')
        cli_download.main()
        self._assert_manager_kwargs(mock_download_manager_init, self._expected_kwargs())

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_correct_call_DownloadManager_without_geometry(
            self, mock_download_method, mock_download_manager_init):
        """shall return the proper call for the case of lack of geometry in arguments"""
        self._remove_from_argv('-g', self.GEOMETRY, '-r')
        self._set_option_value('-b', '2019-10-22')
        cli_download.main()
        self._assert_manager_kwargs(
            mock_download_manager_init, self._expected_kwargs(with_geometry=False))

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_correct_call_DownloadManager_with_relative_time(
            self, mock_download_method, mock_download_manager_init):
        """shall return the proper call for the case of relative time definition in arguments"""
        self._set_option_value('-b', '40')
        with freeze_time("2012-01-14"):
            cli_download.main()
        # the window covers the 40 hours that precede the (frozen) current time
        self._assert_manager_kwargs(
            mock_download_manager_init,
            self._expected_kwargs(
                time_coverage_end__lte=datetime(2012, 1, 14, 0, 0, tzinfo=tzutc()),
                time_coverage_start__gte=datetime(2012, 1, 12, 8, 0, tzinfo=tzutc())))

    def test_find_designated_time_function(self):
        """test the 'find_designated_time' function logics. answer_1, answer_2 are used for absolute
        and answer_3, answer_4 are used for relative timing"""
        answer_1, answer_2 = cli_download.find_designated_time(False, '2019-10-22', '2020-08-22')
        self.assertEqual(answer_1, datetime(2019, 10, 22, 0, 0, tzinfo=tzutc()))
        self.assertEqual(answer_2, datetime(2020, 8, 22, 0, 0, tzinfo=tzutc()))
        with freeze_time("2012-01-14"):
            answer_3, answer_4 = cli_download.find_designated_time(True, '500', '')
        self.assertEqual(answer_3, datetime(2011, 12, 24, 4, 0, tzinfo=tzutc()))
        self.assertEqual(answer_4, datetime(2012, 1, 14, 0, 0, tzinfo=tzutc()))

0 comments on commit cb875c5

Please sign in to comment.