-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #20 from nansencenter/issue19-new-script-for-multi…
…_folder Issue19 new script for multi folder
- Loading branch information
Showing
7 changed files
with
299 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Base image; can be overridden at build time with --build-arg BASE_IMAGE=...
ARG BASE_IMAGE=nansencenter/geospaas:2.0.0-slim | ||
FROM ${BASE_IMAGE} AS base | ||
# Extra Python packages installed on top of the base image (only django-celery-results is pinned).
RUN pip install --no-cache-dir django-celery-results==1.2 redis graypy freezegun | ||
|
||
# Copy the package sources into a temporary folder used only for the installation.
WORKDIR /tmp/setup | ||
COPY setup.py README.md ./ | ||
COPY geospaas_processing ./geospaas_processing | ||
# Install the package, then leave the folder before deleting it.
RUN python setup.py install && cd /tmp && rm -rf /tmp/setup/ | ||
|
||
WORKDIR / | ||
|
||
# Containers started from this image run the Python interpreter located in /venv.
ENTRYPOINT ["/venv/bin/python"] |
File renamed without changes.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
""" | ||
Download files that are selected from the database using input criteria. | ||
""" | ||
import argparse | ||
import json | ||
import os | ||
from datetime import datetime | ||
|
||
import django | ||
from dateutil.relativedelta import relativedelta | ||
from dateutil.tz import tzutc | ||
from django.contrib.gis.geos import GEOSGeometry | ||
|
||
# Point Django at the settings module (unless the environment already defines one) and
# initialise it before the import below.
# NOTE(review): presumably geospaas_processing.downloaders needs Django to be set up at import
# time, which would explain why this import is not at the top of the file - confirm.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'geospaas_processing.settings') | ||
django.setup() | ||
import geospaas_processing.downloaders as downloaders | ||
|
||
|
||
def main():
    """
    Parse the command line, build the dataset filters and run the download.

    The optional JSON query (``-q``) is the starting set of filters; the optional WKT geometry
    (``-g``) is added to it as an intersection criterion. The filters and the time window
    returned by find_designated_time() are handed to DownloadManager, whose download() method
    is then called.
    """
    cli_args = cli_parse_args()

    if cli_args.query:
        filters = json.loads(cli_args.query)
    else:
        filters = {}
    if cli_args.geometry:
        filters['geographic_location__geometry__intersects'] = GEOSGeometry(cli_args.geometry)

    window_start, window_end = find_designated_time(
        cli_args.rel_time_flag, cli_args.begin, cli_args.end)

    # Trailing path separators are stripped from the download directory
    target_directory = cli_args.down_dir.rstrip(os.path.sep)
    # The safety limit is parsed as a string, DownloadManager is given an integer
    download_limit = int(cli_args.safety_limit)

    manager = downloaders.DownloadManager(
        download_directory=target_directory,
        provider_settings_path=cli_args.config_file,
        max_downloads=download_limit,
        use_file_prefix=cli_args.use_filename_prefix,
        time_coverage_start__gte=window_start,
        time_coverage_end__lte=window_end,
        **filters
    )
    manager.download()
|
||
|
||
def find_designated_time(rel_time_flag, begin, end):
    """Return the UTC time window (begin, end) in which datasets must be searched.

    Two cases are supported:
      1) relative: the window ends now and starts ``begin`` hours earlier;
      2) absolute: the window is delimited by the two dates given by the user.

    Args:
        rel_time_flag: if true, use the relative case; ``end`` is then ignored.
        begin: lag in hours as a string (relative case; its sign is ignored), or starting date
            formatted as YYYY-MM-DD (absolute case).
        end: ending date formatted as YYYY-MM-DD (absolute case only).

    Returns:
        tuple: ``(designated_begin, designated_end)``, two timezone-aware UTC datetimes.

    Raises:
        ValueError: if ``begin`` is not an integer (relative case) or if one of the dates does
            not match the YYYY-MM-DD format (absolute case).
    """
    utc = tzutc()
    if rel_time_flag:
        lag_in_hours = abs(int(begin))
        # Ask for the current time directly in UTC: labelling the naive local time as UTC
        # would shift the window by the UTC offset of the machine. The clock is read only
        # once so that both bounds are derived from the same instant.
        designated_end = datetime.now(tz=utc)
        designated_begin = designated_end + relativedelta(hours=-lag_in_hours)
    else:
        designated_begin = datetime.strptime(begin, "%Y-%m-%d").replace(tzinfo=utc)
        designated_end = datetime.strptime(end, "%Y-%m-%d").replace(tzinfo=utc)
    return designated_begin, designated_end
|
||
|
||
def cli_parse_args():
    """Create the argument parser of the download script and parse ``sys.argv`` with it.

    Returns:
        argparse.Namespace: the parsed arguments ``down_dir``, ``begin``, ``end``,
        ``rel_time_flag``, ``safety_limit``, ``use_filename_prefix``, ``geometry``,
        ``config_file`` and ``query``. The two flags are booleans, ``safety_limit`` is a
        string (default "400") and the optional arguments are None when absent.

    Raises:
        SystemExit: raised by argparse when the arguments are invalid or when help is requested.
    """
    parser = argparse.ArgumentParser(description='Process the arguments of entry_point')
    parser.add_argument(
        '-d', '--down_dir', required=True, type=str,
        # argparse applies %-formatting to help strings, so a literal percent sign must be
        # doubled; a bare "%Y" makes the help display fail with a ValueError.
        help="Absolute path for downloading files. If the path depends on the file date, usage "
        + "of %%Y, %%m and other placeholders interpretable by strftime is accepted")
    parser.add_argument(
        '-b', '--begin', required=True, type=str,
        help="Absolute starting date for download in the format YYYY-MM-DD or (if used together "
        + "with '-r') lag in hours relative to today")
    parser.add_argument(
        '-e', '--end', required=True, type=str,
        help="Absolute ending date for download in the format YYYY-MM-DD or (if used together "
        + "with '-r') has no influence.")
    parser.add_argument(
        '-r', '--rel_time_flag', required=False, action='store_true',
        help="The flag that distinguishes between the two cases of time calculation (1.time-lag "
        + "from now 2.Two different points in time) based on its ABSENCE or PRESENCE in the "
        + "arguments.")
    parser.add_argument(
        # Kept as a string here; the caller converts it to an integer.
        '-s', '--safety_limit', required=False, type=str, default="400",
        help="The upper limit (safety limit) of number of datasets that are going to be downloaded."
        + " If the total number of requested datasets for downloading exceeds this number, the "
        + "downloading process does not commence.")
    parser.add_argument(
        '-p', '--use_filename_prefix', action='store_true',
        help="The flag that distinguishes between the two cases of having files WITH or WITHOUT "
        + "file prefix when downloaded")
    parser.add_argument(
        '-g', '--geometry', required=False, type=str,
        help="The 'wkt' string of geometry which is acceptable by 'GEOSGeometry' of django")
    parser.add_argument(
        '-c', '--config_file', required=False, type=str,
        help="The absolute path to the config file that is needed for configuring the downloading "
        + "process. default is the same folder of the 'download.py' file")
    parser.add_argument(
        '-q', '--query', required=False, type=str,
        help="query exposed by user to confine the search result of database for downloading them. "
        + "It is a string which must be acceptable by json.loads() for deserialization of one- "
        + "or multi-criteria limitation. "
        + "After deserialization, it must be a dictionary of queries that are readable by django "
        + "filter. For example a dictionary of elements like "
        + "{\"dataseturi__uri__contains\":\"osisaf\", \"source__instrument__short_name"
        + "__icontains\":\"AMSR2\"}"
    )
    return parser.parse_args()
|
||
# Run the command line interface only when this file is executed as a script, not on import.
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,14 +6,14 @@ | |
|
||
setuptools.setup( | ||
name="geospaas_processing", | ||
version="0.0.4", | ||
author="Adrien Perrin", | ||
author_email="[email protected]", | ||
version="0.1.0", | ||
author=["Adrien Perrin", "Arash Azamifard"], | ||
author_email=["[email protected]", "[email protected]"], | ||
description="Processing tools for GeoSPaaS", | ||
long_description=long_description, | ||
long_description_content_type="text/markdown", | ||
url="https://github.com/nansencenter/django-geo-spaas-processing", | ||
packages=["geospaas_processing"], | ||
packages=["geospaas_processing", "geospaas_processing.cli"], | ||
classifiers=[ | ||
"Programming Language :: Python :: 3", | ||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
"""Unit tests for cli""" | ||
import sys | ||
import unittest | ||
import unittest.mock as mock | ||
from datetime import datetime | ||
|
||
from dateutil.tz import tzutc | ||
from django.contrib.gis.geos import GEOSGeometry | ||
from freezegun import freeze_time | ||
|
||
import geospaas_processing.cli.download as cli_download | ||
|
||
|
||
class DownlaodingCLITestCase(unittest.TestCase):
    """Tests for the command line interface of the download script"""

    # Values given on the fake command line and reused in the expected results
    GEOMETRY_WKT = "POLYGON ((-22 84, -22 74, 32 74, 32 84, -22 84))"
    QUERY = ('{"dataseturi__uri__contains": "osisaf", '
             '"source__instrument__short_name__icontains": "AMSR2"}')

    def setUp(self):
        """Install a complete fake command line which each test then adapts to its needs"""
        # The tests modify sys.argv in place; put the real one back after each test so that
        # the fake arguments do not leak into the rest of the test run.
        self.addCleanup(setattr, sys, 'argv', sys.argv)
        sys.argv = [
            "",
            '-d', "/test_folder/%Y_nh_polstere",
            '-b', "200",
            '-e', "2020-08-22",
            '-r',
            '-s', "100",
            '-p',
            '-g', self.GEOMETRY_WKT,
            '-c', "/config_folder/config_file.yml",
            '-q', self.QUERY,
        ]

    def _expected_init_kwargs(self, begin, end, use_file_prefix=True, with_geometry=True):
        """Build the keyword arguments DownloadManager is expected to be instantiated with"""
        expected = {
            'download_directory': '/test_folder/%Y_nh_polstere',
            'max_downloads': 100,
            'provider_settings_path': '/config_folder/config_file.yml',
            'time_coverage_start__gte': begin,
            'time_coverage_end__lte': end,
            'dataseturi__uri__contains': 'osisaf',
            'source__instrument__short_name__icontains': 'AMSR2',
            'use_file_prefix': use_file_prefix,
        }
        if with_geometry:
            expected['geographic_location__geometry__intersects'] = GEOSGeometry(
                self.GEOMETRY_WKT)
        return expected

    def test_extract_arg(self):
        """shall return the correct argument values based on the 'sys.argv' """
        arg = cli_download.cli_parse_args()
        self.assertEqual(arg.begin, '200')
        self.assertEqual(arg.config_file, '/config_folder/config_file.yml')
        self.assertEqual(arg.down_dir, '/test_folder/%Y_nh_polstere')
        self.assertEqual(arg.end, '2020-08-22')
        self.assertEqual(arg.geometry, self.GEOMETRY_WKT)
        self.assertEqual(arg.safety_limit, '100')
        self.assertEqual(arg.query, self.QUERY)
        # testing the flags: present on the command line first, then absent
        self.assertTrue(arg.rel_time_flag)
        self.assertTrue(arg.use_filename_prefix)
        sys.argv.remove('-r')
        sys.argv.remove('-p')
        arg = cli_download.cli_parse_args()
        self.assertFalse(arg.rel_time_flag)
        self.assertFalse(arg.use_filename_prefix)

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_correct_call_json_deserializer(self, mock_download_method, mock_download_manager_init):
        """'json.loads' shall deserialize the whole string that comes after '-q' """
        with mock.patch('json.loads') as mock_json:
            cli_download.main()
        self.assertIn((self.QUERY,), mock_json.call_args)

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_lack_of_calling_json_deserializer_when_no_query_appears(
            self, mock_download_method, mock_download_manager_init):
        """'json.loads' should not be called when the '-q' argument is absent"""
        # the last two elements of the command line are the query string and '-q'
        sys.argv.pop()
        sys.argv.pop()
        with mock.patch('json.loads') as mock_json:
            cli_download.main()
        self.assertIsNone(mock_json.call_args)

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_correct_call_DownloadManager_without_file_prefix(
            self, mock_download_method, mock_download_manager_init):
        """shall return the proper call for the case of lack of file prefix ('-p') in arguments"""
        sys.argv.remove('-p')
        sys.argv.remove('-r')
        sys.argv[4] = '2019-10-22'  # value of '-b'
        cli_download.main()
        self.assertIn(
            self._expected_init_kwargs(
                begin=datetime(2019, 10, 22, 0, 0, tzinfo=tzutc()),
                end=datetime(2020, 8, 22, 0, 0, tzinfo=tzutc()),
                use_file_prefix=False),
            mock_download_manager_init.call_args)

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_correct_call_DownloadManager_with_file_prefix(
            self, mock_download_method, mock_download_manager_init):
        """
        shall return the proper call for the case of two definite time points and the file prefix
        flag ('-p') in arguments
        """
        sys.argv.remove('-r')
        sys.argv[4] = '2019-10-22'  # value of '-b'
        cli_download.main()
        self.assertIn(
            self._expected_init_kwargs(
                begin=datetime(2019, 10, 22, 0, 0, tzinfo=tzutc()),
                end=datetime(2020, 8, 22, 0, 0, tzinfo=tzutc())),
            mock_download_manager_init.call_args)

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_correct_call_DownloadManager_without_geometry(
            self, mock_download_method, mock_download_manager_init):
        """shall return the proper call for the case of lack of geometry in arguments"""
        sys.argv.remove('-g')
        sys.argv.remove(self.GEOMETRY_WKT)
        sys.argv.remove('-r')
        sys.argv[4] = '2019-10-22'  # value of '-b'
        cli_download.main()
        self.assertIn(
            self._expected_init_kwargs(
                begin=datetime(2019, 10, 22, 0, 0, tzinfo=tzutc()),
                end=datetime(2020, 8, 22, 0, 0, tzinfo=tzutc()),
                with_geometry=False),
            mock_download_manager_init.call_args)

    @mock.patch('geospaas_processing.downloaders.DownloadManager.__init__', return_value=None)
    @mock.patch('geospaas_processing.downloaders.DownloadManager.download')
    def test_correct_call_DownloadManager_with_relative_time(
            self, mock_download_method, mock_download_manager_init):
        """shall return the proper call for the case of relative time definition in arguments"""
        sys.argv[4] = "40"  # value of '-b': a lag of 40 hours
        with freeze_time("2012-01-14"):
            cli_download.main()
        self.assertIn(
            self._expected_init_kwargs(
                begin=datetime(2012, 1, 12, 8, 0, tzinfo=tzutc()),
                end=datetime(2012, 1, 14, 0, 0, tzinfo=tzutc())),
            mock_download_manager_init.call_args)

    def test_find_designated_time_function(self):
        """test the 'find_designated_time' function logics. answer_1, answer_2 are used for absolute
        and answer_3, answer_4 are used for relative timing"""
        answer_1, answer_2 = cli_download.find_designated_time(False, '2019-10-22', '2020-08-22')
        self.assertEqual(answer_1, datetime(2019, 10, 22, 0, 0, tzinfo=tzutc()))
        self.assertEqual(answer_2, datetime(2020, 8, 22, 0, 0, tzinfo=tzutc()))
        with freeze_time("2012-01-14"):
            answer_3, answer_4 = cli_download.find_designated_time(True, '500', '')
        self.assertEqual(answer_3, datetime(2011, 12, 24, 4, 0, tzinfo=tzutc()))
        self.assertEqual(answer_4, datetime(2012, 1, 14, 0, 0, tzinfo=tzutc()))