diff --git a/.env b/.env index c9935e35..2d53e416 100644 --- a/.env +++ b/.env @@ -44,7 +44,7 @@ CKAN_SMTP_PASSWORD=pass CKAN_SMTP_MAIL_FROM=ckan@localhost # Extensions -CKAN__PLUGINS=envvars image_view text_view recline_view datagov_harvest ckan_harvester geodatagov geodatagov_miscs z3950_harvester arcgis_harvester geodatagov_geoportal_harvester waf_harvester_collection geodatagov_csw_harvester geodatagov_doc_harvester geodatagov_waf_harvester spatial_metadata spatial_query s3test +CKAN__PLUGINS=envvars image_view text_view recline_view datagov_harvest ckan_harvester geodatagov geodatagov_miscs z3950_harvester arcgis_harvester geodatagov_geoportal_harvester waf_harvester_collection geodatagov_csw_harvester geodatagov_doc_harvester geodatagov_waf_harvester spatial_metadata spatial_query s3test datajson datajson_harvest # Harvest settings CKAN__HARVEST__MQ__TYPE=redis diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 135ebcc5..47901719 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,7 +21,7 @@ jobs: needs: lint strategy: matrix: - ckan-version: [2.9.5, 2.9, 2.9.7] + ckan-version: ['2.10', '2.10.1'] fail-fast: false name: CKAN ${{ matrix.ckan-version }} diff --git a/Dockerfile b/Dockerfile index 96eaee05..a1cab295 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG CKAN_VERSION=2.9.5 +ARG CKAN_VERSION=2.10.1 FROM openknowledge/ckan-dev:${CKAN_VERSION} ARG CKAN_VERSION diff --git a/Makefile b/Makefile index dd7db552..4a82c3e7 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -CKAN_VERSION ?= 2.9.7 +CKAN_VERSION ?= 2.10.1 COMPOSE_FILE ?= docker-compose.yml build: ## Build the docker containers diff --git a/README.md b/README.md index 88ea2952..8843a400 100644 --- a/README.md +++ b/README.md @@ -34,11 +34,12 @@ This extension is compatible with these versions of CKAN. CKAN version | Compatibility ------------ | ------------- <=2.8 | no -2.9 | [complete](https://github.com/GSA/datagov-ckan-multi/issues/570) +2.9 | 0.1.37 (last supported) +2.10 | >=0.2.0 ## Tests -All the tests live in the [/ckanext/geodatagov/tests](/ckanext/geodatagov/tests) folder. [Github actions](https://github.com/GSA/ckanext-geodatagov/blob/main/.github/workflows/test.yml) is configured to run the tests against CKAN 2.9 when you open a pull request. +All the tests live in the [/ckanext/geodatagov/tests](/ckanext/geodatagov/tests) folder. [Github actions](https://github.com/GSA/ckanext-geodatagov/blob/main/.github/workflows/test.yml) is configured to run the tests against CKAN 2.10 when you open a pull request. ## Using the Docker Dev Environment @@ -61,7 +62,7 @@ To docker exec into the CKAN image, run: ### Testing They follow the guidelines for [testing CKAN -extensions](https://docs.ckan.org/en/2.9/extensions/testing-extensions.html#testing-extensions). +extensions](https://docs.ckan.org/en/2.10/extensions/testing-extensions.html#testing-extensions). To run the extension tests, start the containers with `make up`, then: @@ -100,7 +101,7 @@ In order to support multiple versions of CKAN, or even upgrade to new versions of CKAN, we support development and testing through the `CKAN_VERSION` environment variable. 
- $ make CKAN_VERSION=2.9 test + $ make CKAN_VERSION=2.10 test ### Command line interface diff --git a/ckanext/geodatagov/blueprint.py b/ckanext/geodatagov/blueprint.py index 2b2e7378..106dbbfb 100644 --- a/ckanext/geodatagov/blueprint.py +++ b/ckanext/geodatagov/blueprint.py @@ -4,7 +4,7 @@ from flask import Blueprint from flask.wrappers import Response as response -from ckanext.geodatagov.model import MiscsFeed, MiscsTopicCSV +from ckanext.geodatagov.model import MiscsFeed datapusher = Blueprint('geodatagov', __name__) @@ -18,25 +18,5 @@ def feed(): return entry.feed -def csv(date=None): - if date: - entry = model.Session.query(MiscsTopicCSV) \ - .filter_by(date=date) \ - .first() - else: - entry = model.Session.query(MiscsTopicCSV) \ - .order_by(MiscsTopicCSV.date.desc()) \ - .first() - if not entry or not entry.csv: - abort(404, 'There is no csv entry yet.') - response.content_type = 'text/csv' - response.content_disposition = 'attachment; filename="topics-%s.csv"' % entry.date - return entry.csv - - datapusher.add_url_rule('/usasearch-custom-feed.xml', view_func=feed) -datapusher.add_url_rule('/topics-csv/{date}', - view_func=csv) -datapusher.add_url_rule('/topics-csv', - view_func=csv) diff --git a/ckanext/geodatagov/commands.py b/ckanext/geodatagov/commands.py index 4d14dcdf..89bd1f01 100644 --- a/ckanext/geodatagov/commands.py +++ b/ckanext/geodatagov/commands.py @@ -20,7 +20,7 @@ from ckan.plugins.toolkit import config from ckanext.harvest.model import HarvestSource, HarvestJob -from ckanext.geodatagov.model import MiscsFeed, MiscsTopicCSV +from ckanext.geodatagov.model import MiscsFeed # https://github.com/GSA/ckanext-geodatagov/issues/117 @@ -566,104 +566,6 @@ def export_group_and_tags(packages, domain='https://catalog.data.gov'): result.append(package) return result - def export_csv(self, domain='https://catalog.data.gov'): - print('export started...') - - # cron job - # paster --plugin=ckanext-geodatagov geodatagov export-csv --config=/etc/ckan/production.ini - - # Exported CSV header list: - # - Dataset Title - # - Dataset URL - # - Organization Name - # - Organization Link - # - Harvest Source Name - # - Harvest Source Link - # - Topic Name - # - Topic Categories - - import io - import csv - - limit = 100 - page = 1 - - import pprint - - result = [] - - while True: - data_dict = { - 'q': 'groups: *', - # 'fq': fq, - # 'facet.field': facets.keys(), - 'rows': limit, - # 'sort': sort_by, - 'start': (page - 1) * limit - # 'extras': search_extras - } - - query = logic.get_action('package_search')({'model': model, 'ignore_auth': True}, data_dict) - - page += 1 - # import pprint - # pprint.pprint(packages) - - if not query['results']: - break - - packages = query['results'] - result = result + GeoGovCommand.export_group_and_tags(packages=packages, domain=domain) - - if not result: - print('nothing to do') - return - - import datetime - - print('writing into db...') - - date_suffix = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%d') - csv_output = io.StringIO() - - fieldnames = ['Dataset Title', 'Dataset URL', 'Organization Name', 'Organization Link', - 'Harvest Source Name', 'Harvest Source Link', 'Topic Name', 'Topic Categories'] - - writer = csv.writer(csv_output) - writer.writerow(fieldnames) - - for pkg in result: - try: - writer.writerow( - [ - pkg['title'], - pkg['url'], - pkg['organization'], - pkg['organizationUrl'], - pkg['harvestSourceTitle'], - pkg['harvestSourceUrl'], - pkg['topic'], - pkg['topicCategories'] - ] - ) - except UnicodeEncodeError: - 
pprint.pprint(pkg) - - content = csv_output.getvalue() - - entry = model.Session.query(MiscsTopicCSV) \ - .filter_by(date=date_suffix) \ - .first() - if not entry: - # create the empty entry for the first time - entry = MiscsTopicCSV() - entry.date = date_suffix - entry.csv = content - entry.save() - - print('csv file topics-%s.csv is ready.' % date_suffix) - return result, entry - # this code is defunct and will need to be refactored into cli.py """ def jsonl_export(self): @@ -838,7 +740,7 @@ def update_dataset_geo_fields(self): # iterate over all datasets search_backend = config.get('ckanext.spatial.search_backend', 'postgis') - if search_backend != 'solr': + if search_backend != 'solr-bbox': raise ValueError('Solr is not your default search backend (ckanext.spatial.search_backend)') datasets = model.Session.query(model.Package).all() diff --git a/ckanext/geodatagov/harvesters/arcgis.py b/ckanext/geodatagov/harvesters/arcgis.py index 8632ba03..22b16015 100644 --- a/ckanext/geodatagov/harvesters/arcgis.py +++ b/ckanext/geodatagov/harvesters/arcgis.py @@ -23,6 +23,8 @@ from ckan.plugins.toolkit import add_template_directory, add_resource, requires_ckan_version from ckan.plugins import IConfigurer +from ckanext.geodatagov.helpers import string as custom_string + requires_ckan_version("2.9") @@ -118,7 +120,7 @@ def info(self): def extra_schema(self): return { 'private_datasets': [ignore_empty, boolean_validator], - 'extra_search_criteria': [ignore_empty, str], + 'extra_search_criteria': [ignore_empty, custom_string], } def gather_stage(self, harvest_job): @@ -287,7 +289,7 @@ def import_stage(self, harvest_object): package_schema = logic.schema.default_update_package_schema() tag_schema = logic.schema.default_tags_schema() - tag_schema['name'] = [not_empty, str] + tag_schema['name'] = [not_empty, custom_string] package_schema['tags'] = tag_schema context['schema'] = package_schema # TODO: user @@ -298,7 +300,7 @@ def import_stage(self, harvest_object): # We need to explicitly provide a package ID, otherwise ckanext-spatial # won't be be able to link the extent to the package. 
package_dict['id'] = str(uuid.uuid4()) - package_schema['id'] = [str] + package_schema['id'] = [custom_string] # Save reference to the package on the object harvest_object.package_id = package_dict['id'] diff --git a/ckanext/geodatagov/harvesters/waf_collection.py b/ckanext/geodatagov/harvesters/waf_collection.py index 684e07b7..0b969dee 100644 --- a/ckanext/geodatagov/harvesters/waf_collection.py +++ b/ckanext/geodatagov/harvesters/waf_collection.py @@ -13,6 +13,7 @@ ) # , validate_profiles; , validate_profiles from ckanext.harvest.model import HarvestObject from ckanext.harvest.model import HarvestObjectExtra as HOExtra +from ckanext.geodatagov.helpers import string class WAFCollectionHarvester(GeoDataGovWAFHarvester): @@ -26,7 +27,7 @@ def info(self): def extra_schema(self): extra_schema = super(WAFCollectionHarvester, self).extra_schema() - extra_schema["collection_metadata_url"] = [not_empty, str] + extra_schema["collection_metadata_url"] = [not_empty, string] log.debug( "Getting extra schema for WAFCollectionHarvester: {}".format(extra_schema) ) diff --git a/ckanext/geodatagov/harvesters/z3950.py b/ckanext/geodatagov/harvesters/z3950.py index f8bd743f..b5eb6201 100644 --- a/ckanext/geodatagov/harvesters/z3950.py +++ b/ckanext/geodatagov/harvesters/z3950.py @@ -17,6 +17,7 @@ from ckan.logic.validators import boolean_validator from ckan.plugins.toolkit import add_template_directory, add_resource, requires_ckan_version +from ckanext.geodatagov.helpers import string requires_ckan_version("2.9") @@ -43,7 +44,7 @@ def info(self): def extra_schema(self): return {'private_datasets': [ignore_empty, boolean_validator], - 'database': [not_empty, str], + 'database': [not_empty, string], 'port': [not_empty, convert_int]} def gather_stage(self, harvest_job): diff --git a/ckanext/geodatagov/helpers.py b/ckanext/geodatagov/helpers.py index fbed8393..1a1892d0 100644 --- a/ckanext/geodatagov/helpers.py +++ b/ckanext/geodatagov/helpers.py @@ -61,3 +61,7 @@ def get_harvest_source_config(harvester_id): def get_collection_package(collection_package_id): package = p.toolkit.get_action('package_show')({}, {'id': collection_package_id}) return package + + +def string(value): + return str(value) diff --git a/ckanext/geodatagov/logic.py b/ckanext/geodatagov/logic.py index ef66b400..7a4cd78e 100644 --- a/ckanext/geodatagov/logic.py +++ b/ckanext/geodatagov/logic.py @@ -6,6 +6,7 @@ import time import uuid +from ckan.lib.navl.validators import not_empty from ckan.logic import side_effect_free import ckan.logic.schema as schema from ckan.logic.action import get as core_get @@ -13,6 +14,7 @@ import ckan.plugins as p from ckanext.geodatagov.plugin import change_resource_details, split_tags from ckanext.geodatagov.harvesters.arcgis import _slugify +from ckanext.geodatagov.helpers import string from ckanext.harvest.model import HarvestObject # , HarvestJob from ckan.common import config @@ -238,7 +240,7 @@ def datajson_create(context, data_dict): 'extras': [{'key': 'organization_type', 'value': "Federal Government"}]}) context['schema'] = schema.default_create_package_schema() - context['schema']['id'] = [p.toolkit.get_validator('not_empty')] + context['schema']['id'] = [not_empty] context['return_id_only'] = True return p.toolkit.get_action('package_create')(context, new_package) @@ -303,7 +305,7 @@ def doi_create(context, data_dict): new_package["extras"].append({"key": "harvest_object_id", "value": obj.id}) context['schema'] = schema.default_create_package_schema() - context['schema']['id'] = 
[p.toolkit.get_validator('not_empty')] + context['schema']['id'] = [not_empty] context['return_id_only'] = True p.toolkit.get_action('package_create')(context, new_package) print(str(datetime.datetime.now()) + ' Imported doi id ' + new_package['id']) @@ -413,7 +415,7 @@ def rollup_save_action(context, data_dict): if p.toolkit.check_ckan_version(min_version='2.8'): search_backend = config.get('ckanext.spatial.search_backend', 'postgis') log.debug('Search backend {}'.format(search_backend)) - if search_backend == 'solr': + if search_backend == 'solr-bbox': old_spatial = new_extras_rollup.get('spatial', None) if old_spatial is not None: log.info('Old Spatial found {}'.format(old_spatial)) @@ -429,6 +431,10 @@ def rollup_save_action(context, data_dict): new_extras.append({'key': 'spatial', 'value': new_spatial}) # remove rolled spatial to skip run this process again new_extras_rollup['old-spatial'] = new_extras_rollup.pop('spatial') + else: + log.info('New spatial could not be created') + new_extras.append({'key': 'spatial', 'value': ''}) + new_extras_rollup['old-spatial'] = new_extras_rollup.pop('spatial') if new_extras_rollup: new_extras.append({'key': 'extras_rollup', 'value': json.dumps(new_extras_rollup)}) @@ -474,10 +480,18 @@ def translate_spatial(old_spatial): try: numbers_with_spaces = [int(i) for i in old_spatial_transformed.split(' ')] if all(isinstance(x, int) for x in numbers_with_spaces): - old_spatial_transformed = 'null' + old_spatial_transformed = '' except ValueError: pass + # If we have 4 numbers separated by commas, transform them as GeoJSON + parts = old_spatial_transformed.strip().split(',') + if len(parts) == 4 and all(is_number(x) for x in parts): + minx, miny, maxx, maxy = parts + params = {"minx": minx, "miny": miny, "maxx": maxx, "maxy": maxy} + new_spatial = geojson_tpl.format(**params) + return new_spatial + # Analyze with type of data is JSON valid try: geometry = json.loads(old_spatial_transformed) # NOQA F841 @@ -492,18 +506,13 @@ def translate_spatial(old_spatial): return old_spatial_transformed except BaseException: log.info('JSON that could not be parsed\n\t{}'.format(old_spatial_transformed)) - pass - # If we have 4 numbers separated by commas, transform them as GeoJSON - parts = old_spatial_transformed.strip().split(',') - if len(parts) == 4 and all(is_number(x) for x in parts): - minx, miny, maxx, maxy = parts - params = {"minx": minx, "miny": miny, "maxx": maxx, "maxy": maxy} - new_spatial = geojson_tpl.format(**params) - return new_spatial + try: + return get_geo_from_string(old_spatial) + except AttributeError: + pass - g = get_geo_from_string(old_spatial) - return g + return '' def is_number(s): @@ -541,6 +550,10 @@ def package_create(up_func, context, data_dict): """ before_package_create for CKAN 2.8 """ rollup_save_action(context, data_dict) data_dict = fix_dataset(data_dict) + # TODO: This fix is bad, find a better one :( + if 'schema' in context.keys(): + context['schema']['id'] = [string] + context['schema']['tags']['name'] = [not_empty, string] return up_func(context, data_dict) diff --git a/ckanext/geodatagov/model.py b/ckanext/geodatagov/model.py index 19e3ef3e..ef3d94be 100644 --- a/ckanext/geodatagov/model.py +++ b/ckanext/geodatagov/model.py @@ -8,7 +8,6 @@ log = logging.getLogger(__name__) miscs_feed_table = None -miscs_topic_csv_table = None class MiscsFeedException(Exception): @@ -19,14 +18,6 @@ class MiscsFeed(domain_object.DomainObject): pass -class MiscsTopicCSVException(Exception): - pass - - -class 
MiscsTopicCSV(domain_object.DomainObject): - pass - - class MiscsSolrSyncException(Exception): pass @@ -50,19 +41,6 @@ def setup(): else: log.debug('Geodatagov Miscs Feed table creation deferred') - if miscs_topic_csv_table is None: - define_miscs_topic_csv_table() - log.debug('Geodatagov Miscs Topic CSV table defined in memory') - - if model.package_table.exists(): - if not miscs_topic_csv_table.exists(): - miscs_topic_csv_table.create() - log.debug('Geodatagov Miscs Topic CSV table created') - else: - log.debug('Geodatagov Miscs Topic CSV table already exists') - else: - log.debug('Geodatagov Miscs Topic CSV table creation deferred') - def define_miscs_feed_table(): global miscs_feed_table @@ -72,14 +50,3 @@ def define_miscs_feed_table(): ) meta.mapper(MiscsFeed, miscs_feed_table) - - -def define_miscs_topic_csv_table(): - global miscs_topic_csv_table - miscs_topic_csv_table = Table('miscs_topic_csv', meta.metadata, - Column('id', types.UnicodeText, primary_key=True, default=model.types.make_uuid), - Column('date', types.UnicodeText, index=True, unique=True, nullable=False, default=u''), - Column('csv', types.UnicodeText, nullable=False, default=u''), - ) - - meta.mapper(MiscsTopicCSV, miscs_topic_csv_table) diff --git a/ckanext/geodatagov/plugin.py b/ckanext/geodatagov/plugin.py index 568f4748..02a17a60 100644 --- a/ckanext/geodatagov/plugin.py +++ b/ckanext/geodatagov/plugin.py @@ -405,53 +405,6 @@ def update_config(self, config): edit_url = None - UPDATE_CATEGORY_ACTIONS = ['package_update', 'dataset_update'] - ROLLUP_SAVE_ACTIONS = ['package_create', 'dataset_create', 'package_update', 'dataset_update'] - - # source ignored as queried diretly - EXTRAS_ROLLUP_KEY_IGNORE = ["metadata-source", "tags", "extras_rollup"] - - def before_action(self, action_name, context, data_dict): - """ before_action is a hook in CKAN 2.3 for ALL actions - This not exists at CKAN 2.8 and chained action do not exists at CKAN 2.3 """ - log.info('before_action CKAN {} {} {} {}'.format(ckan_version, action_name, context, data_dict)) - if action_name in self.UPDATE_CATEGORY_ACTIONS: - pkg_dict = p.toolkit.get_action('package_show')(context, {'id': data_dict['id']}) - if 'groups' not in data_dict: - data_dict['groups'] = pkg_dict.get('groups', []) - cats = {} - for extra in pkg_dict.get('extras', []): - if extra['key'].startswith('__category_tag_'): - cats[extra['key']] = extra['value'] - extras = data_dict.get('extras', []) - for item in extras: - if item['key'] in cats: - del cats[item['key']] - for cat in cats: - extras.append({'key': cat, 'value': cats[cat]}) - - # make sure rollup happens after any other actions - if action_name in self.ROLLUP_SAVE_ACTIONS: - extras_rollup = {} - new_extras = [] - for extra in data_dict.get('extras', []): - if extra['key'] in self.EXTRAS_ROLLUP_KEY_IGNORE: - new_extras.append(extra) - else: - extras_rollup[extra['key']] = extra['value'] - if extras_rollup: - found_extras_rollup = False - for new_extra in new_extras: - if new_extra['key'] == "extras_rollup": - # Update extras_rollup - new_extra['value'] = json.dumps(extras_rollup) - found_extras_rollup = True - if not found_extras_rollup: - # Insert extras_rollup if not found - new_extras.append({'key': 'extras_rollup', - 'value': json.dumps(extras_rollup)}) - data_dict['extras'] = new_extras - def configure(self, config): log.info('plugin initialized: %s', self.__class__.__name__) self.__class__.edit_url = config.get('saml2.user_edit') @@ -462,7 +415,7 @@ def saml2_user_edit_url(cls): # IPackageController - def 
before_view(self, pkg_dict): + def before_dataset_view(self, pkg_dict): for num, extra in enumerate(pkg_dict.get('extras', [])): if extra['key'] == 'tags': @@ -487,7 +440,7 @@ def before_view(self, pkg_dict): return pkg_dict - def before_index(self, pkg_dict): + def before_dataset_index(self, pkg_dict): tags = pkg_dict.get('tags', []) tags.extend(tag for tag in split_tags(pkg_dict.get('extras_tags', ''))) @@ -522,7 +475,7 @@ def before_index(self, pkg_dict): return pkg_dict - def before_search(self, search_params): + def before_dataset_search(self, search_params): fq = search_params.get('fq', '') @@ -549,7 +502,7 @@ def before_search(self, search_params): search_params['fq'] = fq return search_params - def after_show(self, context, data_dict): + def after_dataset_show(self, context, data_dict): current_extras = data_dict.get('extras', []) new_extras = [] @@ -635,7 +588,6 @@ class Miscs(p.SingletonPlugin): ''' p.implements(p.IConfigurer) p.implements(p.IConfigurable) - p.implements(p.IRoutes, inherit=True) p.implements(p.IBlueprint) # IConfigurer diff --git a/ckanext/geodatagov/templates/source/geodatagov_source_form.html b/ckanext/geodatagov/templates/source/geodatagov_source_form.html index 24cf9dab..3c0f255f 100644 --- a/ckanext/geodatagov/templates/source/geodatagov_source_form.html +++ b/ckanext/geodatagov/templates/source/geodatagov_source_form.html @@ -1,6 +1,6 @@ {% extends 'source/new_source_form.html' %} {% import 'macros/form.html' as form %} -{% resource 'harvest-extra-field/main' %} +{% asset 'harvest-extra-field/main' %} {% block extra_config %} @@ -19,7 +19,7 @@ {% set validator_profiles = source_config.get('validator_profiles') or data.validator_profiles %} {% set validator_schema = source_config.get('validator_schema') or data.validator_schema %} -
+
diff --git a/ckanext/geodatagov/tests/test_category_tags.py b/ckanext/geodatagov/tests/test_category_tags.py index 6bb2a10d..8ce8c21d 100644 --- a/ckanext/geodatagov/tests/test_category_tags.py +++ b/ckanext/geodatagov/tests/test_category_tags.py @@ -34,7 +34,7 @@ def create_datasets(self): self.group2 = factories.Group() self.dataset1 = factories.Dataset(owner_org=organization['id'], groups=[{"name": self.group1["name"]}]) self.dataset2 = factories.Dataset(owner_org=organization['id'], groups=[{"name": self.group2["name"]}]) - sysadmin = factories.Sysadmin(name='testUpdate') + sysadmin = factories.SysadminWithToken() self.user_name = sysadmin['name'] def test_group_catagory_tag_update(self): diff --git a/ckanext/geodatagov/tests/test_export_csv.py b/ckanext/geodatagov/tests/test_export_csv.py deleted file mode 100644 index 27efe242..00000000 --- a/ckanext/geodatagov/tests/test_export_csv.py +++ /dev/null @@ -1,186 +0,0 @@ -from datetime import datetime -import json -import logging -import pkg_resources - -from ckan.tests.helpers import FunctionalTestBase -import ckan.lib.search as search -from ckan.tests import factories - -from ckanext.geodatagov.commands import GeoGovCommand - - -log = logging.getLogger(__name__) - - -class TestExportCSV(FunctionalTestBase): - - @classmethod - def setup(cls): - search.clear_all() - - def test_export_csv(self): - """ run json_export and analyze results """ - - self.create_datasets() - - cmd = GeoGovCommand() - results, entry = cmd.export_csv() - - # total results = groups in packages - assert len(results) == 8 - - r = results[0] - assert r['title'] == "Dataset 1" - assert r['topic'] == self.group1['title'] - assert r['topicCategories'] == "g1c1" - - r = results[1] - assert r['title'] == "Dataset 1" - assert r['topic'] == self.group2['title'] - assert r['topicCategories'] == "" - - r = results[2] - assert r['title'] == "Dataset 2" - assert r['topic'] == self.group2['title'] - assert r['topicCategories'] == "g2c1;g2c2" - - r = results[3] - assert r['title'] == "Dataset 2" - assert r['topic'] == self.group3['title'] - assert r['topicCategories'] == "" - - r = results[4] - assert r['title'] == "Dataset 3" - assert r['topic'] == self.group3['title'] - assert r['topicCategories'] == "g3c1;g3c2" - - r = results[5] - assert r['title'] == "Dataset 3" - assert r['topic'] == self.group4['title'] - assert r['topicCategories'] == "" - - r = results[6] - assert r['title'] == "Dataset 4" - assert r['topic'] == self.group1['title'] - assert r['topicCategories'] == "" - - r = results[7] - assert r['title'] == "Dataset 4" - assert r['topic'] == self.group4['title'] - assert r['topicCategories'] == "g4c1" - - assert entry.date, datetime.strftime(datetime.now() == '%Y%m%d') - - # look for all topics in the CSV - topics_found = [] - for cat in self.category_tags: - topic = ';'.join(cat['value'].strip('"[],').split('","')) - assert topic in entry.csv - topics_found.append(cat['value']) - - def test_topics_csv_url(self): - """ test the /topics-csv url """ - self.create_datasets() - cmd = GeoGovCommand() - results, entry = cmd.export_csv() - - self.app = self._get_test_app() - res = self.app.get('/topics-csv') - for cat in self.category_tags: - topic = ';'.join(cat['value'].strip('"[],').split('","')) - assert topic in res - - def create_datasets(self): - - self.group1 = factories.Group() - group1_cat = {"key": "__category_tag_{}".format(self.group1['id']), "value": "[\"g1c1\"]"} - self.group2 = factories.Group() - group2_cat = {"key": 
"__category_tag_{}".format(self.group2['id']), "value": "[\"g2c1\",\"g2c2\"]"} - self.group3 = factories.Group() - group3_cat = {"key": "__category_tag_{}".format(self.group3['id']), "value": "[\"g3c1\",\"g3c2\"]"} - self.group4 = factories.Group() - group4_cat = {"key": "__category_tag_{}".format(self.group4['id']), "value": "[\"g4c1\"]"} - - self.category_tags = [group1_cat, group2_cat, group3_cat, group4_cat] - organization = factories.Organization() - - dataset1 = factories.Dataset( # NOQA - title="Dataset 1", - owner_org=organization['id'], - groups=[ - {"name": self.group1['name']}, - {"name": self.group2['name']} - ], - extras=[group1_cat]) - - dataset2 = factories.Dataset( # NOQA - title="Dataset 2", - owner_org=organization['id'], - groups=[ - {"name": self.group2['name']}, - {"name": self.group3['name']} - ], - extras=[group2_cat]) - - dataset3 = factories.Dataset( # NOQA - title="Dataset 3", - owner_org=organization['id'], - groups=[ - {"name": self.group3['name']}, - {"name": self.group4['name']} - ], - extras=[group3_cat]) - - dataset4 = factories.Dataset( # NOQA - title="Dataset 4", - owner_org=organization['id'], - groups=[ - {"name": self.group4['name']}, - {"name": self.group1['name']} - ], - extras=[group4_cat]) - - def test_static_export_group_and_tags(self): - - test_file = pkg_resources.resource_filename(__name__, "/data-samples/datasets_category_tags.json") - - f = open(test_file) - packages = json.load(f) - f.close() - - cmd = GeoGovCommand() - results = cmd.export_group_and_tags(packages) - - assert len(results) == 12 - - for result in results: - assert result['organization'], 'Department of Housing and Urban Development == Federal Government' - assert result['organizationUrl'] == 'https://catalog.data.gov/organization/hud-gov' - assert result['harvestSourceUrl'] == 'https://catalog.data.gov/harvest/991bcaf7-498f-4657-bed2-f6594e1bfbe7' - - assert results[0]['topic'] == 'BusinessUSA' - assert results[1]['topic'] == 'Consumer' - assert results[2]['topic'] == 'Energy' - assert results[3]['topic'] == 'Finance' - assert results[4]['topic'] == 'BusinessUSA' - assert results[5]['topic'] == 'Consumer' - assert results[6]['topic'] == 'Energy' - assert results[7]['topic'] == 'Finance' - assert results[8]['topic'] == 'BusinessUSA' - assert results[9]['topic'] == 'Consumer' - assert results[10]['topic'] == 'Energy' - assert results[11]['topic'] == 'Finance' - - assert results[0]['topicCategories'] == '' - assert results[1]['topicCategories'] == 'Finance' - assert results[2]['topicCategories'] == 'Total Energy' - assert results[3]['topicCategories'] == '' - assert results[4]['topicCategories'] == '' - assert results[5]['topicCategories'] == 'Finance' - assert results[6]['topicCategories'] == 'Total Energy' - assert results[7]['topicCategories'] == '' - assert results[8]['topicCategories'] == '' - assert results[9]['topicCategories'] == 'Finance' - assert results[10]['topicCategories'] == 'Total Energy' - assert results[11]['topicCategories'] == '' diff --git a/ckanext/geodatagov/tests/test_fix_spatial.py b/ckanext/geodatagov/tests/test_fix_spatial.py index 4fd82692..c06058f4 100644 --- a/ckanext/geodatagov/tests/test_fix_spatial.py +++ b/ckanext/geodatagov/tests/test_fix_spatial.py @@ -107,3 +107,54 @@ def test_list_spatial_transformation(self): assert result['count'] == 1 assert result['results'][0]['id'] == dataset['id'] + + def test_spatial_plus_sign(self): + + old_geo = '-179.231086,-14.601813,+179.859681,+71.441059' + + context = {'user': self.user['name'], 'ignore_auth': 
True} + pkg = { + 'title': 'Spatial Plus Sign', + 'name': 'spatial-plus', + 'extras': [ + {'key': 'spatial', 'value': old_geo} + ] + } + dataset = p.toolkit.get_action('package_create')(context, pkg) + + expected_spatial = ('{"type": "Polygon", "coordinates": [[[-179.231086, -14.601813], ' + '[-179.231086, 71.441059], [179.859681, 71.441059], [179.859681, ' + '-14.601813], [-179.231086, -14.601813]]]}') + spatial_extra_exists = False + for extra in dataset['extras']: + if extra['key'] == 'spatial': + spatial_extra_exists = True + assert extra['value'] == expected_spatial + + assert spatial_extra_exists is True + + def test_bad_string_transformation(self): + + old_geo = 'US Domestic' + # require locations table to be installed + + context = {'user': self.user['name'], 'ignore_auth': True} + pkg = { + 'title': 'Spatial US Domestic', + 'name': 'spatial-usd', + 'extras': [ + {'key': 'spatial', 'value': old_geo} + ] + } + dataset = p.toolkit.get_action('package_create')(context, pkg) + + expected_spatial = "" + spatial_extra_exists = False + for extra in dataset['extras']: + if extra['key'] == 'spatial': + spatial_extra_exists = True + assert extra['value'] == expected_spatial + if extra['key'] == 'old-spatial': + assert extra['value'] == old_geo + + assert spatial_extra_exists is True diff --git a/ckanext/geodatagov/tests/test_logic.py b/ckanext/geodatagov/tests/test_logic.py index 83e7ae97..318ae1e7 100644 --- a/ckanext/geodatagov/tests/test_logic.py +++ b/ckanext/geodatagov/tests/test_logic.py @@ -25,7 +25,7 @@ def create_datasets(self): ], extras=[]) - sysadmin = factories.Sysadmin(name='testUpdate') + sysadmin = factories.SysadminWithToken() self.user_name = sysadmin['name'] def test_rollup_save_action(self): diff --git a/ckanext/geodatagov/tests/test_waf.py b/ckanext/geodatagov/tests/test_waf.py index f28146d0..addaa1d6 100644 --- a/ckanext/geodatagov/tests/test_waf.py +++ b/ckanext/geodatagov/tests/test_waf.py @@ -9,8 +9,7 @@ from factories import HarvestJobObj, WafHarvestSourceObj from ckan.tests.helpers import reset_db -from ckan.tests.factories import Organization, Sysadmin - +from ckan.tests.factories import Organization, SysadminWithToken log = logging.getLogger(__name__) @@ -197,7 +196,7 @@ def test_waf_trim_tags(self): bad_list = list(set(tag_list) - set(expected_list)) log.info("Tags that are not trimmed: %s", bad_list) - assert (tag_list == expected_list) + assert (sorted(tag_list) == sorted(expected_list)) def test_extras_rollup(self): """ Test https://github.com/GSA/datagov-deploy/issues/2166 """ @@ -211,7 +210,7 @@ def test_extras_rollup(self): assert extras_rollup log.info("extras_rollup package info: %s", package) - sysadmin = Sysadmin(name='testUpdate') + sysadmin = SysadminWithToken() user_name = sysadmin['name'] context = {'user': user_name} new_extras = [{'key': key, 'value': value} for key, value in list(extras.items())] diff --git a/docker-compose.yml b/docker-compose.yml index e9f553aa..bc88c574 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,6 +20,7 @@ services: - db - redis - solr + - localstack-container volumes: - ./ckanext:/srv/app/ckanext/ - ./test.sh:/srv/app/test.sh diff --git a/requirements.txt b/requirements.txt index ed94b9b4..a2157720 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,29 +1,27 @@ -e git+https://github.com/ckan/ckanext-harvest.git#egg=ckanext_harvest -e git+https://github.com/ckan/ckanext-spatial.git#egg=ckanext-spatial -e git+https://github.com/asl2/PyZ3950.git#egg=PyZ3950 --e 
git+https://github.com/nickumia-reisys/werkzeug@e1f6527604ab30e4b46b5430a5fb97e7a7055cd7#egg=werkzeug
+git+https://github.com/gsa/ckan.git@ckan-210-temp#egg=ckan
 
 # ckanext-harvest dependencies
 ckantoolkit>=0.0.7
 pika>=1.1.0
 pyOpenSSL>22.10 #pinning to fix error with crypto (https://levelup.gitconnected.com/fix-attributeerror-module-lib-has-no-attribute-openssl-521a35d83769)
-
 # redis==2.10.6 # included in ckan core
 # requests>=2.11.1 # included in ckan core
 
 # ckanext-spatial
 # ckantoolkit # included as dep of ckanext-harvest
-GeoAlchemy>=0.6
 GeoAlchemy2==0.5.0
 Shapely>=1.2.13
-pyproj==2.6.1
+pyproj==3.4.1
 OWSLib==0.28.1
 lxml>=2.3
 argparse
 pyparsing>=2.1.10
 # requests>=1.1.0 # included in ckan-core
 six
-geojson==2.5.0
+geojson==3.0.1
 
 # PyZ3950
 pyasn1
diff --git a/setup.py b/setup.py
index cfc14d2a..49d60a12 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
 
 setup(
     name="ckanext-geodatagov",
-    version="0.1.37",
+    version="0.2.0",
     description="",
     long_description=long_description,
     long_description_content_type='text/markdown',
@@ -28,7 +28,7 @@
     zip_safe=False,
     install_requires=[
         # -*- Extra requirements: -*-
-        'ckanext-datajson',
+        'ckanext-datajson>=0.1.19',
         'boto3',
         'ply>=3.4',
     ],
diff --git a/test.ini b/test.ini
index a45612c0..fd3c00c7 100644
--- a/test.ini
+++ b/test.ini
@@ -12,7 +12,7 @@
 ckan.site_description = A test site for testing my CKAN extension
 ckan.plugins = stats geodatagov datagov_harvest ckan_harvester geodatagov_geoportal_harvester z3950_harvester arcgis_harvester waf_harvester_collection geodatagov_csw_harvester geodatagov_doc_harvester geodatagov_waf_harvester geodatagov_miscs spatial_metadata spatial_query resource_proxy spatial_harvest_metadata_api datajson_harvest envvars
 ckan.legacy_templates = no
 ckan.spatial.validator.profiles = iso19139ngdc
-ckanext.spatial.search_backend = solr
+ckanext.spatial.search_backend = solr-bbox
 # Logging configuration
 [loggers]
diff --git a/test.sh b/test.sh
index c3ceab76..d49645ef 100755
--- a/test.sh
+++ b/test.sh
@@ -33,4 +33,5 @@
 ckan -c $test_ini spatial initdb
 pytest --ckan-ini=test.ini --cov=ckanext.geodatagov --disable-warnings ckanext/geodatagov/tests/
 # Run this this pytest command if only testing a single test
-# pytest --ckan-ini=$test_ini --cov=ckanext.geodatagov --disable-warnings ckanext/geodatagov/tests/test_category_tags.py ckanext/geodatagov/tests/test_db_solr_sync.py
+# pytest --ckan-ini=$test_ini --cov=ckanext.geodatagov --disable-warnings ckanext/geodatagov/tests/test_category_tags.py
+# pytest --ckan-ini=$test_ini --cov=ckanext.geodatagov --disable-warnings ckanext/geodatagov/tests/test_category_tags.py ckanext/geodatagov/tests/test_waf-collection.py
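Reviewer note on the spatial handling changed in ckanext/geodatagov/logic.py: translate_spatial() now tries the "four comma-separated numbers" bounding-box case before attempting to parse the value as JSON or to resolve it against the locations table, returns '' instead of 'null' when nothing usable can be derived, and rollup_save_action() then records an empty spatial extra while moving the original value to old-spatial. The sketch below is a minimal, standalone illustration of the bbox-to-polygon step only; bbox_to_geojson and its float() normalisation are illustrative stand-ins (the patch uses its own geojson_tpl and is_number helpers), but the ring ordering matches what test_spatial_plus_sign asserts.

```python
import json


def bbox_to_geojson(spatial):
    """Sketch: turn 'minx,miny,maxx,maxy' (leading '+' allowed) into a GeoJSON
    Polygon string, or return None when the value is not a plain bbox."""
    parts = spatial.strip().split(',')
    if len(parts) != 4:
        return None
    try:
        # float() also normalises values such as '+179.859681'
        minx, miny, maxx, maxy = (float(p) for p in parts)
    except ValueError:
        return None
    # Ring order asserted by test_spatial_plus_sign:
    # (minx,miny) -> (minx,maxy) -> (maxx,maxy) -> (maxx,miny) -> (minx,miny)
    ring = [[minx, miny], [minx, maxy], [maxx, maxy], [maxx, miny], [minx, miny]]
    return json.dumps({"type": "Polygon", "coordinates": [ring]})


if __name__ == "__main__":
    print(bbox_to_geojson('-179.231086,-14.601813,+179.859681,+71.441059'))
```

Running the bbox check ahead of the json.loads() branch (which is how the patch reorders translate_spatial) means a value like the one above is handled by the cheap numeric test instead of first being logged as unparseable JSON.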
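Reviewer note on the new ckanext.geodatagov.helpers.string helper: the patch swaps the bare str builtin for this one-argument wrapper wherever it appears in a validator chain (the arcgis, z3950 and waf_collection harvester schemas, plus the chained package_create schema in logic.py), presumably because a plain single-parameter callable is safer for CKAN 2.10's validator handling than a builtin that also accepts encoding arguments. Below is a minimal sketch of the helper and the schema fragments it is wired into, using CKAN's stock navl validators; the fragment variable names are illustrative, only the keys and validator lists come from the patch.

```python
from ckan.lib.navl.validators import ignore_empty, not_empty


def string(value):
    """One-argument coercion to str, as added in ckanext/geodatagov/helpers.py."""
    return str(value)


# Schema fragments mirroring the harvesters touched by the patch.
waf_collection_extra_schema = {
    "collection_metadata_url": [not_empty, string],
}
z3950_extra_schema = {
    "database": [not_empty, string],
}
arcgis_extra_schema = {
    "extra_search_criteria": [ignore_empty, string],
}
```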