diff --git a/.env b/.env index c9935e35..2d53e416 100644 --- a/.env +++ b/.env @@ -44,7 +44,7 @@ CKAN_SMTP_PASSWORD=pass CKAN_SMTP_MAIL_FROM=ckan@localhost # Extensions -CKAN__PLUGINS=envvars image_view text_view recline_view datagov_harvest ckan_harvester geodatagov geodatagov_miscs z3950_harvester arcgis_harvester geodatagov_geoportal_harvester waf_harvester_collection geodatagov_csw_harvester geodatagov_doc_harvester geodatagov_waf_harvester spatial_metadata spatial_query s3test +CKAN__PLUGINS=envvars image_view text_view recline_view datagov_harvest ckan_harvester geodatagov geodatagov_miscs z3950_harvester arcgis_harvester geodatagov_geoportal_harvester waf_harvester_collection geodatagov_csw_harvester geodatagov_doc_harvester geodatagov_waf_harvester spatial_metadata spatial_query s3test datajson datajson_harvest # Harvest settings CKAN__HARVEST__MQ__TYPE=redis diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 135ebcc5..47901719 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,7 +21,7 @@ jobs: needs: lint strategy: matrix: - ckan-version: [2.9.5, 2.9, 2.9.7] + ckan-version: ['2.10', '2.10.1'] fail-fast: false name: CKAN ${{ matrix.ckan-version }} diff --git a/Dockerfile b/Dockerfile index 96eaee05..a1cab295 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG CKAN_VERSION=2.9.5 +ARG CKAN_VERSION=2.10.1 FROM openknowledge/ckan-dev:${CKAN_VERSION} ARG CKAN_VERSION diff --git a/Makefile b/Makefile index dd7db552..4a82c3e7 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -CKAN_VERSION ?= 2.9.7 +CKAN_VERSION ?= 2.10.1 COMPOSE_FILE ?= docker-compose.yml build: ## Build the docker containers diff --git a/README.md b/README.md index 88ea2952..8843a400 100644 --- a/README.md +++ b/README.md @@ -34,11 +34,12 @@ This extension is compatible with these versions of CKAN. 
CKAN version | Compatibility ------------ | ------------- <=2.8 | no -2.9 | [complete](https://github.com/GSA/datagov-ckan-multi/issues/570) +2.9 | 0.1.37 (last supported) +2.10 | >=0.2.0 ## Tests -All the tests live in the [/ckanext/geodatagov/tests](/ckanext/geodatagov/tests) folder. [Github actions](https://github.com/GSA/ckanext-geodatagov/blob/main/.github/workflows/test.yml) is configured to run the tests against CKAN 2.9 when you open a pull request. +All the tests live in the [/ckanext/geodatagov/tests](/ckanext/geodatagov/tests) folder. [GitHub Actions](https://github.com/GSA/ckanext-geodatagov/blob/main/.github/workflows/test.yml) is configured to run the tests against CKAN 2.10 when you open a pull request. ## Using the Docker Dev Environment @@ -61,7 +62,7 @@ To docker exec into the CKAN image, run: ### Testing They follow the guidelines for [testing CKAN -extensions](https://docs.ckan.org/en/2.9/extensions/testing-extensions.html#testing-extensions). +extensions](https://docs.ckan.org/en/2.10/extensions/testing-extensions.html#testing-extensions). To run the extension tests, start the containers with `make up`, then: @@ -100,7 +101,7 @@ In order to support multiple versions of CKAN, or even upgrade to new versions of CKAN, we support development and testing through the `CKAN_VERSION` environment variable. 
- $ make CKAN_VERSION=2.9 test + $ make CKAN_VERSION=2.10 test ### Command line interface diff --git a/ckanext/geodatagov/blueprint.py b/ckanext/geodatagov/blueprint.py index 2b2e7378..106dbbfb 100644 --- a/ckanext/geodatagov/blueprint.py +++ b/ckanext/geodatagov/blueprint.py @@ -4,7 +4,7 @@ from flask import Blueprint from flask.wrappers import Response as response -from ckanext.geodatagov.model import MiscsFeed, MiscsTopicCSV +from ckanext.geodatagov.model import MiscsFeed datapusher = Blueprint('geodatagov', __name__) @@ -18,25 +18,5 @@ def feed(): return entry.feed -def csv(date=None): - if date: - entry = model.Session.query(MiscsTopicCSV) \ - .filter_by(date=date) \ - .first() - else: - entry = model.Session.query(MiscsTopicCSV) \ - .order_by(MiscsTopicCSV.date.desc()) \ - .first() - if not entry or not entry.csv: - abort(404, 'There is no csv entry yet.') - response.content_type = 'text/csv' - response.content_disposition = 'attachment; filename="topics-%s.csv"' % entry.date - return entry.csv - - datapusher.add_url_rule('/usasearch-custom-feed.xml', view_func=feed) -datapusher.add_url_rule('/topics-csv/{date}', - view_func=csv) -datapusher.add_url_rule('/topics-csv', - view_func=csv) diff --git a/ckanext/geodatagov/commands.py b/ckanext/geodatagov/commands.py index 4d14dcdf..89bd1f01 100644 --- a/ckanext/geodatagov/commands.py +++ b/ckanext/geodatagov/commands.py @@ -20,7 +20,7 @@ from ckan.plugins.toolkit import config from ckanext.harvest.model import HarvestSource, HarvestJob -from ckanext.geodatagov.model import MiscsFeed, MiscsTopicCSV +from ckanext.geodatagov.model import MiscsFeed # https://github.com/GSA/ckanext-geodatagov/issues/117 @@ -566,104 +566,6 @@ def export_group_and_tags(packages, domain='https://catalog.data.gov'): result.append(package) return result - def export_csv(self, domain='https://catalog.data.gov'): - print('export started...') - - # cron job - # paster --plugin=ckanext-geodatagov geodatagov export-csv 
--config=/etc/ckan/production.ini - - # Exported CSV header list: - # - Dataset Title - # - Dataset URL - # - Organization Name - # - Organization Link - # - Harvest Source Name - # - Harvest Source Link - # - Topic Name - # - Topic Categories - - import io - import csv - - limit = 100 - page = 1 - - import pprint - - result = [] - - while True: - data_dict = { - 'q': 'groups: *', - # 'fq': fq, - # 'facet.field': facets.keys(), - 'rows': limit, - # 'sort': sort_by, - 'start': (page - 1) * limit - # 'extras': search_extras - } - - query = logic.get_action('package_search')({'model': model, 'ignore_auth': True}, data_dict) - - page += 1 - # import pprint - # pprint.pprint(packages) - - if not query['results']: - break - - packages = query['results'] - result = result + GeoGovCommand.export_group_and_tags(packages=packages, domain=domain) - - if not result: - print('nothing to do') - return - - import datetime - - print('writing into db...') - - date_suffix = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%d') - csv_output = io.StringIO() - - fieldnames = ['Dataset Title', 'Dataset URL', 'Organization Name', 'Organization Link', - 'Harvest Source Name', 'Harvest Source Link', 'Topic Name', 'Topic Categories'] - - writer = csv.writer(csv_output) - writer.writerow(fieldnames) - - for pkg in result: - try: - writer.writerow( - [ - pkg['title'], - pkg['url'], - pkg['organization'], - pkg['organizationUrl'], - pkg['harvestSourceTitle'], - pkg['harvestSourceUrl'], - pkg['topic'], - pkg['topicCategories'] - ] - ) - except UnicodeEncodeError: - pprint.pprint(pkg) - - content = csv_output.getvalue() - - entry = model.Session.query(MiscsTopicCSV) \ - .filter_by(date=date_suffix) \ - .first() - if not entry: - # create the empty entry for the first time - entry = MiscsTopicCSV() - entry.date = date_suffix - entry.csv = content - entry.save() - - print('csv file topics-%s.csv is ready.' 
% date_suffix) - return result, entry - # this code is defunct and will need to be refactored into cli.py """ def jsonl_export(self): @@ -838,7 +740,7 @@ def update_dataset_geo_fields(self): # iterate over all datasets search_backend = config.get('ckanext.spatial.search_backend', 'postgis') - if search_backend != 'solr': + if search_backend != 'solr-bbox': raise ValueError('Solr is not your default search backend (ckanext.spatial.search_backend)') datasets = model.Session.query(model.Package).all() diff --git a/ckanext/geodatagov/harvesters/arcgis.py b/ckanext/geodatagov/harvesters/arcgis.py index 8632ba03..22b16015 100644 --- a/ckanext/geodatagov/harvesters/arcgis.py +++ b/ckanext/geodatagov/harvesters/arcgis.py @@ -23,6 +23,8 @@ from ckan.plugins.toolkit import add_template_directory, add_resource, requires_ckan_version from ckan.plugins import IConfigurer +from ckanext.geodatagov.helpers import string as custom_string + requires_ckan_version("2.9") @@ -118,7 +120,7 @@ def info(self): def extra_schema(self): return { 'private_datasets': [ignore_empty, boolean_validator], - 'extra_search_criteria': [ignore_empty, str], + 'extra_search_criteria': [ignore_empty, custom_string], } def gather_stage(self, harvest_job): @@ -287,7 +289,7 @@ def import_stage(self, harvest_object): package_schema = logic.schema.default_update_package_schema() tag_schema = logic.schema.default_tags_schema() - tag_schema['name'] = [not_empty, str] + tag_schema['name'] = [not_empty, custom_string] package_schema['tags'] = tag_schema context['schema'] = package_schema # TODO: user @@ -298,7 +300,7 @@ def import_stage(self, harvest_object): # We need to explicitly provide a package ID, otherwise ckanext-spatial # won't be be able to link the extent to the package. 
package_dict['id'] = str(uuid.uuid4()) - package_schema['id'] = [str] + package_schema['id'] = [custom_string] # Save reference to the package on the object harvest_object.package_id = package_dict['id'] diff --git a/ckanext/geodatagov/harvesters/waf_collection.py b/ckanext/geodatagov/harvesters/waf_collection.py index 684e07b7..0b969dee 100644 --- a/ckanext/geodatagov/harvesters/waf_collection.py +++ b/ckanext/geodatagov/harvesters/waf_collection.py @@ -13,6 +13,7 @@ ) # , validate_profiles; , validate_profiles from ckanext.harvest.model import HarvestObject from ckanext.harvest.model import HarvestObjectExtra as HOExtra +from ckanext.geodatagov.helpers import string class WAFCollectionHarvester(GeoDataGovWAFHarvester): @@ -26,7 +27,7 @@ def info(self): def extra_schema(self): extra_schema = super(WAFCollectionHarvester, self).extra_schema() - extra_schema["collection_metadata_url"] = [not_empty, str] + extra_schema["collection_metadata_url"] = [not_empty, string] log.debug( "Getting extra schema for WAFCollectionHarvester: {}".format(extra_schema) ) diff --git a/ckanext/geodatagov/harvesters/z3950.py b/ckanext/geodatagov/harvesters/z3950.py index f8bd743f..b5eb6201 100644 --- a/ckanext/geodatagov/harvesters/z3950.py +++ b/ckanext/geodatagov/harvesters/z3950.py @@ -17,6 +17,7 @@ from ckan.logic.validators import boolean_validator from ckan.plugins.toolkit import add_template_directory, add_resource, requires_ckan_version +from ckanext.geodatagov.helpers import string requires_ckan_version("2.9") @@ -43,7 +44,7 @@ def info(self): def extra_schema(self): return {'private_datasets': [ignore_empty, boolean_validator], - 'database': [not_empty, str], + 'database': [not_empty, string], 'port': [not_empty, convert_int]} def gather_stage(self, harvest_job): diff --git a/ckanext/geodatagov/helpers.py b/ckanext/geodatagov/helpers.py index fbed8393..1a1892d0 100644 --- a/ckanext/geodatagov/helpers.py +++ b/ckanext/geodatagov/helpers.py @@ -61,3 +61,7 @@ def 
get_harvest_source_config(harvester_id): def get_collection_package(collection_package_id): package = p.toolkit.get_action('package_show')({}, {'id': collection_package_id}) return package + + +def string(value): + return str(value) diff --git a/ckanext/geodatagov/logic.py b/ckanext/geodatagov/logic.py index ef66b400..7a4cd78e 100644 --- a/ckanext/geodatagov/logic.py +++ b/ckanext/geodatagov/logic.py @@ -6,6 +6,7 @@ import time import uuid +from ckan.lib.navl.validators import not_empty from ckan.logic import side_effect_free import ckan.logic.schema as schema from ckan.logic.action import get as core_get @@ -13,6 +14,7 @@ import ckan.plugins as p from ckanext.geodatagov.plugin import change_resource_details, split_tags from ckanext.geodatagov.harvesters.arcgis import _slugify +from ckanext.geodatagov.helpers import string from ckanext.harvest.model import HarvestObject # , HarvestJob from ckan.common import config @@ -238,7 +240,7 @@ def datajson_create(context, data_dict): 'extras': [{'key': 'organization_type', 'value': "Federal Government"}]}) context['schema'] = schema.default_create_package_schema() - context['schema']['id'] = [p.toolkit.get_validator('not_empty')] + context['schema']['id'] = [not_empty] context['return_id_only'] = True return p.toolkit.get_action('package_create')(context, new_package) @@ -303,7 +305,7 @@ def doi_create(context, data_dict): new_package["extras"].append({"key": "harvest_object_id", "value": obj.id}) context['schema'] = schema.default_create_package_schema() - context['schema']['id'] = [p.toolkit.get_validator('not_empty')] + context['schema']['id'] = [not_empty] context['return_id_only'] = True p.toolkit.get_action('package_create')(context, new_package) print(str(datetime.datetime.now()) + ' Imported doi id ' + new_package['id']) @@ -413,7 +415,7 @@ def rollup_save_action(context, data_dict): if p.toolkit.check_ckan_version(min_version='2.8'): search_backend = config.get('ckanext.spatial.search_backend', 'postgis') 
log.debug('Search backend {}'.format(search_backend)) - if search_backend == 'solr': + if search_backend == 'solr-bbox': old_spatial = new_extras_rollup.get('spatial', None) if old_spatial is not None: log.info('Old Spatial found {}'.format(old_spatial)) @@ -429,6 +431,10 @@ def rollup_save_action(context, data_dict): new_extras.append({'key': 'spatial', 'value': new_spatial}) # remove rolled spatial to skip run this process again new_extras_rollup['old-spatial'] = new_extras_rollup.pop('spatial') + else: + log.info('New spatial could not be created') + new_extras.append({'key': 'spatial', 'value': ''}) + new_extras_rollup['old-spatial'] = new_extras_rollup.pop('spatial') if new_extras_rollup: new_extras.append({'key': 'extras_rollup', 'value': json.dumps(new_extras_rollup)}) @@ -474,10 +480,18 @@ def translate_spatial(old_spatial): try: numbers_with_spaces = [int(i) for i in old_spatial_transformed.split(' ')] if all(isinstance(x, int) for x in numbers_with_spaces): - old_spatial_transformed = 'null' + old_spatial_transformed = '' except ValueError: pass + # If we have 4 numbers separated by commas, transform them as GeoJSON + parts = old_spatial_transformed.strip().split(',') + if len(parts) == 4 and all(is_number(x) for x in parts): + minx, miny, maxx, maxy = parts + params = {"minx": minx, "miny": miny, "maxx": maxx, "maxy": maxy} + new_spatial = geojson_tpl.format(**params) + return new_spatial + # Analyze with type of data is JSON valid try: geometry = json.loads(old_spatial_transformed) # NOQA F841 @@ -492,18 +506,13 @@ def translate_spatial(old_spatial): return old_spatial_transformed except BaseException: log.info('JSON that could not be parsed\n\t{}'.format(old_spatial_transformed)) - pass - # If we have 4 numbers separated by commas, transform them as GeoJSON - parts = old_spatial_transformed.strip().split(',') - if len(parts) == 4 and all(is_number(x) for x in parts): - minx, miny, maxx, maxy = parts - params = {"minx": minx, "miny": miny, "maxx": maxx, 
"maxy": maxy} - new_spatial = geojson_tpl.format(**params) - return new_spatial + try: + return get_geo_from_string(old_spatial) + except AttributeError: + pass - g = get_geo_from_string(old_spatial) - return g + return '' def is_number(s): @@ -541,6 +550,10 @@ def package_create(up_func, context, data_dict): """ before_package_create for CKAN 2.8 """ rollup_save_action(context, data_dict) data_dict = fix_dataset(data_dict) + # TODO: This fix is bad, find a better one :( + if 'schema' in context.keys(): + context['schema']['id'] = [string] + context['schema']['tags']['name'] = [not_empty, string] return up_func(context, data_dict) diff --git a/ckanext/geodatagov/model.py b/ckanext/geodatagov/model.py index 19e3ef3e..ef3d94be 100644 --- a/ckanext/geodatagov/model.py +++ b/ckanext/geodatagov/model.py @@ -8,7 +8,6 @@ log = logging.getLogger(__name__) miscs_feed_table = None -miscs_topic_csv_table = None class MiscsFeedException(Exception): @@ -19,14 +18,6 @@ class MiscsFeed(domain_object.DomainObject): pass -class MiscsTopicCSVException(Exception): - pass - - -class MiscsTopicCSV(domain_object.DomainObject): - pass - - class MiscsSolrSyncException(Exception): pass @@ -50,19 +41,6 @@ def setup(): else: log.debug('Geodatagov Miscs Feed table creation deferred') - if miscs_topic_csv_table is None: - define_miscs_topic_csv_table() - log.debug('Geodatagov Miscs Topic CSV table defined in memory') - - if model.package_table.exists(): - if not miscs_topic_csv_table.exists(): - miscs_topic_csv_table.create() - log.debug('Geodatagov Miscs Topic CSV table created') - else: - log.debug('Geodatagov Miscs Topic CSV table already exists') - else: - log.debug('Geodatagov Miscs Topic CSV table creation deferred') - def define_miscs_feed_table(): global miscs_feed_table @@ -72,14 +50,3 @@ def define_miscs_feed_table(): ) meta.mapper(MiscsFeed, miscs_feed_table) - - -def define_miscs_topic_csv_table(): - global miscs_topic_csv_table - miscs_topic_csv_table = Table('miscs_topic_csv', 
meta.metadata, - Column('id', types.UnicodeText, primary_key=True, default=model.types.make_uuid), - Column('date', types.UnicodeText, index=True, unique=True, nullable=False, default=u''), - Column('csv', types.UnicodeText, nullable=False, default=u''), - ) - - meta.mapper(MiscsTopicCSV, miscs_topic_csv_table) diff --git a/ckanext/geodatagov/plugin.py b/ckanext/geodatagov/plugin.py index 568f4748..02a17a60 100644 --- a/ckanext/geodatagov/plugin.py +++ b/ckanext/geodatagov/plugin.py @@ -405,53 +405,6 @@ def update_config(self, config): edit_url = None - UPDATE_CATEGORY_ACTIONS = ['package_update', 'dataset_update'] - ROLLUP_SAVE_ACTIONS = ['package_create', 'dataset_create', 'package_update', 'dataset_update'] - - # source ignored as queried diretly - EXTRAS_ROLLUP_KEY_IGNORE = ["metadata-source", "tags", "extras_rollup"] - - def before_action(self, action_name, context, data_dict): - """ before_action is a hook in CKAN 2.3 for ALL actions - This not exists at CKAN 2.8 and chained action do not exists at CKAN 2.3 """ - log.info('before_action CKAN {} {} {} {}'.format(ckan_version, action_name, context, data_dict)) - if action_name in self.UPDATE_CATEGORY_ACTIONS: - pkg_dict = p.toolkit.get_action('package_show')(context, {'id': data_dict['id']}) - if 'groups' not in data_dict: - data_dict['groups'] = pkg_dict.get('groups', []) - cats = {} - for extra in pkg_dict.get('extras', []): - if extra['key'].startswith('__category_tag_'): - cats[extra['key']] = extra['value'] - extras = data_dict.get('extras', []) - for item in extras: - if item['key'] in cats: - del cats[item['key']] - for cat in cats: - extras.append({'key': cat, 'value': cats[cat]}) - - # make sure rollup happens after any other actions - if action_name in self.ROLLUP_SAVE_ACTIONS: - extras_rollup = {} - new_extras = [] - for extra in data_dict.get('extras', []): - if extra['key'] in self.EXTRAS_ROLLUP_KEY_IGNORE: - new_extras.append(extra) - else: - extras_rollup[extra['key']] = extra['value'] - if 
extras_rollup: - found_extras_rollup = False - for new_extra in new_extras: - if new_extra['key'] == "extras_rollup": - # Update extras_rollup - new_extra['value'] = json.dumps(extras_rollup) - found_extras_rollup = True - if not found_extras_rollup: - # Insert extras_rollup if not found - new_extras.append({'key': 'extras_rollup', - 'value': json.dumps(extras_rollup)}) - data_dict['extras'] = new_extras - def configure(self, config): log.info('plugin initialized: %s', self.__class__.__name__) self.__class__.edit_url = config.get('saml2.user_edit') @@ -462,7 +415,7 @@ def saml2_user_edit_url(cls): # IPackageController - def before_view(self, pkg_dict): + def before_dataset_view(self, pkg_dict): for num, extra in enumerate(pkg_dict.get('extras', [])): if extra['key'] == 'tags': @@ -487,7 +440,7 @@ def before_view(self, pkg_dict): return pkg_dict - def before_index(self, pkg_dict): + def before_dataset_index(self, pkg_dict): tags = pkg_dict.get('tags', []) tags.extend(tag for tag in split_tags(pkg_dict.get('extras_tags', ''))) @@ -522,7 +475,7 @@ def before_index(self, pkg_dict): return pkg_dict - def before_search(self, search_params): + def before_dataset_search(self, search_params): fq = search_params.get('fq', '') @@ -549,7 +502,7 @@ def before_search(self, search_params): search_params['fq'] = fq return search_params - def after_show(self, context, data_dict): + def after_dataset_show(self, context, data_dict): current_extras = data_dict.get('extras', []) new_extras = [] @@ -635,7 +588,6 @@ class Miscs(p.SingletonPlugin): ''' p.implements(p.IConfigurer) p.implements(p.IConfigurable) - p.implements(p.IRoutes, inherit=True) p.implements(p.IBlueprint) # IConfigurer diff --git a/ckanext/geodatagov/templates/source/geodatagov_source_form.html b/ckanext/geodatagov/templates/source/geodatagov_source_form.html index 24cf9dab..3c0f255f 100644 --- a/ckanext/geodatagov/templates/source/geodatagov_source_form.html +++ 
b/ckanext/geodatagov/templates/source/geodatagov_source_form.html @@ -1,6 +1,6 @@ {% extends 'source/new_source_form.html' %} {% import 'macros/form.html' as form %} -{% resource 'harvest-extra-field/main' %} +{% asset 'harvest-extra-field/main' %} {% block extra_config %} @@ -19,7 +19,7 @@ {% set validator_profiles = source_config.get('validator_profiles') or data.validator_profiles %} {% set validator_schema = source_config.get('validator_schema') or data.validator_schema %} -