From 4647c1bb3b0350b31728323697901547a74eca10 Mon Sep 17 00:00:00 2001 From: avdata99 Date: Thu, 9 May 2024 10:45:28 -0300 Subject: [PATCH 1/5] Allow index and use as facet new fields --- README.md | 7 +++++ ckanext/scheming/plugins.py | 60 ++++++++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1b6ce2b8..641dfa45 100644 --- a/README.md +++ b/README.md @@ -183,8 +183,15 @@ pages. Fields you exclude will not be shown to the end user, and will not be accepted when editing or updating this type of dataset. +#### Index dataset Fields +New schema fields that should be indexed by Solr for searching should be marked +with `index_search: true`. +#### Facet this field + +New schema fields that should be faceted in the search results should be marked +with `facet_field: true`. ## Group / Organization Schema Keys diff --git a/ckanext/scheming/plugins.py b/ckanext/scheming/plugins.py index 0980a684..d0ba819c 100644 --- a/ckanext/scheming/plugins.py +++ b/ckanext/scheming/plugins.py @@ -199,6 +199,8 @@ class SchemingDatasetsPlugin(p.SingletonPlugin, DefaultDatasetForm, p.implements(p.IDatasetForm, inherit=True) p.implements(p.IActions) p.implements(p.IValidators) + p.implements(p.IFacets, inherit=True) + p.implements(p.IPackageController, inherit=True) SCHEMA_OPTION = 'scheming.dataset_schemas' FALLBACK_OPTION = 'scheming.dataset_fallback' @@ -374,6 +376,62 @@ def prepare_dataset_blueprint(self, package_type, bp): ) return bp + def dataset_facets(self, facets_dict, package_type): + schemas = self._expanded_schemas + dataset_fields = schemas[package_type]['dataset_fields'] + for field in dataset_fields: + if not field.get('facet_field', False): + continue + # Add this label to facet + field_name = field['field_name'] + facet_field_name = f'extras_{field_name}' + facets_dict[facet_field_name] = field.get('label', field_name) + + return facets_dict + + def before_dataset_index(self, data_dict): + schemas = self._expanded_schemas + if data_dict['type'] not in schemas: + return data_dict + + dataset_fields = schemas[data_dict['type']]['dataset_fields'] + for field in dataset_fields: + if field['field_name'] not in data_dict: + continue + if not field.get('index_search', False): + continue + # index the field as extras_* + field_name = field['field_name'] + value = data_dict.get(field_name) + data_dict[f'extras_{field_name}'] = self.get_values_to_index(field, value) + + return data_dict + + def get_values_to_index(self, schema_field, value): + """ Prepare to index a single field value """ + if isinstance(value, list): + values = value + elif isinstance(value, str): + # Scheming is not clear on how to handle boolean values + if value.lower() in ['true', 'false']: + values = [p.toolkit.asbool(value)] + else: + try: + values = json.loads(value) + except ValueError: + values = [value] + else: + # TODO check if we need to handle other types + values = [value] + out = [] + # Allow fields with choices_helper + choices = helpers.scheming_field_choices(schema_field) + for item in values: + for choice in choices: + if choice['value'] == item: + out.append(choice['label']) + return out + def expand_form_composite(data, fieldnames): """ @@ -486,7 +544,7 @@ def before_dataset_index(self, data_dict): return self.before_index(data_dict) def before_index(self, data_dict): - schemas = SchemingDatasetsPlugin.instance._expanded_schemas + schemas = self.instance._expanded_schemas if data_dict['type'] not in schemas: return data_dict From 6c5a2134fcd2390a37692df24d1d44c65331f4f9 Mon Sep 17 00:00:00 2001 From: avdata99 Date: Thu, 9 May 2024 10:56:08 -0300 Subject: [PATCH 2/5] py2 --- ckanext/scheming/plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/scheming/plugins.py b/ckanext/scheming/plugins.py index d0ba819c..ba87e3f0 100644 --- a/ckanext/scheming/plugins.py +++ b/ckanext/scheming/plugins.py @@ -384,7 +384,7 @@ def dataset_facets(self, facets_dict, package_type): continue # Add this label to facet field_name = field['field_name'] - facet_field_name = f'extras_{field_name}' + facet_field_name = 'extras_{}'.format(field_name) facets_dict[facet_field_name] = field.get('label', field_name) return facets_dict From 23f329815ae0d075cd6a6e0d6fdb0894badec135 Mon Sep 17 00:00:00 2001 From: avdata99 Date: Thu, 9 May 2024 10:58:59 -0300 Subject: [PATCH 3/5] py2b --- ckanext/scheming/plugins.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/scheming/plugins.py b/ckanext/scheming/plugins.py index ba87e3f0..39a72d92 100644 --- a/ckanext/scheming/plugins.py +++ b/ckanext/scheming/plugins.py @@ -402,8 +402,9 @@ def before_dataset_index(self, data_dict): continue # index the field as extras_* field_name = field['field_name'] + extras_field_name = 'extras_{}'.format(field_name) value = data_dict.get(field_name) - data_dict[f'extras_{field_name}'] = self.get_values_to_index(field, value) + data_dict[extras_field_name] = self.get_values_to_index(field, value) return data_dict From 3b5eef3381e34d09fd3265683c9a6486675de3c0 Mon Sep 17 00:00:00 2001 From: avdata99 Date: Thu, 9 May 2024 11:08:46 -0300 Subject: [PATCH 4/5] get --- ckanext/scheming/plugins.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ckanext/scheming/plugins.py b/ckanext/scheming/plugins.py index 39a72d92..dd34e16e 100644 --- a/ckanext/scheming/plugins.py +++ b/ckanext/scheming/plugins.py @@ -378,7 +378,7 @@ def prepare_dataset_blueprint(self, package_type, bp): def dataset_facets(self, facets_dict, package_type): schemas = self._expanded_schemas - dataset_fields = schemas[package_type]['dataset_fields'] + dataset_fields = schemas.get(package_type, {}).get('dataset_fields') for field in dataset_fields: if not field.get('facet_field', False): continue @@ -394,7 +394,8 @@ def before_dataset_index(self, data_dict): if data_dict['type'] not in schemas: return data_dict - dataset_fields = schemas[data_dict['type']]['dataset_fields'] + package_type = data_dict['type'] + dataset_fields = schemas.get(package_type, {}).get('dataset_fields') for field in dataset_fields: if field['field_name'] not in data_dict: continue From 1f2dfb675cedc6449141dc5fab60ed73da0f6fc9 Mon Sep 17 00:00:00 2001 From: avdata99 Date: Thu, 9 May 2024 11:10:20 -0300 Subject: [PATCH 5/5] undo bad change --- ckanext/scheming/plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/scheming/plugins.py b/ckanext/scheming/plugins.py index dd34e16e..87e24af6 100644 --- a/ckanext/scheming/plugins.py +++ b/ckanext/scheming/plugins.py @@ -546,7 +546,7 @@ def before_dataset_index(self, data_dict): return self.before_index(data_dict) def before_index(self, data_dict): - schemas = self.instance._expanded_schemas + schemas = SchemingDatasetsPlugin.instance._expanded_schemas if data_dict['type'] not in schemas: return data_dict