Skip to content

Commit

Permalink
feat(dev): solr rebuild ui;
Browse files Browse the repository at this point in the history
- Init commit.
  • Loading branch information
JVickery-TBS committed Dec 2, 2024
1 parent 958b80b commit 7ec9af9
Show file tree
Hide file tree
Showing 19 changed files with 718 additions and 18 deletions.
22 changes: 15 additions & 7 deletions ckan/cli/search_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,21 @@ def rebuild(
u''' Rebuild search index '''
from ckan.lib.search import rebuild, commit
try:

rebuild(package_id,
only_missing=only_missing,
force=force,
defer_commit=(not commit_each),
quiet=quiet and not verbose,
clear=clear)
for pkg_id, total, indexed, err in rebuild(package_id,
only_missing=only_missing,
force=force,
defer_commit=(not commit_each),
clear=clear):
if not verbose:
if err:
click.echo('Failed to index dataset %s with error: %s' %
(pkg_id, err))
continue
if not err:
click.echo('[%s/%s] Indexed dataset %s' % (indexed, total, pkg_id))
else:
click.echo('[%s/%s] Failed to index dataset %s with error: %s' %
(indexed, total, pkg_id, err))
except logic.NotFound:
error_shout("Couldn't find package %s" % package_id)
except Exception as e:
Expand Down
14 changes: 14 additions & 0 deletions ckan/lib/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,13 @@ def bulk_process_template(self) -> str:
"""
return 'group/bulk_process.html'

def search_rebuild_template(self) -> str:
    """
    Return the location of the template that is rendered for the
    search_rebuild page (group variant: ``group/search_rebuild.html``).
    """
    template_path = 'group/search_rebuild.html'
    return template_path

def group_form(self) -> str:
    """
    Return the template location ``group/new_group_form.html``.

    NOTE(review): judging by the method name this is the form template
    for groups -- confirm against the callers.
    """
    template_path = 'group/new_group_form.html'
    return template_path

Expand Down Expand Up @@ -603,6 +610,13 @@ def edit_template(self) -> str:
def activity_template(self) -> str:
    """
    Return the template location ``organization/activity_stream.html``.

    NOTE(review): judging by the method name this is the activity-stream
    template for organizations -- confirm against the callers.
    """
    template_path = 'organization/activity_stream.html'
    return template_path

def search_rebuild_template(self) -> str:
    """
    Return the location of the template that is rendered for the
    search_rebuild page (organization variant:
    ``organization/search_rebuild.html``).
    """
    template_path = 'organization/search_rebuild.html'
    return template_path


class DefaultTranslation(object):
name: str
Expand Down
18 changes: 9 additions & 9 deletions ckan/lib/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import cgitb
import warnings
import traceback
import json
import datetime

import xml.dom.minidom
from typing import Collection, Any, Optional, Type, overload
Expand Down Expand Up @@ -183,7 +185,6 @@ def rebuild(package_id: Optional[str] = None,
force: bool = False,
defer_commit: bool = False,
package_ids: Optional[Collection[str]] = None,
quiet: bool = False,
clear: bool = False):
'''
Rebuilds the search index.
Expand All @@ -210,12 +211,15 @@ def rebuild(package_id: Optional[str] = None,
log.info('Indexing just package %r...', pkg_dict['name'])
package_index.remove_dict(pkg_dict)
package_index.insert_dict(pkg_dict)
yield package_id, 1, 1, None
elif package_ids is not None:
for package_id in package_ids:
total_packages = len(package_ids)
for counter, package_id in enumerate(package_ids, 1):
pkg_dict = logic.get_action('package_show')(context,
{'id': package_id})
log.info('Indexing just package %r...', pkg_dict['name'])
package_index.update_dict(pkg_dict, True)
yield package_id, total_packages, counter, None
else:
packages = model.Session.query(model.Package.id)
if config.get('ckan.search.remove_deleted_packages'):
Expand All @@ -241,23 +245,19 @@ def rebuild(package_id: Optional[str] = None,
package_index.clear()

total_packages = len(package_ids)
for counter, pkg_id in enumerate(package_ids):
if not quiet:
sys.stdout.write(
"\rIndexing dataset {0}/{1}".format(
counter +1, total_packages)
)
sys.stdout.flush()
for counter, pkg_id in enumerate(package_ids, 1):
try:
package_index.update_dict(
logic.get_action('package_show')(context,
{'id': pkg_id}
),
defer_commit
)
yield pkg_id, total_packages, counter, None
except Exception as e:
log.error(u'Error while indexing dataset %s: %s' %
(pkg_id, repr(e)))
yield pkg_id, total_packages, counter, str(e)
if force:
log.error(text_traceback())
continue
Expand Down
94 changes: 94 additions & 0 deletions ckan/lib/search/jobs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# encoding: utf-8
import json
import datetime
from rq import get_current_job

from typing import Optional, Union

import ckan.logic as logic
import ckan.lib.search as search
import ckan.model as model

from ckan.plugins import toolkit

from logging import getLogger
log = getLogger(__name__)


def _utc_timestamp() -> str:
    """Return the current UTC time as the string stored in ``last_updated``."""
    return str(datetime.datetime.now(datetime.timezone.utc))


def _save_task_status(task: dict) -> None:
    """Stamp ``task`` with the current UTC time and persist it.

    Every write goes through ``task_status_update`` with a freshly created
    local session and ``ignore_auth`` set, exactly as each individual
    update did before this helper was extracted.
    """
    task['last_updated'] = _utc_timestamp()
    logic.get_action('task_status_update')(
        {'session': model.meta.create_local_session(), 'ignore_auth': True},
        task)


def reindex_packages(package_ids: Optional[list] = None,
                     group_id: Optional[str] = None):
    """
    Callback for a REDIS job

    Uses task_status to track the state of a search.rebuild call.
    This always commits each record in a forceful manner
    (``search.rebuild`` is called with ``force=True``).

    See ckan.lib.search.rebuild for more information.

    The task_status record is identified by ``entity_id`` (``group_id`` or,
    when that is empty, the ``ckan.site_id`` config value),
    ``task_type='reindex_packages'`` and ``key='search_rebuild'``:

    * ``value`` is a JSON object holding ``job_id``, ``indexed``, ``total``.
    * ``error`` is a JSON object mapping package ID -> error message.
    * ``state`` is ``running`` while indexing, ``complete`` on success and
      ``error`` if the rebuild aborts with an unexpected exception.

    :param package_ids: list of package IDs to pass to search.rebuild
    :type package_ids: list
    :param group_id: organization or group ID the task_status record is
        stored under. It is NOT used here to select packages; the caller
        is expected to supply the matching ``package_ids``.
    :type group_id: string

    :raises Exception: anything raised by ``search.rebuild`` or by the
        task_status actions is re-raised after the task has been marked
        with state ``error``.
    """
    context = {
        'model': model,
        'ignore_auth': True,
        'validate': False,
        'use_cache': False
    }

    _entity_id = group_id if group_id else toolkit.config.get('ckan.site_id')

    # Default record, used when no task_status exists yet for this entity.
    task = {
        'entity_id': _entity_id,
        'entity_type': 'group' if group_id else 'site',
        'task_type': 'reindex_packages',
        'last_updated': _utc_timestamp(),
        'state': 'running',
        'key': 'search_rebuild',
        'value': '{}',
        'error': '{}',
    }

    try:
        task = logic.get_action('task_status_show')(
            context, {'entity_id': _entity_id,
                      'task_type': 'reindex_packages',
                      'key': 'search_rebuild'})
    except logic.NotFound:
        # First run for this entity: keep the default record above.
        pass

    # ``or '{}'`` guards against a stored null/empty value, which would
    # otherwise make json.loads raise.
    value = json.loads(task.get('value') or '{}')
    error = json.loads(task.get('error') or '{}')

    # get_current_job() returns None when not executed inside an RQ worker
    # (e.g. called synchronously), so do not dereference it blindly.
    current_job = get_current_job()
    value['job_id'] = current_job.id if current_job is not None else None

    # Persist the "running" state and the job id up front so the record is
    # accurate even before the first package is indexed (or when there are
    # no packages at all).
    task['state'] = 'running'
    task['value'] = json.dumps(value)
    _save_task_status(task)

    try:
        for pkg_id, total, indexed, err in \
                search.rebuild(force=True, package_ids=package_ids):

            if not err:
                log.info('[%s/%s] Indexed dataset %s',
                         indexed, total, pkg_id)
                # Drop a stale failure left over from an earlier run.
                error.pop(pkg_id, None)
            else:
                log.error('[%s/%s] Failed to index dataset %s with error: %s',
                          indexed, total, pkg_id, err)
                error[pkg_id] = err

            value['indexed'] = indexed
            value['total'] = total
            task['value'] = json.dumps(value)
            # Previously the collected errors were never written back to
            # the task, so they were silently lost.
            task['error'] = json.dumps(error)
            _save_task_status(task)
    except Exception:
        # Do not leave the task stuck in "running" when the rebuild aborts.
        log.exception('Search index rebuild job failed for %s', _entity_id)
        task['state'] = 'error'
        task['error'] = json.dumps(error)
        _save_task_status(task)
        raise

    task['state'] = 'complete'
    _save_task_status(task)
3 changes: 2 additions & 1 deletion ckan/logic/action/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -2295,6 +2295,8 @@ def task_status_show(context: Context, data_dict: DataDict) -> ActionResult.Task
model = context['model']
id = data_dict.get('id')

_check_access('task_status_show', context, data_dict)

if id:
task_status = model.TaskStatus.get(id)
else:
Expand All @@ -2312,7 +2314,6 @@ def task_status_show(context: Context, data_dict: DataDict) -> ActionResult.Task
if task_status is None:
raise NotFound
context['task_status'] = task_status
_check_access('task_status_show', context, data_dict)

task_status_dict = model_dictize.task_status_dictize(task_status, context)
return task_status_dict
Expand Down
Loading

0 comments on commit 7ec9af9

Please sign in to comment.