From 69ddd99b014d548c967debe991a983cc7eebeba8 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Mon, 25 Nov 2024 16:05:20 +0100 Subject: [PATCH] fix unit test --- .../0024_corpusconfiguration_data_url.py | 19 +++++++++++++++++ backend/addcorpus/models.py | 21 +++++++++++++++---- backend/addcorpus/validation/creation.py | 21 ++++++++++++++++----- backend/addcorpus/validation/indexing.py | 13 ++++-------- 4 files changed, 56 insertions(+), 18 deletions(-) create mode 100644 backend/addcorpus/migrations/0024_corpusconfiguration_data_url.py diff --git a/backend/addcorpus/migrations/0024_corpusconfiguration_data_url.py b/backend/addcorpus/migrations/0024_corpusconfiguration_data_url.py new file mode 100644 index 000000000..59df766f5 --- /dev/null +++ b/backend/addcorpus/migrations/0024_corpusconfiguration_data_url.py @@ -0,0 +1,19 @@ +# Generated by Django 4.2.16 on 2024-11-25 10:29 + +import addcorpus.validation.creation +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('addcorpus', '0023_alter_corpusdocumentationpage_type_alter_field_name'), + ] + + operations = [ + migrations.AddField( + model_name='corpusconfiguration', + name='data_url', + field=models.CharField(blank=True, help_text='remote url containing source data files', max_length=200, validators=[addcorpus.validation.creation.validate_source_data_url]), + ), + ] diff --git a/backend/addcorpus/models.py b/backend/addcorpus/models.py index 671e68ac2..69bd188d0 100644 --- a/backend/addcorpus/models.py +++ b/backend/addcorpus/models.py @@ -9,13 +9,20 @@ from addcorpus.constants import CATEGORIES, MappingType, VisualizationType from addcorpus.validation.creation import ( - validate_es_mapping, validate_field_language, validate_implication, validate_language_code, + validate_es_mapping, + validate_field_language, + validate_implication, + validate_language_code, validate_mimetype, - validate_name_is_not_a_route_parameter, validate_name_has_no_ner_suffix, - 
validate_search_filter, validate_search_filter_with_mapping, + validate_name_is_not_a_route_parameter, + validate_name_has_no_ner_suffix, + validate_search_filter, + validate_search_filter_with_mapping, validate_searchable_field_has_full_text_search, - validate_sort_configuration, validate_visualizations_with_mapping, + validate_sort_configuration, + validate_visualizations_with_mapping, validate_source_data_directory, + validate_source_data_url, ) from addcorpus.validation.indexing import (validate_essential_fields, validate_has_configuration, validate_language_field, validate_has_data_directory) @@ -239,6 +246,12 @@ class CorpusConfiguration(models.Model): blank=True, help_text='path to directory containing source data files', ) + data_url = models.CharField( + max_length=200, + validators=[validate_source_data_url], + blank=True, + help_text='remote url containing source data files', + ) source_data_delimiter = models.CharField( max_length=1, choices=[ diff --git a/backend/addcorpus/validation/creation.py b/backend/addcorpus/validation/creation.py index 445a272fa..d61a689b1 100644 --- a/backend/addcorpus/validation/creation.py +++ b/backend/addcorpus/validation/creation.py @@ -6,14 +6,16 @@ import os import warnings -from addcorpus.constants import (FORBIDDEN_FIELD_NAMES, MappingType, - VisualizationType) -from addcorpus.python_corpora.filters import \ - VALID_MAPPINGS as VALID_SEARCH_FILTER_MAPPINGS from django.core.exceptions import ValidationError -from addcorpus.es_mappings import primary_mapping_type from langcodes import tag_is_valid +import requests +from addcorpus.constants import (FORBIDDEN_FIELD_NAMES, MappingType, + VisualizationType) +from addcorpus.python_corpora.filters import ( + VALID_MAPPINGS as VALID_SEARCH_FILTER_MAPPINGS, +) +from addcorpus.es_mappings import primary_mapping_type def supports_full_text_search(es_mapping): @@ -195,3 +197,12 @@ def validate_sort_configuration(sort_config): def validate_source_data_directory(value): if value and 
not os.path.isdir(value): raise ValidationError(f'{value} is not a directory') + + +def validate_source_data_url(value): + """Raise ValidationError when a non-empty url cannot be reached; the timeout keeps validation from hanging.""" + if value: + try: + requests.get(value, timeout=10) + except requests.exceptions.RequestException as err: + raise ValidationError(f'cannot connect to url {value}') from err diff --git a/backend/addcorpus/validation/indexing.py b/backend/addcorpus/validation/indexing.py index 92d0c89c2..79ae6f24a 100644 --- a/backend/addcorpus/validation/indexing.py +++ b/backend/addcorpus/validation/indexing.py @@ -90,16 +90,11 @@ def validate_has_data_directory(corpus): if not config.data_directory and not config.data_url: raise CorpusNotIndexableError('Missing data directory or url') - if corpus.data_dircetory and not os.path.isdir(config.data_directory): + if config.data_directory and not os.path.isdir(config.data_directory): raise CorpusNotIndexableError('Configured data directory does not exist.') - if corpus.data_url: - headers = {} - if corpus.data_api_key: - headers = {"Authorization": f"Token {corpus.data_api_key}"} + if config.data_url: try: - requests.get(corpus.data_url, headers=headers) + _response = requests.get(config.data_url, timeout=10) except ConnectionError: - raise CorpusNotIndexableError( - 'Cannot connect to the configured data url. Do you need to provide an API key?' - ) + raise CorpusNotIndexableError('Cannot connect to the configured data url.')