diff --git a/alyx/actions/admin.py b/alyx/actions/admin.py index 73b1c9599..f764ab5cc 100644 --- a/alyx/actions/admin.py +++ b/alyx/actions/admin.py @@ -390,7 +390,8 @@ def is_water_restricted(self, obj): class WeighingForm(BaseActionForm): def __init__(self, *args, **kwargs): super(WeighingForm, self).__init__(*args, **kwargs) - self.fields['subject'].queryset = self.current_user.get_allowed_subjects() + if 'subject' in self.fields: + self.fields['subject'].queryset = self.current_user.get_allowed_subjects() if self.fields.keys(): self.fields['weight'].widget.attrs.update({'autofocus': 'autofocus'}) @@ -455,10 +456,10 @@ class DatasetInline(BaseInlineAdmin): show_change_link = True model = Dataset extra = 1 - fields = ('name', 'dataset_type', 'collection', '_online', 'version', 'created_by', - 'created_datetime') + fields = ('name', 'dataset_type', 'collection', '_online', 'version', 'qc', + 'created_by', 'created_datetime') readonly_fields = fields - ordering = ("name",) + ordering = ('name',) def _online(self, obj): return obj.is_online diff --git a/alyx/actions/migrations/0021_alter_session_extended_qc.py b/alyx/actions/migrations/0021_alter_session_extended_qc.py new file mode 100644 index 000000000..17a3e5573 --- /dev/null +++ b/alyx/actions/migrations/0021_alter_session_extended_qc.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.10 on 2024-03-12 13:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('actions', '0020_alter_notification_notification_type_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='session', + name='extended_qc', + field=models.JSONField(blank=True, help_text='Structured data about session QC, formatted in a user-defined way', null=True), + ), + ] diff --git a/alyx/actions/migrations/0022_project_to_projects.py b/alyx/actions/migrations/0022_project_to_projects.py new file mode 100644 index 000000000..b0a400110 --- /dev/null +++ b/alyx/actions/migrations/0022_project_to_projects.py @@ -0,0 +1,39 @@ +# Generated by Django 4.2.10 on 2024-03-14 14:28 +import logging + +from django.db import migrations +from django.db.models import F, Q + +logger = logging.getLogger(__name__) + +def project2projects(apps, schema_editor): + """ + Find sessions where the project field (singular) value is not in the projects (plural) many-to-many + field and updates them. + + Tested on local instance. 
+ """ + Session = apps.get_model('actions', 'Session') + sessions = Session.objects.exclude(Q(project__isnull=True) | Q(projects=F('project'))) + + # Check query worked + # from one.util import ensure_list + # for session in sessions.values('pk', 'project', 'projects'): + # assert session['project'] not in ensure_list(session['projects']) + + for session in sessions: + session.projects.add(session.project) + # session.project = None + # session.save() # No need to save + + assert Session.objects.exclude(Q(project__isnull=True) | Q(projects=F('project'))).count() == 0 + logger.info(f'project -> projects: {sessions.count():,g} sessions updated') + + +class Migration(migrations.Migration): + + dependencies = [ + ('actions', '0021_alter_session_extended_qc'), + ] + + operations = [migrations.RunPython(project2projects)] diff --git a/alyx/actions/migrations/0023_remove_session_project.py b/alyx/actions/migrations/0023_remove_session_project.py new file mode 100644 index 000000000..c746c3374 --- /dev/null +++ b/alyx/actions/migrations/0023_remove_session_project.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2.10 on 2024-03-14 14:54 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('actions', '0022_project_to_projects'), + ] + + operations = [ + migrations.RemoveField( + model_name='session', + name='project', + ), + ] diff --git a/alyx/actions/models.py b/alyx/actions/models.py index 573696f94..c6b6adb96 100644 --- a/alyx/actions/models.py +++ b/alyx/actions/models.py @@ -1,7 +1,9 @@ from datetime import timedelta -import structlog from math import inf +import structlog +from one.alf.spec import QC + from django.conf import settings from django.core.validators import MinValueValidator from django.db import models @@ -240,9 +242,6 @@ class Session(BaseAction): parent_session = models.ForeignKey('Session', null=True, blank=True, on_delete=models.SET_NULL, help_text="Hierarchical parent to this session") - project = models.ForeignKey('subjects.Project', null=True, blank=True, - on_delete=models.SET_NULL, verbose_name='Session Project', - related_name='oldproject') projects = models.ManyToManyField('subjects.Project', blank=True, verbose_name='Session Projects') type = models.CharField(max_length=255, null=True, blank=True, @@ -253,27 +252,19 @@ class Session(BaseAction): n_trials = models.IntegerField(blank=True, null=True) n_correct_trials = models.IntegerField(blank=True, null=True) - QC_CHOICES = [ - (50, 'CRITICAL',), - (40, 'FAIL',), - (30, 'WARNING',), - (0, 'NOT_SET',), - (10, 'PASS',), - ] - - qc = models.IntegerField(default=0, choices=QC_CHOICES, + QC_CHOICES = [(e.value, e.name) for e in QC] + qc = models.IntegerField(default=QC.NOT_SET, choices=QC_CHOICES, help_text=' / '.join([str(q[0]) + ': ' + q[1] for q in QC_CHOICES])) + extended_qc = models.JSONField(null=True, blank=True, - help_text="Structured data about session QC," + help_text="Structured data about session QC, " "formatted in a user-defined way") auto_datetime = models.DateTimeField(auto_now=True, blank=True, null=True, verbose_name='last updated') def save(self, *args, **kwargs): - # Default project is the subject's project. - if not self.project_id: - self.project = self.subject.projects.first() + # Default project is the subject's projects. 
if not self.lab: self.lab = self.subject.lab return super(Session, self).save(*args, **kwargs) diff --git a/alyx/actions/serializers.py b/alyx/actions/serializers.py index 26f374174..0809e779a 100644 --- a/alyx/actions/serializers.py +++ b/alyx/actions/serializers.py @@ -98,12 +98,13 @@ class SessionDatasetsSerializer(serializers.ModelSerializer): queryset=DatasetType.objects.all(), ) default_revision = serializers.CharField(source='default_dataset') + qc = BaseSerializerEnumField(required=False) class Meta: list_serializer_class = FilterDatasetSerializer model = Dataset fields = ('id', 'name', 'dataset_type', 'data_url', 'url', 'file_size', - 'hash', 'version', 'collection', 'revision', 'default_revision') + 'hash', 'version', 'collection', 'revision', 'default_revision', 'qc') class SessionWaterAdminSerializer(serializers.ModelSerializer): diff --git a/alyx/actions/tests_rest.py b/alyx/actions/tests_rest.py index 4fe66f4f1..27531128f 100644 --- a/alyx/actions/tests_rest.py +++ b/alyx/actions/tests_rest.py @@ -266,8 +266,10 @@ def test_sessions(self): # test dataset type filters dtype1, _ = DatasetType.objects.get_or_create(name='trials.table') dtype2, _ = DatasetType.objects.get_or_create(name='wheel.position') - Dataset.objects.create(session=ses, name='_ibl_trials.table.pqt', dataset_type=dtype1) - Dataset.objects.create(session=ses, name='_ibl_wheel.position.npy', dataset_type=dtype2) + Dataset.objects.create( + session=ses, name='_ibl_trials.table.pqt', dataset_type=dtype1, qc=40) + Dataset.objects.create( + session=ses, name='_ibl_wheel.position.npy', dataset_type=dtype2, qc=30) d = self.ar(self.client.get(reverse('session-list') + '?dataset_types=wheel.position')) self.assertCountEqual([str(ses.pk)], (x['id'] for x in d)) q = '?dataset_types=wheel.position,trials.table' # Check with list @@ -280,6 +282,22 @@ def test_sessions(self): self.assertCountEqual([str(ses.pk)], (x['id'] for x in d)) q = '?datasets=wheel.position' self.assertFalse(self.ar(self.client.get(reverse('session-list') + q))) + # multiple datasets + q = '?datasets=_ibl_wheel.position.npy,_ibl_trials.table.pqt' + d = self.ar(self.client.get(reverse('session-list') + q)) + self.assertCountEqual([str(ses.pk)], (x['id'] for x in d)) + # datasets + qc (expect to return sessions where defined datasets have correct QC) + q = '?datasets=_ibl_wheel.position.npy,_ibl_trials.table.pqt&dataset_qc_lte=WARNING' + self.assertFalse(self.ar(self.client.get(reverse('session-list') + q))) + q = '?datasets=_ibl_wheel.position.npy&dataset_qc_lte=WARNING' + d = self.ar(self.client.get(reverse('session-list') + q)) + self.assertCountEqual([str(ses.pk)], (x['id'] for x in d), 'failed to return session') + # qc alone (expect to return sessions where any dataset has correct QC) + q = '?dataset_qc_lte=WARNING' + d = self.ar(self.client.get(reverse('session-list') + q)) + self.assertCountEqual([str(ses.pk)], (x['id'] for x in d), 'failed to return session') + q = '?dataset_qc_lte=10' + self.assertFalse(self.ar(self.client.get(reverse('session-list') + q))) def test_surgeries(self): from actions.models import Surgery diff --git a/alyx/actions/views.py b/alyx/actions/views.py index d0b788b0e..bec3461f6 100644 --- a/alyx/actions/views.py +++ b/alyx/actions/views.py @@ -1,6 +1,7 @@ from datetime import timedelta, date from operator import itemgetter +from one.alf.spec import QC from django.contrib.postgres.fields import JSONField from django.db.models import Count, Q, F, ExpressionWrapper, FloatField from django.db.models.deletion import Collector 
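
Reviewer note: the serializer field, the test QC codes above, and the filters in the next hunk all lean on the `one.alf.spec.QC` enum imported here. A minimal sketch of the mapping this relies on, assuming ONE-api >= 2.7; the integer codes match the choices added in the `0019_dataset_qc` data migration further down:

```python
from one.alf.spec import QC  # requires ONE-api >= 2.7

# The Dataset/Session models store the integer; the REST layer exposes the name.
# Choices from the migration: 0 NOT_SET, 10 PASS, 30 WARNING, 40 FAIL, 50 CRITICAL.
assert int(QC.WARNING) == 30 and int(QC.FAIL) == 40

# QC.validate normalises client input, which is why the tests and filters can pass
# either a name in any case ('critical') or a numeric code as a string ('10').
assert QC.validate('critical') == QC.CRITICAL
assert QC.validate('10') == QC.PASS
```

This is also why `?dataset_qc_lte=WARNING` and `?dataset_qc_lte=30` behave identically in the session and probe-insertion filters below.
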
@@ -223,9 +224,11 @@ class ProcedureTypeList(generics.ListCreateAPIView): class SessionFilter(BaseActionFilter): - dataset_types = django_filters.CharFilter(field_name='dataset_types', - method='filter_dataset_types') + dataset_types = django_filters.CharFilter( + field_name='dataset_types', method='filter_dataset_types') datasets = django_filters.CharFilter(field_name='datasets', method='filter_datasets') + dataset_qc_lte = django_filters.CharFilter( + field_name='dataset_qc', method='filter_dataset_qc_lte') performance_gte = django_filters.NumberFilter(field_name='performance', method='filter_performance_gte') performance_lte = django_filters.NumberFilter(field_name='performance', @@ -284,13 +287,23 @@ def filter_dataset_types(self, queryset, _, value): def filter_datasets(self, queryset, _, value): # Note this may later be modified to include collections, e.g. ?datasets=alf/obj.attr.ext + qc = QC.validate(self.request.query_params.get('dataset_qc_lte', QC.FAIL)) dsets = value.split(',') - queryset = queryset.filter(data_dataset_session_related__name__in=dsets) + queryset = queryset.filter(data_dataset_session_related__name__in=dsets, + data_dataset_session_related__qc__lte=qc) queryset = queryset.annotate( dsets_count=Count('data_dataset_session_related', distinct=True)) queryset = queryset.filter(dsets_count__gte=len(dsets)) return queryset + def filter_dataset_qc_lte(self, queryset, _, value): + # If filtering on datasets too, `filter_datasets` handles both QC and Datasets + if 'datasets' in self.request.query_params: + return queryset + qc = QC.validate(value) + queryset = queryset.filter(data_dataset_session_related__qc__lte=qc) + return queryset + def filter_performance_gte(self, queryset, name, perf): queryset = queryset.exclude(n_trials__isnull=True) pf = ExpressionWrapper(100 * F('n_correct_trials') / F('n_trials'), @@ -326,13 +339,15 @@ class SessionAPIList(generics.ListCreateAPIView): - **subject**: subject nickname `/sessions?subject=Algernon` - **dataset_types**: dataset type(s) `/sessions?dataset_types=trials.table,camera.times` - **datasets**: dataset name(s) `/sessions?datasets=_ibl_leftCamera.times.npy` + - **dataset_qc_lte**: dataset QC values less than or equal to this + `/sessions?dataset_qc_lte=WARNING` - **number**: session number - **users**: experimenters (exact) - **date_range**: date `/sessions?date_range=2020-01-12,2020-01-16` - **lab**: lab name (exact) - **task_protocol** (icontains) - **location**: location name (icontains) - - **project**: project name (icontains) + - **projects**: project name (icontains) - **json**: queries on json fields, for example here `tutu` - exact/equal lookup: `/sessions?extended_qc=tutu,True`, - gte lookup: `/sessions/?extended_qc=tutu__gte,0.5`, @@ -354,10 +369,10 @@ class SessionAPIList(generics.ListCreateAPIView): - **histology**: returns sessions for which the subject has an histology session: `/sessions?histology=True` - **django**: generic filter allowing lookups (same syntax as json filter) - `/sessions?django=project__name__icontains,matlab - filters sessions that have matlab in the project name - `/sessions?django=~project__name__icontains,matlab - does the exclusive set: filters sessions that do not have matlab in the project name + `/sessions?django=projects__name__icontains,matlab` + filters sessions that have matlab in the project names + `/sessions?django=~projects__name__icontains,matlab` + does the exclusive set: filters sessions that do not have matlab in the project names [===> session model 
reference](/admin/doc/models/actions.session) """ diff --git a/alyx/alyx/__init__.py b/alyx/alyx/__init__.py index 37c1153c5..04af162bb 100644 --- a/alyx/alyx/__init__.py +++ b/alyx/alyx/__init__.py @@ -1 +1 @@ -VERSION = __version__ = '1.18.2' +VERSION = __version__ = '2.0.0' diff --git a/alyx/alyx/base.py b/alyx/alyx/base.py index 26aa4e082..ed1ce5f8d 100644 --- a/alyx/alyx/base.py +++ b/alyx/alyx/base.py @@ -6,7 +6,6 @@ import sys import pytz import uuid -from collections import OrderedDict import one.alf.spec from datetime import datetime import traceback @@ -454,7 +453,7 @@ def ar(self, r, code=200): """ self.assertTrue(r.status_code == code, r.data) pkeys = {'count', 'next', 'previous', 'results'} - if isinstance(r.data, OrderedDict) and set(r.data.keys()) == pkeys: + if isinstance(r.data, dict) and set(r.data.keys()) == pkeys: return r.data['results'] else: return r.data diff --git a/alyx/data/admin.py b/alyx/data/admin.py index f53a34b57..f8d503f1d 100644 --- a/alyx/data/admin.py +++ b/alyx/data/admin.py @@ -1,7 +1,7 @@ from django.db.models import Count, ProtectedError from django.contrib import admin, messages from django.utils.html import format_html -from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter +from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter, ChoiceDropdownFilter from rangefilter.filters import DateRangeFilter from .models import (DataRepositoryType, DataRepository, DataFormat, DatasetType, @@ -84,16 +84,17 @@ class FileRecordInline(BaseInlineAdmin): class DatasetAdmin(BaseExperimentalDataAdmin): fields = ['name', '_online', 'version', 'dataset_type', 'file_size', 'hash', 'session_ro', 'collection', 'auto_datetime', 'revision_', 'default_dataset', - '_protected', '_public', 'tags'] + '_protected', '_public', 'tags', 'qc'] readonly_fields = ['name_', 'session_ro', '_online', 'auto_datetime', 'revision_', - '_protected', '_public', 'tags'] + '_protected', '_public', 'tags', 'qc'] list_display = ['name_', '_online', 'version', 'collection', 'dataset_type_', 'file_size', - 'session_ro', 'created_by', 'created_datetime'] + 'session_ro', 'created_by', 'created_datetime', 'qc'] inlines = [FileRecordInline] list_filter = [('created_by', RelatedDropdownFilter), ('created_datetime', DateRangeFilter), ('dataset_type', RelatedDropdownFilter), - ('tags', RelatedDropdownFilter) + ('tags', RelatedDropdownFilter), + ('qc', ChoiceDropdownFilter) ] search_fields = ('session__id', 'name', 'collection', 'dataset_type__name', 'dataset_type__filename_pattern', 'version') diff --git a/alyx/data/fixtures/data.datasettype.json b/alyx/data/fixtures/data.datasettype.json index 910d55506..582c9dc2f 100644 --- a/alyx/data/fixtures/data.datasettype.json +++ b/alyx/data/fixtures/data.datasettype.json @@ -2220,5 +2220,16 @@ "description": "Look up table from photometry ROI, to fiber name registered in the database and Allen brain location", "filename_pattern": "*photometryROI.locations*" } + }, + { + "model": "data.datasettype", + "pk": "140cd2a9-91c1-45ee-9d19-77e8d39abb5f", + "fields": { + "json": null, + "name": "laserStimulation.intervals", + "created_by": null, + "description": "The start and end times of the laser stimulation period.", + "filename_pattern": "" + } } ] diff --git a/alyx/data/migrations/0019_dataset_qc.py b/alyx/data/migrations/0019_dataset_qc.py new file mode 100644 index 000000000..e2d0cc5d9 --- /dev/null +++ b/alyx/data/migrations/0019_dataset_qc.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.10 on 2024-02-13 15:16 + +from 
django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('data', '0018_alter_dataset_collection_alter_revision_name'), + ] + + operations = [ + migrations.AddField( + model_name='dataset', + name='qc', + field=models.IntegerField(choices=[(50, 'CRITICAL'), (40, 'FAIL'), (30, 'WARNING'), (0, 'NOT_SET'), (10, 'PASS')], default=0, help_text='50: CRITICAL / 40: FAIL / 30: WARNING / 0: NOT_SET / 10: PASS'), + ), + ] diff --git a/alyx/data/models.py b/alyx/data/models.py index 83fd08e7a..df205f051 100644 --- a/alyx/data/models.py +++ b/alyx/data/models.py @@ -1,4 +1,5 @@ import structlog +from one.alf.spec import QC from django.core.validators import RegexValidator from django.db import models @@ -351,6 +352,10 @@ class Dataset(BaseExperimentalData): help_text="Whether this dataset is the default " "latest revision") + QC_CHOICES = [(e.value, e.name) for e in QC] + qc = models.IntegerField(default=QC.NOT_SET, choices=QC_CHOICES, + help_text=' / '.join([str(q[0]) + ': ' + q[1] for q in QC_CHOICES])) + @property def is_online(self): fr = self.file_records.filter(data_repository__globus_is_personal=False) diff --git a/alyx/data/serializers.py b/alyx/data/serializers.py index 54a131bcd..c455611f8 100644 --- a/alyx/data/serializers.py +++ b/alyx/data/serializers.py @@ -2,9 +2,12 @@ from rest_framework import serializers from django.db.models import Count, Q, BooleanField +from one.alf.spec import QC + from .models import (DataRepositoryType, DataRepository, DataFormat, DatasetType, Dataset, Download, FileRecord, Revision, Tag) from .transfers import _get_session, _change_default_dataset +from alyx.base import BaseSerializerEnumField from actions.models import Session from subjects.models import Subject from misc.models import LabMember @@ -142,6 +145,7 @@ class DatasetSerializer(serializers.HyperlinkedModelSerializer): default_dataset = serializers.BooleanField(required=False, allow_null=True) public = serializers.ReadOnlyField() protected = serializers.ReadOnlyField() + qc = BaseSerializerEnumField(required=False) file_records = DatasetFileRecordsSerializer(read_only=True, many=True) experiment_number = serializers.SerializerMethodField() @@ -178,6 +182,9 @@ def create(self, validated_data): name = validated_data.get('name', None) default = validated_data.get('default_dataset', None) session = validated_data.get('session', None) + # validate QC value + if 'qc' in validated_data: + validated_data['qc'] = QC.validate(validated_data['qc']) if session: if default is not False: @@ -213,7 +220,7 @@ class Meta: 'session', 'file_size', 'hash', 'version', 'experiment_number', 'file_records', 'subject', 'date', 'number', 'auto_datetime', 'revision', - 'default_dataset', 'protected', 'public', 'tags') + 'default_dataset', 'protected', 'public', 'tags', 'qc', 'json') extra_kwargs = { 'subject': {'write_only': True}, 'date': {'write_only': True}, diff --git a/alyx/data/tests_rest.py b/alyx/data/tests_rest.py index 6ffbb4108..43e3a5589 100644 --- a/alyx/data/tests_rest.py +++ b/alyx/data/tests_rest.py @@ -128,6 +128,7 @@ def test_dataset_filerecord(self): self.assertTrue(new_mod_date > mod_date) def test_dataset(self): + # Test dataset creation via the datasets endpoint data = { 'name': 'some-dataset', 'dataset_type': 'dst', @@ -145,6 +146,8 @@ def test_dataset(self): self.assertEqual(r.data['collection'], None) # Check that it has been set as the default dataset self.assertEqual(r.data['default_dataset'], True) + # Check QC value is NOT_SET by default + 
self.assertEqual(r.data['qc'], 'NOT_SET') # Make sure a session has been created. session = r.data['session'] r = self.client.get(session) @@ -162,6 +165,7 @@ def test_dataset(self): 'date': '2018-01-01', 'number': 2, 'collection': 'test_path', + 'qc': 'PASS' } r = self.post(reverse('dataset-list'), data) @@ -169,6 +173,7 @@ def test_dataset(self): self.assertEqual(r.data['revision'], None) self.assertEqual(r.data['collection'], data['collection']) self.assertEqual(r.data['default_dataset'], True) + self.assertEqual(r.data['qc'], 'PASS') data_url = r.data['url'] # But if we change the collection, we are okay @@ -342,6 +347,24 @@ def test_register_files_hostname(self): self.assertEqual(ds0.version, '1.1.1') self.assertEqual(ds1.version, '2.2.2') + def test_qc_validation(self): + # this tests the validation of dataset QC outcomes + data = { + 'path': '%s/2018-01-01/2/dir' % self.subject, + 'filenames': 'a.b.e1,a.c.e2', + 'hostname': 'hostname', + 'qc': '10,critical' # Both numerical and string QC values should be parsed + } + r = self.post(reverse('register-file'), data) + records = self.ar(r, 201) + self.assertEqual([10, 50], [rec['qc'] for rec in records]) + self._assert_registration(r, data) + # a single QC value should be applied to all datasets + data['qc'] = 'FAIL' + r = self.post(reverse('register-file'), data) + records = self.ar(r, 201) + self.assertEqual([40, 40], [rec['qc'] for rec in records]) + def test_register_files_hash(self): # this is old use case where we register one dataset according to the hostname, no need # for a lab in this case @@ -738,6 +761,40 @@ def test_protected_view(self): self.assertEqual(name, 'test_prot/a.b.e1') self.assertEqual(prot_info, []) + def test_check_protected(self): + self.post(reverse('datarepository-list'), {'name': 'drb1', 'hostname': 'hostb1'}) + self.post(reverse('lab-list'), {'name': 'labb', 'repositories': ['drb1']}) + + # Create protected tag + self.client.post(reverse('tag-list'), {'name': 'tag1', 'protected': True}) + + # Create some datasets and register + data = {'path': '%s/2018-01-01/002/' % self.subject, + 'filenames': 'test_prot/a.c.e2', + 'name': 'drb1', # this is the repository name + } + + d = self.ar(self.client.post(reverse('register-file'), data), 201) + + # Check the same dataset to see if it is protected, should be unprotected + # and get a status 200 respons + _ = data.pop('name') + + r = self.ar(self.client.get(reverse('check-protected'), data=data, + content_type='application/json'), 200) + self.assertEqual(r['status_code'], 200) + + # add protected tag to the first dataset + dataset1 = Dataset.objects.get(pk=d[0]['id']) + tag1 = Tag.objects.get(name='tag1') + dataset1.tags.add(tag1) + + # Check the same dataset to see if it is protected + r = self.ar(self.client.get(reverse('check-protected'), data=data, + content_type='application/json'), 200) + self.assertEqual(r['status_code'], 403) + self.assertEqual(r['error'], 'One or more datasets is protected') + def test_revisions(self): # Check revision lookup with name self.post(reverse('revision-list'), {'name': 'v2'}) diff --git a/alyx/data/transfers.py b/alyx/data/transfers.py index 01c219b3f..bc6b744ed 100644 --- a/alyx/data/transfers.py +++ b/alyx/data/transfers.py @@ -11,6 +11,7 @@ import numpy as np from one.alf.files import add_uuid_string, folder_parts from one.registration import get_dataset_type +from one.alf.spec import QC from alyx import settings from data.models import FileRecord, Dataset, DatasetType, DataFormat, DataRepository @@ -244,7 +245,7 @@ def 
_check_dataset_protected(session, collection, filename): def _create_dataset_file_records( rel_dir_path=None, filename=None, session=None, user=None, repositories=None, exists_in=None, collection=None, hash=None, - file_size=None, version=None, revision=None, default=None): + file_size=None, version=None, revision=None, default=None, qc=None): assert session is not None revision_name = f'#{revision.name}#' if revision else '' @@ -265,6 +266,12 @@ def _create_dataset_file_records( dataset_type=dataset_type, data_format=data_format, revision=revision ) dataset.default_dataset = default is True + try: + dataset.qc = int(QC.validate(qc or 'NOT_SET')) + except ValueError: + data = {'status_code': 400, + 'detail': f'Invalid QC value "{qc}" for dataset "{relative_path}"'} + return None, Response(data=data, status=403) dataset.save() # If the dataset already existed see if it is protected (i.e can't be overwritten) diff --git a/alyx/data/urls.py b/alyx/data/urls.py index b67ce0a6c..9218ff3f6 100644 --- a/alyx/data/urls.py +++ b/alyx/data/urls.py @@ -14,6 +14,9 @@ 'post': 'create' }) +check_protected = dv.ProtectedFileViewSet.as_view({ + 'get': 'list' +}) urlpatterns = [ path('data-formats', dv.DataFormatList.as_view(), @@ -78,4 +81,7 @@ path('sync-file-status', sync_file_status, name="sync-file-status"), + path('check-protected', check_protected, + name="check-protected"), + ] diff --git a/alyx/data/views.py b/alyx/data/views.py index ff86ffeaf..6e2216c22 100644 --- a/alyx/data/views.py +++ b/alyx/data/views.py @@ -157,6 +157,7 @@ class DatasetFilter(BaseFilterSet): protected = django_filters.BooleanFilter(method='filter_protected') tag = django_filters.CharFilter('tags__name') revision = django_filters.CharFilter('revision__name') + qc = django_filters.CharFilter(method='enum_field_filter') class Meta: model = Dataset @@ -212,6 +213,7 @@ class DatasetList(generics.ListCreateAPIView): - **tag**: tag name '/datasets?tag=repeated_site - **public**: only returns datasets that are public or not public - **protected**: only returns datasets that are protected or not protected + - **qc**: only returns datasets with this QC value `/datasets?qc=PASS` [===> dataset model reference](/admin/doc/models/data.dataset) """ @@ -306,6 +308,7 @@ def _make_dataset_response(dataset): 'collection': dataset.collection, 'revision': getattr(dataset.revision, 'name', None), 'default': dataset.default_dataset, + 'qc': dataset.qc } out['file_records'] = file_records return out @@ -324,6 +327,79 @@ def _parse_path(path): return subject, date, session_number +class ProtectedFileViewSet(mixins.ListModelMixin, + viewsets.GenericViewSet): + + serializer_class = serializers.Serializer + + def list(self, request): + """ + Endpoint to check if set of files are protected or not + + The session is retrieved by the ALF convention in the relative path, so this field has to + match the format Subject/Date/Number as shown below. 
+ + The client side REST query should look like this: + + ```python + r_ = {'created_by': 'user_name_alyx', + 'path': 'ZM_1085/2019-02-12/002/alf', # relative path to repo path + 'filenames': ['file1', 'file2'], + } + ``` + + Returns a response indicating if any of the datasets are protected or not + - Status 403 if a dataset is protected, details contains a list of protected datasets + - Status 200 is none of the datasets are protected + """ + + req = request.GET.dict() if len(request.data) == 0 else request.data + + user = req.get('created_by', None) + if user: + user = get_user_model().objects.get(username=user) + else: + user = request.user + + rel_dir_path = req.get('path', '') + if not rel_dir_path: + raise ValueError("The path argument is required.") + + # Extract the data repository from the hostname, the subject, the directory path. + rel_dir_path = rel_dir_path.replace('\\', '/') + rel_dir_path = rel_dir_path.replace('//', '/') + subject, date, session_number = _parse_path(rel_dir_path) + + filenames = req.get('filenames', ()) + if isinstance(filenames, str): + filenames = filenames.split(',') + + session = _get_session( + subject=subject, date=date, number=session_number, user=user) + assert session + + # Loop through the files to see if any are protected + prot_response = [] + protected = [] + for file in filenames: + info, resp = _get_name_collection_revision(file, rel_dir_path) + if resp: + return resp + prot, prot_info = _check_dataset_protected( + session, info['collection'], info['filename']) + protected.append(prot) + prot_response.append({file: prot_info}) + if any(protected): + data = {'status_code': 403, + 'error': 'One or more datasets is protected', + 'details': prot_response} + return Response(data=data) + else: + data = {'status_code': 200, + 'details': 'None of the datasets are protected'} + return Response(data=data) + + class RegisterFileViewSet(mixins.CreateModelMixin, viewsets.GenericViewSet): @@ -353,6 +429,7 @@ def create(self, request): 'hashes': ['f9c26e42-8f22-4f07-8fdd-bb51a63bedaa', 'f9c26e42-8f22-4f07-8fdd-bb51a63bedad'] # optional 'filesizes': [145684, 354213], # optional + 'qc': ['NOT_SET', 'PASS'], # optional 'server_only': True, # optional, defaults to False. Will only create file # records in the server repositories and skips local repositories 'versions': ['1.4.4', '1.4.4'], # optional, usually refers to the software version @@ -373,7 +450,7 @@ def create(self, request): ``` If the dataset already exists, it will use the file hash to deduce if the file has been - patched or not (ie. the filerecords will be created as not existing) + patched or not (i.e. 
the filerecords will be created as not existing) """ user = request.data.get('created_by', None) if user: @@ -420,6 +497,13 @@ def create(self, request): if isinstance(filesizes, str): filesizes = filesizes.split(',') + # qc if provided + qcs = request.data.get('qc', [None] * len(filenames)) or 'NOT_SET' + if isinstance(qcs, str): + qcs = qcs.split(',') + if len(qcs) == 1: + qcs = qcs * len(filenames) + # flag to discard file records creation on local repositories, defaults to False server_only = request.data.get('server_only', False) if isinstance(server_only, str): @@ -480,7 +564,7 @@ def create(self, request): return Response(data=data, status=403) response = [] - for filename, hash, fsize, version in zip(filenames, hashes, filesizes, versions): + for filename, hash, fsize, version, qc in zip(filenames, hashes, filesizes, versions, qcs): if not filename: continue info, resp = _get_name_collection_revision(filename, rel_dir_path) @@ -497,7 +581,7 @@ def create(self, request): collection=info['collection'], rel_dir_path=info['rel_dir_path'], filename=info['filename'], session=session, user=user, repositories=repositories, exists_in=exists_in, hash=hash, file_size=fsize, version=version, - revision=revision, default=default) + revision=revision, default=default, qc=qc) if resp: return resp out = _make_dataset_response(dataset) diff --git a/alyx/experiments/models.py b/alyx/experiments/models.py index c0836e22c..db42de593 100644 --- a/alyx/experiments/models.py +++ b/alyx/experiments/models.py @@ -181,8 +181,12 @@ class TrajectoryEstimate(models.Model): class Meta: constraints = [ + models.UniqueConstraint(fields=['provenance', 'chronic_insertion'], + condition=models.Q(probe_insertion__isnull=True), + name='unique_trajectory_per_chronic_provenance'), models.UniqueConstraint(fields=['provenance', 'probe_insertion'], - name='unique_trajectory_per_provenance') + condition=models.Q(probe_insertion__isnull=False), + name='unique_trajectory_per_provenance'), ] def __str__(self): diff --git a/alyx/experiments/serializers.py b/alyx/experiments/serializers.py index 6685dc5f7..92ad06a85 100644 --- a/alyx/experiments/serializers.py +++ b/alyx/experiments/serializers.py @@ -27,9 +27,10 @@ class Meta: class TrajectoryEstimateSerializer(serializers.ModelSerializer): probe_insertion = serializers.SlugRelatedField( - read_only=False, required=False, slug_field='id', many=False, + read_only=False, required=False, slug_field='id', many=False, allow_null=True, queryset=ProbeInsertion.objects.all(), ) + x = serializers.FloatField(required=True, allow_null=True) y = serializers.FloatField(required=True, allow_null=True) z = serializers.FloatField(required=False, allow_null=True) @@ -45,6 +46,15 @@ class TrajectoryEstimateSerializer(serializers.ModelSerializer): queryset=CoordinateSystem.objects.all(), ) + def to_internal_value(self, data): + if data.get('chronic_insertion', None) is None: + data['chronic_insertion'] = None + + if data.get('probe_insertion', None) is None: + data['probe_insertion'] = None + + return super(TrajectoryEstimateSerializer, self).to_internal_value(data) + class Meta: model = TrajectoryEstimate fields = '__all__' @@ -87,7 +97,7 @@ class Meta: list_serializer_class = FilterDatasetSerializer model = Dataset fields = ('id', 'name', 'dataset_type', 'data_url', 'url', 'file_size', - 'hash', 'version', 'collection') + 'hash', 'version', 'collection', 'qc') class ChronicProbeInsertionListSerializer(serializers.ModelSerializer): diff --git a/alyx/experiments/tests_rest.py 
b/alyx/experiments/tests_rest.py index 3a2c99cda..c2138d1e3 100644 --- a/alyx/experiments/tests_rest.py +++ b/alyx/experiments/tests_rest.py @@ -344,9 +344,9 @@ def test_dataset_filters(self): tag, _ = Tag.objects.get_or_create(name='tag_test') d1 = Dataset.objects.create(session=self.session, name='spikes.times.npy', - dataset_type=dtype1, collection='alf/probe_00') + dataset_type=dtype1, collection='alf/probe_00', qc=30) Dataset.objects.create(session=self.session, name='clusters.amps.npy', - dataset_type=dtype2, collection='alf/probe_00') + dataset_type=dtype2, collection='alf/probe_00', qc=40) d1.tags.add(tag) d1.save() @@ -368,10 +368,28 @@ def test_dataset_filters(self): d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) self.assertEqual(len(d), 1) self.assertEqual(probe['id'], d[0]['id']) - q = '?datasets=clusters.amps' self.assertFalse(self.ar(self.client.get(reverse('probeinsertion-list') + q))) + # test dataset + qc filters + q = '?datasets=spikes.times.npy,clusters.amps.npy&dataset_qc_lte=FAIL' + d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) + self.assertEqual(len(d), 1, 'Expect insertion returned as all dsets match QC') + q = '?datasets=spikes.times.npy,clusters.amps.npy&dataset_qc_lte=WARNING' + d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) + self.assertEqual(len(d), 0, 'Expect none returned as one dset doesn''t match QC') + q = '?datasets=spikes.times.npy&dataset_qc_lte=30' # QC code should also work + d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) + self.assertEqual(len(d), 1, 'Expect insertion returned as searched dset matches QC') + + # test qc alone + q = '?dataset_qc_lte=WARNING' + d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) + self.assertEqual(len(d), 1, 'Expect insertion returned as at least 1 dset matches QC') + q = '?dataset_qc_lte=10' # PASS + d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) + self.assertEqual(len(d), 0, 'Expect none returned as no dset matches QC') + # test filtering by tag q = '?tag=tag_test' d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) @@ -447,7 +465,7 @@ def test_create_list_delete_fov(self): url = reverse('fovlocation-list') with transaction.atomic(): response = self.post(url, loc_dict) - self.ar(response, 500) + self.assertIn(response.status_code, (400, 500)) # In later versions status code is 400 url = reverse('fieldsofview-list') # FOV location containing atlas ID 9 should no longer be default provenance and therefore diff --git a/alyx/experiments/views.py b/alyx/experiments/views.py index db6937745..233dd8dbf 100644 --- a/alyx/experiments/views.py +++ b/alyx/experiments/views.py @@ -1,5 +1,6 @@ import logging +from one.alf.spec import QC from rest_framework import generics from django_filters.rest_framework import CharFilter, UUIDFilter, NumberFilter from django.db.models import Count, Q @@ -73,8 +74,9 @@ class ProbeInsertionFilter(BaseFilterSet): model = CharFilter('model__name') dataset_types = CharFilter(field_name='dataset_types', method='filter_dataset_types') datasets = CharFilter(field_name='datasets', method='filter_datasets') + dataset_qc_lte = CharFilter(field_name='dataset_qc', method='filter_dataset_qc_lte') lab = CharFilter(field_name='session__lab__name', lookup_expr='iexact') - project = CharFilter(field_name='session__project__name', lookup_expr='icontains') + project = CharFilter(field_name='session__projects__name', lookup_expr='icontains') task_protocol = 
CharFilter(field_name='session__task_protocol', lookup_expr='icontains') tag = CharFilter(field_name='tag', method='filter_tag') # brain region filters @@ -110,13 +112,21 @@ def filter_dataset_types(self, queryset, _, value): return queryset def filter_datasets(self, queryset, _, value): + qc = QC.validate(self.request.query_params.get('dataset_qc_lte', QC.FAIL)) dsets = value.split(',') - queryset = queryset.filter(datasets__name__in=dsets) + queryset = queryset.filter(datasets__name__in=dsets, datasets__qc__lte=qc) queryset = queryset.annotate( dsets_count=Count('datasets', distinct=True)) queryset = queryset.filter(dsets_count__gte=len(dsets)) return queryset + def filter_dataset_qc_lte(self, queryset, _, value): + # If filtering on datasets too, `filter_datasets` handles both QC and Datasets + if 'datasets' in self.request.query_params: + return queryset + qc = QC.validate(value) + return queryset.filter(datasets__qc__lte=qc) + class Meta: model = ProbeInsertion exclude = ['json'] @@ -139,6 +149,7 @@ class ProbeInsertionList(generics.ListCreateAPIView): - **tag**: tag name (icontains) - **dataset_types**: dataset type(s) - **datasets**: datasets name(s) + - **dataset_qc_lte**: dataset QC value, e.g. PASS, WARNING, FAIL, CRITICAL - **atlas_name**: returns a session if any channel name icontains the value: `/insertions?brain_region=visual cortex` - **atlas_acronym**: returns a session if any of its channels name exactly @@ -425,7 +436,7 @@ class FOVList(generics.ListCreateAPIView): `/fields-of-view?provenance=Estimate` - **atlas**: One or more brain regions covered by a field of view - **subject**: subject nickname: `/fields-of-view?subject=Algernon` - - **project**: the + - **project**: the project name - **date**: session date: `/fields-of-view?date=2020-01-15` - **experiment_number**: session number `/fields-of-view?experiment_number=1` - **session**: `/fields-of-view?session=aad23144-0e52-4eac-80c5-c4ee2decb198` diff --git a/alyx/misc/management/commands/one_cache.py b/alyx/misc/management/commands/one_cache.py index 0ec25dbef..7c3ea682a 100644 --- a/alyx/misc/management/commands/one_cache.py +++ b/alyx/misc/management/commands/one_cache.py @@ -16,6 +16,8 @@ import pyarrow as pa from tqdm import tqdm from one.alf.cache import _metadata +from one.util import QC_TYPE +from one.alf.spec import QC from one.remote.aws import get_s3_virtual_host from django.db import connection @@ -30,7 +32,7 @@ from experiments.models import ProbeInsertion logger = logging.getLogger(__name__) -ONE_API_VERSION = '1.13.0' # Minimum compatible ONE api version +ONE_API_VERSION = '2.7' # Minimum compatible ONE api version def measure_time(func): @@ -382,7 +384,7 @@ def generate_datasets_frame(tags=None, batch_size=100_000) -> pd.DataFrame: fields = ( 'id', 'name', 'file_size', 'hash', 'collection', 'revision__name', 'default_dataset', 'session__id', 'session__start_time__date', 'session__number', - 'session__subject__nickname', 'session__lab__name', 'exists_flatiron', 'exists_aws' + 'session__subject__nickname', 'session__lab__name', 'exists_flatiron', 'exists_aws', 'qc' ) fields_map = {'session__id': 'eid', 'default_dataset': 'default_revision'} @@ -411,6 +413,9 @@ def generate_datasets_frame(tags=None, batch_size=100_000) -> pd.DataFrame: df[['id', 'eid']] = df[['id', 'eid']].astype(str) df = df.set_index(['eid', 'id']) + # Convert QC enum int to pandas category + df['qc'] = pd.Categorical([QC(i).name for i in df['qc']], dtype=QC_TYPE) + all_df = pd.concat([all_df, df], ignore_index=False, copy=False) 
logger.debug(f'Final datasets frame = {getsizeof(all_df) / 1024 ** 2:.1f} MiB') diff --git a/requirements.txt b/requirements.txt index 61b645d45..cdf85406e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,4 +28,4 @@ python-magic pytz structlog>=21.5.0 webdavclient3 -ONE-api>=2.1.0 +ONE-api~=2.7rc0 diff --git a/requirements_frozen.txt b/requirements_frozen.txt index f4d585feb..1fa65296c 100644 --- a/requirements_frozen.txt +++ b/requirements_frozen.txt @@ -1,7 +1,7 @@ -asgiref==3.7.2 +asgiref==3.8.0 backports.zoneinfo==0.2.1 -boto3==1.34.61 -botocore==1.34.61 +boto3==1.34.68 +botocore==1.34.68 certifi==2024.2.2 cffi==1.16.0 charset-normalizer==3.3.2 @@ -26,38 +26,38 @@ django-mptt==0.14.0 django-polymorphic==3.1.0 django-reversion==5.0.12 django-storages==1.14.2 -django-structlog==7.1.0 +django-structlog==8.0.0 django-test-without-migrations==0.6 -djangorestframework==3.14.0 +djangorestframework==3.15.0 docopt==0.6.2 docutils==0.20.1 drfdocs==0.0.11 flake8==7.0.0 -fonttools==4.49.0 +fonttools==4.50.0 globus-cli==3.26.0 globus-sdk==3.37.0 iblutil==1.8.0 idna==3.6 -importlib_metadata==7.0.2 -importlib_resources==6.3.0 +importlib_metadata==7.1.0 +importlib_resources==6.4.0 itypes==1.2.0 Jinja2==3.1.3 jmespath==1.0.1 kiwisolver==1.4.5 llvmlite==0.41.1 lxml==5.1.0 -Markdown==3.5.2 +Markdown==3.6 MarkupSafe==2.1.5 matplotlib==3.7.5 mccabe==0.7.0 numba==0.58.1 numpy==1.24.4 -ONE-api==2.6.0 +ONE-api==2.7rc2 packaging==24.0 pandas==2.0.3 pillow==10.2.0 psycopg2-binary==2.9.9 -pyarrow==15.0.1 +pyarrow==15.0.2 pycodestyle==2.11.1 pycparser==2.21 pyflakes==3.2.0 @@ -69,7 +69,7 @@ python-magic==0.4.27 pytz==2024.1 PyYAML==6.0.1 requests==2.31.0 -s3transfer==0.10.0 +s3transfer==0.10.1 six==1.16.0 sqlparse==0.4.4 structlog==24.1.0 @@ -79,4 +79,4 @@ tzdata==2024.1 uritemplate==4.1.1 urllib3==1.26.18 webdavclient3==3.14.6 -zipp==3.18.0 +zipp==3.18.1 diff --git a/scripts/sync_ucl/prune_cortexlab.py b/scripts/sync_ucl/prune_cortexlab.py index e3dbd9099..1e04db2eb 100755 --- a/scripts/sync_ucl/prune_cortexlab.py +++ b/scripts/sync_ucl/prune_cortexlab.py @@ -17,16 +17,13 @@ json_file_out = '../scripts/sync_ucl/cortexlab_pruned.json' -# Since we currently still use both the project and the projects field, we need to filter for -# either containing an IBL project -ibl_proj = (Q(project__name__icontains='ibl') | Q(projects__name__icontains='ibl') | - Q(project__name='practice') | Q(projects__name='practice')) +# Filter for sessions containing an IBL project +ibl_proj = Q(projects__name__icontains='ibl') | Q(projects__name='practice') ses = Session.objects.using('cortexlab').filter(ibl_proj) # remove all subjects that never had anything to do with IBL sub_ibl = list(ses.values_list('subject', flat=True)) sub_ibl += list(Subject.objects.values_list('pk', flat=True)) -sub_ibl += list(Subject.objects.using('cortexlab').filter( - projects__name__icontains='ibl').values_list('pk', flat=True)) +sub_ibl += list(Subject.objects.using('cortexlab').filter(ibl_proj).values_list('pk', flat=True)) Subject.objects.using('cortexlab').exclude(pk__in=sub_ibl).delete() # then remove base Sessions @@ -76,7 +73,7 @@ # import projects from cortexlab. 
remove those that don't correspond to any session -pk_projs = list(filter(None, flatten(ses_ucl.values_list('project', 'projects').distinct()))) +pk_projs = list(filter(None, flatten(ses_ucl.values_list('projects').distinct()))) pk_projs += list(Project.objects.values_list('pk', flat=True)) Project.objects.using('cortexlab').exclude(pk__in=pk_projs).delete() diff --git a/setup.cfg b/setup.cfg index ca0902f76..d332421a5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ universal = 1 [tool:pytest] [flake8] -ignore = E117,E265,E731,F403,E741,E722,W504 +ignore = E117,E265,E731,F403,E741,E722,W504,D max-line-length = 99 exclude = migrations diff --git a/setup.py b/setup.py index 92bb3f3e4..ecba39ac4 100755 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 from pathlib import Path -from getpass import getpass +from getpass import getpass, getuser import os import os.path as op import platform @@ -141,8 +141,8 @@ def _replace_in_file(source_file, target_file, replacements=None, target_mode='w try: _system(f'sudo mkdir -p {file_log_json.parent}') _system(f'sudo mkdir -p {file_log.parent}') - _system(f'sudo chown {os.getlogin()}:www-data -fR {file_log.parent}') - _system(f'sudo chown {os.getlogin()}:www-data -fR {file_log_json.parent}') + _system(f'sudo chown {getuser()}:www-data -fR {file_log.parent}') + _system(f'sudo chown {getuser()}:www-data -fR {file_log_json.parent}') _system(f'touch {file_log_json}') _system(f'touch {file_log}') _system('python3 alyx/manage.py makemigrations')
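
As an end-to-end illustration of the new surface area in this change set (dataset QC filtering, QC on registration, and the `check-protected` endpoint), here is a hedged client sketch using plain `requests`. The base URL, token, repository hostname, subject path and file names are placeholders, not values defined in this diff:

```python
import requests

ALYX_URL = 'https://alyx.example.org'              # placeholder Alyx instance
HEADERS = {'Authorization': 'Token <api-token>'}   # placeholder token auth

# Sessions whose named datasets all have QC of WARNING or better
r = requests.get(f'{ALYX_URL}/sessions', headers=HEADERS, params={
    'datasets': '_ibl_wheel.position.npy,_ibl_trials.table.pqt',
    'dataset_qc_lte': 'WARNING'})
r.raise_for_status()

# Register two files with per-file QC outcomes (a single value would apply to all files)
payload = {
    'created_by': 'user_name_alyx',
    'path': 'ZM_1085/2019-02-12/002/alf',   # Subject/Date/Number[/collection] relative path
    'filenames': 'obj.attr.npy,obj.times.npy',
    'hostname': 'hostname',                 # placeholder data repository hostname
    'qc': 'PASS,WARNING',
}
requests.post(f'{ALYX_URL}/register-file', headers=HEADERS, data=payload).raise_for_status()

# Ask whether any of those datasets carry a protected tag before overwriting them
check = requests.get(f'{ALYX_URL}/check-protected', headers=HEADERS, params={
    'created_by': payload['created_by'],
    'path': payload['path'],
    'filenames': payload['filenames']})
print(check.json()['status_code'])  # 403 if one or more datasets is protected, else 200
```
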