diff --git a/alyx/actions/admin.py b/alyx/actions/admin.py index c050063a..f764ab5c 100644 --- a/alyx/actions/admin.py +++ b/alyx/actions/admin.py @@ -456,10 +456,10 @@ class DatasetInline(BaseInlineAdmin): show_change_link = True model = Dataset extra = 1 - fields = ('name', 'dataset_type', 'collection', '_online', 'version', 'created_by', - 'created_datetime') + fields = ('name', 'dataset_type', 'collection', '_online', 'version', 'qc', + 'created_by', 'created_datetime') readonly_fields = fields - ordering = ("name",) + ordering = ('name',) def _online(self, obj): return obj.is_online diff --git a/alyx/actions/models.py b/alyx/actions/models.py index 573696f9..edec7c74 100644 --- a/alyx/actions/models.py +++ b/alyx/actions/models.py @@ -1,7 +1,9 @@ from datetime import timedelta -import structlog from math import inf +import structlog +from one.alf.spec import QC + from django.conf import settings from django.core.validators import MinValueValidator from django.db import models @@ -253,16 +255,10 @@ class Session(BaseAction): n_trials = models.IntegerField(blank=True, null=True) n_correct_trials = models.IntegerField(blank=True, null=True) - QC_CHOICES = [ - (50, 'CRITICAL',), - (40, 'FAIL',), - (30, 'WARNING',), - (0, 'NOT_SET',), - (10, 'PASS',), - ] - - qc = models.IntegerField(default=0, choices=QC_CHOICES, + QC_CHOICES = [(e.value, e.name) for e in QC] + qc = models.IntegerField(default=QC.NOT_SET, choices=QC_CHOICES, help_text=' / '.join([str(q[0]) + ': ' + q[1] for q in QC_CHOICES])) + extended_qc = models.JSONField(null=True, blank=True, help_text="Structured data about session QC," "formatted in a user-defined way") diff --git a/alyx/actions/serializers.py b/alyx/actions/serializers.py index 26f37417..0809e779 100644 --- a/alyx/actions/serializers.py +++ b/alyx/actions/serializers.py @@ -98,12 +98,13 @@ class SessionDatasetsSerializer(serializers.ModelSerializer): queryset=DatasetType.objects.all(), ) default_revision = 
serializers.CharField(source='default_dataset') + qc = BaseSerializerEnumField(required=False) class Meta: list_serializer_class = FilterDatasetSerializer model = Dataset fields = ('id', 'name', 'dataset_type', 'data_url', 'url', 'file_size', - 'hash', 'version', 'collection', 'revision', 'default_revision') + 'hash', 'version', 'collection', 'revision', 'default_revision', 'qc') class SessionWaterAdminSerializer(serializers.ModelSerializer): diff --git a/alyx/actions/tests_rest.py b/alyx/actions/tests_rest.py index 4fe66f4f..8bf83c2a 100644 --- a/alyx/actions/tests_rest.py +++ b/alyx/actions/tests_rest.py @@ -266,8 +266,8 @@ def test_sessions(self): # test dataset type filters dtype1, _ = DatasetType.objects.get_or_create(name='trials.table') dtype2, _ = DatasetType.objects.get_or_create(name='wheel.position') - Dataset.objects.create(session=ses, name='_ibl_trials.table.pqt', dataset_type=dtype1) - Dataset.objects.create(session=ses, name='_ibl_wheel.position.npy', dataset_type=dtype2) + Dataset.objects.create(session=ses, name='_ibl_trials.table.pqt', dataset_type=dtype1, qc=40) + Dataset.objects.create(session=ses, name='_ibl_wheel.position.npy', dataset_type=dtype2, qc=30) d = self.ar(self.client.get(reverse('session-list') + '?dataset_types=wheel.position')) self.assertCountEqual([str(ses.pk)], (x['id'] for x in d)) q = '?dataset_types=wheel.position,trials.table' # Check with list @@ -280,6 +280,22 @@ def test_sessions(self): self.assertCountEqual([str(ses.pk)], (x['id'] for x in d)) q = '?datasets=wheel.position' self.assertFalse(self.ar(self.client.get(reverse('session-list') + q))) + # multiple datasets + q = '?datasets=_ibl_wheel.position.npy,_ibl_trials.table.pqt' + d = self.ar(self.client.get(reverse('session-list') + q)) + self.assertCountEqual([str(ses.pk)], (x['id'] for x in d)) + # datasets + qc (expect to return sessions where defined datasets have correct QC) + q = '?datasets=_ibl_wheel.position.npy,_ibl_trials.table.pqt&dataset_qc_lte=WARNING' + 
self.assertFalse(self.ar(self.client.get(reverse('session-list') + q))) + q = '?datasets=_ibl_wheel.position.npy&dataset_qc_lte=WARNING' + d = self.ar(self.client.get(reverse('session-list') + q)) + self.assertCountEqual([str(ses.pk)], (x['id'] for x in d), 'failed to return session') + # qc alone (expect to return sessions where any dataset has correct QC) + q = '?dataset_qc_lte=WARNING' + d = self.ar(self.client.get(reverse('session-list') + q)) + self.assertCountEqual([str(ses.pk)], (x['id'] for x in d), 'failed to return session') + q = '?dataset_qc_lte=10' + self.assertFalse(self.ar(self.client.get(reverse('session-list') + q))) def test_surgeries(self): from actions.models import Surgery diff --git a/alyx/actions/views.py b/alyx/actions/views.py index d0b788b0..664c30c0 100644 --- a/alyx/actions/views.py +++ b/alyx/actions/views.py @@ -1,6 +1,7 @@ from datetime import timedelta, date from operator import itemgetter +from one.alf.spec import QC from django.contrib.postgres.fields import JSONField from django.db.models import Count, Q, F, ExpressionWrapper, FloatField from django.db.models.deletion import Collector @@ -226,6 +227,7 @@ class SessionFilter(BaseActionFilter): dataset_types = django_filters.CharFilter(field_name='dataset_types', method='filter_dataset_types') datasets = django_filters.CharFilter(field_name='datasets', method='filter_datasets') + dataset_qc_lte = django_filters.CharFilter(field_name='dataset_qc', method='filter_dataset_qc_lte') performance_gte = django_filters.NumberFilter(field_name='performance', method='filter_performance_gte') performance_lte = django_filters.NumberFilter(field_name='performance', @@ -284,13 +286,23 @@ def filter_dataset_types(self, queryset, _, value): def filter_datasets(self, queryset, _, value): # Note this may later be modified to include collections, e.g. 
?datasets=alf/obj.attr.ext + qc = QC.validate(self.request.query_params.get('dataset_qc_lte', QC.FAIL)) dsets = value.split(',') - queryset = queryset.filter(data_dataset_session_related__name__in=dsets) + queryset = queryset.filter(data_dataset_session_related__name__in=dsets, + data_dataset_session_related__qc__lte=qc) queryset = queryset.annotate( dsets_count=Count('data_dataset_session_related', distinct=True)) queryset = queryset.filter(dsets_count__gte=len(dsets)) return queryset + def filter_dataset_qc_lte(self, queryset, _, value): + # If filtering on datasets too, `filter_datasets` handles both QC and Datasets + if 'datasets' in self.request.query_params: + return queryset + qc = QC.validate(value) + queryset = queryset.filter(data_dataset_session_related__qc__lte=qc) + return queryset + def filter_performance_gte(self, queryset, name, perf): queryset = queryset.exclude(n_trials__isnull=True) pf = ExpressionWrapper(100 * F('n_correct_trials') / F('n_trials'), @@ -326,6 +338,8 @@ class SessionAPIList(generics.ListCreateAPIView): - **subject**: subject nickname `/sessions?subject=Algernon` - **dataset_types**: dataset type(s) `/sessions?dataset_types=trials.table,camera.times` - **datasets**: dataset name(s) `/sessions?datasets=_ibl_leftCamera.times.npy` + - **dataset_qc_lte**: dataset QC values less than or equal to this + `/sessions?dataset_qc_lte=WARNING` - **number**: session number - **users**: experimenters (exact) - **date_range**: date `/sessions?date_range=2020-01-12,2020-01-16` @@ -354,9 +368,9 @@ class SessionAPIList(generics.ListCreateAPIView): - **histology**: returns sessions for which the subject has an histology session: `/sessions?histology=True` - **django**: generic filter allowing lookups (same syntax as json filter) - `/sessions?django=project__name__icontains,matlab + `/sessions?django=project__name__icontains,matlab` filters sessions that have matlab in the project name - `/sessions?django=~project__name__icontains,matlab + 
`/sessions?django=~project__name__icontains,matlab` does the exclusive set: filters sessions that do not have matlab in the project name [===> session model reference](/admin/doc/models/actions.session) diff --git a/alyx/alyx/__init__.py b/alyx/alyx/__init__.py index 37c1153c..04af162b 100644 --- a/alyx/alyx/__init__.py +++ b/alyx/alyx/__init__.py @@ -1 +1 @@ -VERSION = __version__ = '1.18.2' +VERSION = __version__ = '2.0.0' diff --git a/alyx/data/admin.py b/alyx/data/admin.py index f53a34b5..6c3de048 100644 --- a/alyx/data/admin.py +++ b/alyx/data/admin.py @@ -1,7 +1,7 @@ from django.db.models import Count, ProtectedError from django.contrib import admin, messages from django.utils.html import format_html -from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter +from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter, ChoiceDropdownFilter from rangefilter.filters import DateRangeFilter from .models import (DataRepositoryType, DataRepository, DataFormat, DatasetType, @@ -84,16 +84,17 @@ class FileRecordInline(BaseInlineAdmin): class DatasetAdmin(BaseExperimentalDataAdmin): fields = ['name', '_online', 'version', 'dataset_type', 'file_size', 'hash', 'session_ro', 'collection', 'auto_datetime', 'revision_', 'default_dataset', - '_protected', '_public', 'tags'] + '_protected', '_public', 'tags', 'qc'] readonly_fields = ['name_', 'session_ro', '_online', 'auto_datetime', 'revision_', '_protected', '_public', 'tags'] list_display = ['name_', '_online', 'version', 'collection', 'dataset_type_', 'file_size', - 'session_ro', 'created_by', 'created_datetime'] + 'session_ro', 'created_by', 'created_datetime', 'qc'] inlines = [FileRecordInline] list_filter = [('created_by', RelatedDropdownFilter), ('created_datetime', DateRangeFilter), ('dataset_type', RelatedDropdownFilter), - ('tags', RelatedDropdownFilter) + ('tags', RelatedDropdownFilter), + ('qc', ChoiceDropdownFilter) ] search_fields = ('session__id', 'name', 'collection', 
'dataset_type__name', 'dataset_type__filename_pattern', 'version') diff --git a/alyx/data/migrations/0019_dataset_qc.py b/alyx/data/migrations/0019_dataset_qc.py new file mode 100644 index 00000000..e2d0cc5d --- /dev/null +++ b/alyx/data/migrations/0019_dataset_qc.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.10 on 2024-02-13 15:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('data', '0018_alter_dataset_collection_alter_revision_name'), + ] + + operations = [ + migrations.AddField( + model_name='dataset', + name='qc', + field=models.IntegerField(choices=[(50, 'CRITICAL'), (40, 'FAIL'), (30, 'WARNING'), (0, 'NOT_SET'), (10, 'PASS')], default=0, help_text='50: CRITICAL / 40: FAIL / 30: WARNING / 0: NOT_SET / 10: PASS'), + ), + ] diff --git a/alyx/data/models.py b/alyx/data/models.py index 83fd08e7..df205f05 100644 --- a/alyx/data/models.py +++ b/alyx/data/models.py @@ -1,4 +1,5 @@ import structlog +from one.alf.spec import QC from django.core.validators import RegexValidator from django.db import models @@ -351,6 +352,10 @@ class Dataset(BaseExperimentalData): help_text="Whether this dataset is the default " "latest revision") + QC_CHOICES = [(e.value, e.name) for e in QC] + qc = models.IntegerField(default=QC.NOT_SET, choices=QC_CHOICES, + help_text=' / '.join([str(q[0]) + ': ' + q[1] for q in QC_CHOICES])) + @property def is_online(self): fr = self.file_records.filter(data_repository__globus_is_personal=False) diff --git a/alyx/data/serializers.py b/alyx/data/serializers.py index 54a131bc..b7858bee 100644 --- a/alyx/data/serializers.py +++ b/alyx/data/serializers.py @@ -5,6 +5,7 @@ from .models import (DataRepositoryType, DataRepository, DataFormat, DatasetType, Dataset, Download, FileRecord, Revision, Tag) from .transfers import _get_session, _change_default_dataset +from alyx.base import BaseSerializerEnumField from actions.models import Session from subjects.models import Subject from 
misc.models import LabMember @@ -142,6 +143,7 @@ class DatasetSerializer(serializers.HyperlinkedModelSerializer): default_dataset = serializers.BooleanField(required=False, allow_null=True) public = serializers.ReadOnlyField() protected = serializers.ReadOnlyField() + qc = BaseSerializerEnumField(required=False) file_records = DatasetFileRecordsSerializer(read_only=True, many=True) experiment_number = serializers.SerializerMethodField() @@ -213,7 +215,7 @@ class Meta: 'session', 'file_size', 'hash', 'version', 'experiment_number', 'file_records', 'subject', 'date', 'number', 'auto_datetime', 'revision', - 'default_dataset', 'protected', 'public', 'tags') + 'default_dataset', 'protected', 'public', 'tags', 'qc') extra_kwargs = { 'subject': {'write_only': True}, 'date': {'write_only': True}, diff --git a/alyx/data/views.py b/alyx/data/views.py index ff86ffea..316e3eaa 100644 --- a/alyx/data/views.py +++ b/alyx/data/views.py @@ -157,6 +157,7 @@ class DatasetFilter(BaseFilterSet): protected = django_filters.BooleanFilter(method='filter_protected') tag = django_filters.CharFilter('tags__name') revision = django_filters.CharFilter('revision__name') + qc = django_filters.CharFilter(method='enum_field_filter') class Meta: model = Dataset @@ -212,6 +213,7 @@ class DatasetList(generics.ListCreateAPIView): - **tag**: tag name '/datasets?tag=repeated_site - **public**: only returns datasets that are public or not public - **protected**: only returns datasets that are protected or not protected + - **qc**: only returns datasets with this QC value `/datasets?qc=PASS` [===> dataset model reference](/admin/doc/models/data.dataset) """ diff --git a/alyx/experiments/serializers.py b/alyx/experiments/serializers.py index 6685dc5f..0c2c64ec 100644 --- a/alyx/experiments/serializers.py +++ b/alyx/experiments/serializers.py @@ -87,7 +87,7 @@ class Meta: list_serializer_class = FilterDatasetSerializer model = Dataset fields = ('id', 'name', 'dataset_type', 'data_url', 'url', 'file_size', 
- 'hash', 'version', 'collection') + 'hash', 'version', 'collection', 'qc') class ChronicProbeInsertionListSerializer(serializers.ModelSerializer): diff --git a/alyx/experiments/tests_rest.py b/alyx/experiments/tests_rest.py index 3a2c99cd..a717d86f 100644 --- a/alyx/experiments/tests_rest.py +++ b/alyx/experiments/tests_rest.py @@ -344,9 +344,9 @@ def test_dataset_filters(self): tag, _ = Tag.objects.get_or_create(name='tag_test') d1 = Dataset.objects.create(session=self.session, name='spikes.times.npy', - dataset_type=dtype1, collection='alf/probe_00') + dataset_type=dtype1, collection='alf/probe_00', qc=30) Dataset.objects.create(session=self.session, name='clusters.amps.npy', - dataset_type=dtype2, collection='alf/probe_00') + dataset_type=dtype2, collection='alf/probe_00', qc=40) d1.tags.add(tag) d1.save() @@ -368,10 +368,28 @@ def test_dataset_filters(self): d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) self.assertEqual(len(d), 1) self.assertEqual(probe['id'], d[0]['id']) - q = '?datasets=clusters.amps' self.assertFalse(self.ar(self.client.get(reverse('probeinsertion-list') + q))) + # test dataset + qc filters + q = '?datasets=spikes.times.npy,clusters.amps.npy&dataset_qc_lte=FAIL' + d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) + self.assertEqual(len(d), 1, 'Expect insertion returned as all dsets match QC') + q = '?datasets=spikes.times.npy,clusters.amps.npy&dataset_qc_lte=WARNING' + d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) + self.assertEqual(len(d), 0, 'Expect none returned as one dset doesn\'t match QC') + q = '?datasets=spikes.times.npy&dataset_qc_lte=30' # QC code should also work + d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) + self.assertEqual(len(d), 1, 'Expect insertion returned as searched dset matches QC') + + # test qc alone + q = '?dataset_qc_lte=WARNING' + d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) + self.assertEqual(len(d), 1, 'Expect
insertion returned as at least 1 dset matches QC') + q = '?dataset_qc_lte=10' # PASS + d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) + self.assertEqual(len(d), 0, 'Expect none returned as no dset matches QC') + # test filtering by tag q = '?tag=tag_test' d = self.ar(self.client.get(reverse('probeinsertion-list') + q)) diff --git a/alyx/experiments/views.py b/alyx/experiments/views.py index db693774..7b329f93 100644 --- a/alyx/experiments/views.py +++ b/alyx/experiments/views.py @@ -1,5 +1,6 @@ import logging +from one.alf.spec import QC from rest_framework import generics from django_filters.rest_framework import CharFilter, UUIDFilter, NumberFilter from django.db.models import Count, Q @@ -73,6 +74,7 @@ class ProbeInsertionFilter(BaseFilterSet): model = CharFilter('model__name') dataset_types = CharFilter(field_name='dataset_types', method='filter_dataset_types') datasets = CharFilter(field_name='datasets', method='filter_datasets') + dataset_qc_lte = CharFilter(field_name='dataset_qc', method='filter_dataset_qc_lte') lab = CharFilter(field_name='session__lab__name', lookup_expr='iexact') project = CharFilter(field_name='session__project__name', lookup_expr='icontains') task_protocol = CharFilter(field_name='session__task_protocol', lookup_expr='icontains') @@ -110,13 +112,21 @@ def filter_dataset_types(self, queryset, _, value): return queryset def filter_datasets(self, queryset, _, value): + qc = QC.validate(self.request.query_params.get('dataset_qc_lte', QC.FAIL)) dsets = value.split(',') - queryset = queryset.filter(datasets__name__in=dsets) + queryset = queryset.filter(datasets__name__in=dsets, datasets__qc__lte=qc) queryset = queryset.annotate( dsets_count=Count('datasets', distinct=True)) queryset = queryset.filter(dsets_count__gte=len(dsets)) return queryset + def filter_dataset_qc_lte(self, queryset, _, value): + # If filtering on datasets too, `filter_datasets` handles both QC and Datasets + if 'datasets' in self.request.query_params: 
+ return queryset + qc = QC.validate(value) + return queryset.filter(datasets__qc__lte=qc) + class Meta: model = ProbeInsertion exclude = ['json'] @@ -139,6 +149,7 @@ class ProbeInsertionList(generics.ListCreateAPIView): - **tag**: tag name (icontains) - **dataset_types**: dataset type(s) - **datasets**: datasets name(s) + - **dataset_qc_lte**: dataset QC value, e.g. PASS, WARNING, FAIL, CRITICAL - **atlas_name**: returns a session if any channel name icontains the value: `/insertions?brain_region=visual cortex` - **atlas_acronym**: returns a session if any of its channels name exactly diff --git a/alyx/misc/management/commands/one_cache.py b/alyx/misc/management/commands/one_cache.py index 0ec25dbe..a5e96ec9 100644 --- a/alyx/misc/management/commands/one_cache.py +++ b/alyx/misc/management/commands/one_cache.py @@ -16,6 +16,8 @@ import pyarrow as pa from tqdm import tqdm from one.alf.cache import _metadata +from one.util import QC_TYPE +from one.alf.spec import QC from one.remote.aws import get_s3_virtual_host from django.db import connection @@ -30,7 +32,7 @@ from experiments.models import ProbeInsertion logger = logging.getLogger(__name__) -ONE_API_VERSION = '1.13.0' # Minimum compatible ONE api version +ONE_API_VERSION = '2.7.0' # Minimum compatible ONE api version def measure_time(func): @@ -382,7 +384,7 @@ def generate_datasets_frame(tags=None, batch_size=100_000) -> pd.DataFrame: fields = ( 'id', 'name', 'file_size', 'hash', 'collection', 'revision__name', 'default_dataset', 'session__id', 'session__start_time__date', 'session__number', - 'session__subject__nickname', 'session__lab__name', 'exists_flatiron', 'exists_aws' + 'session__subject__nickname', 'session__lab__name', 'exists_flatiron', 'exists_aws', 'qc' ) fields_map = {'session__id': 'eid', 'default_dataset': 'default_revision'} @@ -411,6 +413,9 @@ def generate_datasets_frame(tags=None, batch_size=100_000) -> pd.DataFrame: df[['id', 'eid']] = df[['id', 'eid']].astype(str) df = df.set_index(['eid', 
'id']) + # Convert QC enum int to pandas category + df['qc'] = pd.Categorical([QC(i).name for i in df['qc']], dtype=QC_TYPE) + all_df = pd.concat([all_df, df], ignore_index=False, copy=False) logger.debug(f'Final datasets frame = {getsizeof(all_df) / 1024 ** 2:.1f} MiB') diff --git a/requirements.txt b/requirements.txt index 61b645d4..5463f901 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,4 +28,4 @@ python-magic pytz structlog>=21.5.0 webdavclient3 -ONE-api>=2.1.0 +ONE-api>=2.7.0