Commit

Issue #830
k1o0 committed Feb 14, 2024
1 parent 1e994cf commit eb27efe
Showing 16 changed files with 122 additions and 33 deletions.
6 changes: 3 additions & 3 deletions alyx/actions/admin.py
@@ -456,10 +456,10 @@ class DatasetInline(BaseInlineAdmin):
show_change_link = True
model = Dataset
extra = 1
fields = ('name', 'dataset_type', 'collection', '_online', 'version', 'created_by',
'created_datetime')
fields = ('name', 'dataset_type', 'collection', '_online', 'version', 'qc',
'created_by', 'created_datetime')
readonly_fields = fields
ordering = ("name",)
ordering = ('name',)

def _online(self, obj):
return obj.is_online
16 changes: 6 additions & 10 deletions alyx/actions/models.py
@@ -1,7 +1,9 @@
from datetime import timedelta
import structlog
from math import inf

import structlog
from one.alf.spec import QC

from django.conf import settings
from django.core.validators import MinValueValidator
from django.db import models
@@ -253,16 +255,10 @@ class Session(BaseAction):
n_trials = models.IntegerField(blank=True, null=True)
n_correct_trials = models.IntegerField(blank=True, null=True)

QC_CHOICES = [
(50, 'CRITICAL',),
(40, 'FAIL',),
(30, 'WARNING',),
(0, 'NOT_SET',),
(10, 'PASS',),
]

qc = models.IntegerField(default=0, choices=QC_CHOICES,
QC_CHOICES = [(e.value, e.name) for e in QC]
qc = models.IntegerField(default=QC.NOT_SET, choices=QC_CHOICES,
help_text=' / '.join([str(q[0]) + ': ' + q[1] for q in QC_CHOICES]))

extended_qc = models.JSONField(null=True, blank=True,
help_text="Structured data about session QC,"
"formatted in a user-defined way")
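Note: the five QC levels previously hard-coded on Session now come straight from the one.alf.spec.QC enum, so Alyx and ONE cannot drift apart. A minimal sketch of how the comprehension expands, using a stand-in IntEnum whose member values are taken from the generated migration further down (the real enum ships with ONE):

from enum import IntEnum

class QC(IntEnum):  # stand-in for one.alf.spec.QC; values as in the migration below
    CRITICAL = 50
    FAIL = 40
    WARNING = 30
    NOT_SET = 0
    PASS = 10

QC_CHOICES = [(e.value, e.name) for e in QC]
# -> [(50, 'CRITICAL'), (40, 'FAIL'), (30, 'WARNING'), (0, 'NOT_SET'), (10, 'PASS')]
help_text = ' / '.join(str(q[0]) + ': ' + q[1] for q in QC_CHOICES)
# -> '50: CRITICAL / 40: FAIL / 30: WARNING / 0: NOT_SET / 10: PASS'
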
3 changes: 2 additions & 1 deletion alyx/actions/serializers.py
@@ -98,12 +98,13 @@ class SessionDatasetsSerializer(serializers.ModelSerializer):
queryset=DatasetType.objects.all(),
)
default_revision = serializers.CharField(source='default_dataset')
qc = BaseSerializerEnumField(required=False)

class Meta:
list_serializer_class = FilterDatasetSerializer
model = Dataset
fields = ('id', 'name', 'dataset_type', 'data_url', 'url', 'file_size',
'hash', 'version', 'collection', 'revision', 'default_revision')
'hash', 'version', 'collection', 'revision', 'default_revision', 'qc')


class SessionWaterAdminSerializer(serializers.ModelSerializer):
20 changes: 18 additions & 2 deletions alyx/actions/tests_rest.py
@@ -266,8 +266,8 @@ def test_sessions(self):
# test dataset type filters
dtype1, _ = DatasetType.objects.get_or_create(name='trials.table')
dtype2, _ = DatasetType.objects.get_or_create(name='wheel.position')
Dataset.objects.create(session=ses, name='_ibl_trials.table.pqt', dataset_type=dtype1)
Dataset.objects.create(session=ses, name='_ibl_wheel.position.npy', dataset_type=dtype2)
Dataset.objects.create(session=ses, name='_ibl_trials.table.pqt', dataset_type=dtype1, qc=40)
Dataset.objects.create(session=ses, name='_ibl_wheel.position.npy', dataset_type=dtype2, qc=30)
d = self.ar(self.client.get(reverse('session-list') + '?dataset_types=wheel.position'))
self.assertCountEqual([str(ses.pk)], (x['id'] for x in d))
q = '?dataset_types=wheel.position,trials.table' # Check with list
@@ -280,6 +280,22 @@ def test_sessions(self):
self.assertCountEqual([str(ses.pk)], (x['id'] for x in d))
q = '?datasets=wheel.position'
self.assertFalse(self.ar(self.client.get(reverse('session-list') + q)))
# multiple datasets
q = '?datasets=_ibl_wheel.position.npy,_ibl_trials.table.pqt'
d = self.ar(self.client.get(reverse('session-list') + q))
self.assertCountEqual([str(ses.pk)], (x['id'] for x in d))
# datasets + qc (expect to return sessions where defined datasets have correct QC)
q = '?datasets=_ibl_wheel.position.npy,_ibl_trials.table.pqt&dataset_qc_lte=WARNING'
self.assertFalse(self.ar(self.client.get(reverse('session-list') + q)))
q = '?datasets=_ibl_wheel.position.npy&dataset_qc_lte=WARNING'
d = self.ar(self.client.get(reverse('session-list') + q))
self.assertCountEqual([str(ses.pk)], (x['id'] for x in d), 'failed to return session')
# qc alone (expect to return sessions where any dataset has correct QC)
q = '?dataset_qc_lte=WARNING'
d = self.ar(self.client.get(reverse('session-list') + q))
self.assertCountEqual([str(ses.pk)], (x['id'] for x in d), 'failed to return session')
q = '?dataset_qc_lte=10'
self.assertFalse(self.ar(self.client.get(reverse('session-list') + q)))

def test_surgeries(self):
from actions.models import Surgery
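For reference, the query strings exercised above can be issued directly over HTTP. A hedged sketch using the requests library (base URL and token are placeholders, not part of this commit):

import requests

base = 'https://alyx.example.org'
headers = {'Authorization': 'Token <your-token>'}
# Sessions where BOTH named datasets exist, each with QC <= WARNING:
params = {'datasets': '_ibl_wheel.position.npy,_ibl_trials.table.pqt',
          'dataset_qc_lte': 'WARNING'}
sessions = requests.get(base + '/sessions', headers=headers, params=params).json()
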
20 changes: 17 additions & 3 deletions alyx/actions/views.py
@@ -1,6 +1,7 @@
from datetime import timedelta, date
from operator import itemgetter

from one.alf.spec import QC
from django.contrib.postgres.fields import JSONField
from django.db.models import Count, Q, F, ExpressionWrapper, FloatField
from django.db.models.deletion import Collector
@@ -226,6 +227,7 @@ class SessionFilter(BaseActionFilter):
dataset_types = django_filters.CharFilter(field_name='dataset_types',
method='filter_dataset_types')
datasets = django_filters.CharFilter(field_name='datasets', method='filter_datasets')
dataset_qc_lte = django_filters.CharFilter(field_name='dataset_qc', method='filter_dataset_qc_lte')
performance_gte = django_filters.NumberFilter(field_name='performance',
method='filter_performance_gte')
performance_lte = django_filters.NumberFilter(field_name='performance',
@@ -284,13 +286,23 @@ def filter_dataset_types(self, queryset, _, value):

def filter_datasets(self, queryset, _, value):
# Note this may later be modified to include collections, e.g. ?datasets=alf/obj.attr.ext
qc = QC.validate(self.request.query_params.get('dataset_qc_lte', QC.FAIL))
dsets = value.split(',')
queryset = queryset.filter(data_dataset_session_related__name__in=dsets)
queryset = queryset.filter(data_dataset_session_related__name__in=dsets,
data_dataset_session_related__qc__lte=qc)
queryset = queryset.annotate(
dsets_count=Count('data_dataset_session_related', distinct=True))
queryset = queryset.filter(dsets_count__gte=len(dsets))
return queryset

def filter_dataset_qc_lte(self, queryset, _, value):
# If filtering on datasets too, `filter_datasets` handles both QC and Datasets
if 'datasets' in self.request.query_params:
return queryset
qc = QC.validate(value)
queryset = queryset.filter(data_dataset_session_related__qc__lte=qc)
return queryset

def filter_performance_gte(self, queryset, name, perf):
queryset = queryset.exclude(n_trials__isnull=True)
pf = ExpressionWrapper(100 * F('n_correct_trials') / F('n_trials'),
@@ -326,6 +338,8 @@ class SessionAPIList(generics.ListCreateAPIView):
- **subject**: subject nickname `/sessions?subject=Algernon`
- **dataset_types**: dataset type(s) `/sessions?dataset_types=trials.table,camera.times`
- **datasets**: dataset name(s) `/sessions?datasets=_ibl_leftCamera.times.npy`
- **dataset_qc_lte**: dataset QC values less than or equal to this
`/sessions?dataset_qc_lte=WARNING`
- **number**: session number
- **users**: experimenters (exact)
- **date_range**: date `/sessions?date_range=2020-01-12,2020-01-16`
@@ -354,9 +368,9 @@ class SessionAPIList(generics.ListCreateAPIView):
- **histology**: returns sessions for which the subject has a histology session:
`/sessions?histology=True`
- **django**: generic filter allowing lookups (same syntax as json filter)
`/sessions?django=project__name__icontains,matlab
`/sessions?django=project__name__icontains,matlab`
filters sessions that have matlab in the project name
`/sessions?django=~project__name__icontains,matlab
`/sessions?django=~project__name__icontains,matlab`
does the exclusive set: filters sessions that do not have matlab in the project name
[===> session model reference](/admin/doc/models/actions.session)
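When both datasets and dataset_qc_lte are supplied, filter_datasets applies the QC threshold to each named dataset, then keeps only sessions whose distinct matching-dataset count reaches the number of names requested. A plain-Python analogue of that annotate/Count logic (illustrative only, no ORM):

def matching_sessions(sessions, dsets, qc_lte=40):
    """sessions: {eid: {dataset_name: qc_int}}; mirrors the Count >= len(dsets) filter."""
    out = []
    for eid, datasets in sessions.items():
        n_match = sum(1 for name in dsets
                      if name in datasets and datasets[name] <= qc_lte)
        if n_match >= len(dsets):  # every requested dataset present under threshold
            out.append(eid)
    return out

ses = {'ses1': {'_ibl_wheel.position.npy': 30, '_ibl_trials.table.pqt': 40}}
assert matching_sessions(ses, ['_ibl_wheel.position.npy'], qc_lte=30) == ['ses1']
assert matching_sessions(ses, ['_ibl_wheel.position.npy',
                               '_ibl_trials.table.pqt'], qc_lte=30) == []
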
2 changes: 1 addition & 1 deletion alyx/alyx/__init__.py
@@ -1 +1 @@
VERSION = __version__ = '1.18.2'
VERSION = __version__ = '2.0.0'
9 changes: 5 additions & 4 deletions alyx/data/admin.py
@@ -1,7 +1,7 @@
from django.db.models import Count, ProtectedError
from django.contrib import admin, messages
from django.utils.html import format_html
from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter
from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter, ChoiceDropdownFilter
from rangefilter.filters import DateRangeFilter

from .models import (DataRepositoryType, DataRepository, DataFormat, DatasetType,
@@ -84,16 +84,17 @@ class FileRecordInline(BaseInlineAdmin):
class DatasetAdmin(BaseExperimentalDataAdmin):
fields = ['name', '_online', 'version', 'dataset_type', 'file_size', 'hash',
'session_ro', 'collection', 'auto_datetime', 'revision_', 'default_dataset',
'_protected', '_public', 'tags']
'_protected', '_public', 'tags', 'qc']
readonly_fields = ['name_', 'session_ro', '_online', 'auto_datetime', 'revision_',
'_protected', '_public', 'tags']
list_display = ['name_', '_online', 'version', 'collection', 'dataset_type_', 'file_size',
'session_ro', 'created_by', 'created_datetime']
'session_ro', 'created_by', 'created_datetime', 'qc']
inlines = [FileRecordInline]
list_filter = [('created_by', RelatedDropdownFilter),
('created_datetime', DateRangeFilter),
('dataset_type', RelatedDropdownFilter),
('tags', RelatedDropdownFilter)
('tags', RelatedDropdownFilter),
('qc', ChoiceDropdownFilter)
]
search_fields = ('session__id', 'name', 'collection', 'dataset_type__name',
'dataset_type__filename_pattern', 'version')
18 changes: 18 additions & 0 deletions alyx/data/migrations/0019_dataset_qc.py
@@ -0,0 +1,18 @@
# Generated by Django 4.2.10 on 2024-02-13 15:16

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('data', '0018_alter_dataset_collection_alter_revision_name'),
]

operations = [
migrations.AddField(
model_name='dataset',
name='qc',
field=models.IntegerField(choices=[(50, 'CRITICAL'), (40, 'FAIL'), (30, 'WARNING'), (0, 'NOT_SET'), (10, 'PASS')], default=0, help_text='50: CRITICAL / 40: FAIL / 30: WARNING / 0: NOT_SET / 10: PASS'),
),
]
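
The choices and help_text above were serialized by Django from the enum-driven field definition; rather than editing this file by hand, it can be regenerated (assuming a configured Alyx checkout) with:

from django.core.management import call_command
call_command('makemigrations', 'data')  # re-emits the qc AddField migration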
5 changes: 5 additions & 0 deletions alyx/data/models.py
@@ -1,4 +1,5 @@
import structlog
from one.alf.spec import QC

from django.core.validators import RegexValidator
from django.db import models
@@ -351,6 +352,10 @@ class Dataset(BaseExperimentalData):
help_text="Whether this dataset is the default "
"latest revision")

QC_CHOICES = [(e.value, e.name) for e in QC]
qc = models.IntegerField(default=QC.NOT_SET, choices=QC_CHOICES,
help_text=' / '.join([str(q[0]) + ': ' + q[1] for q in QC_CHOICES]))

@property
def is_online(self):
fr = self.file_records.filter(data_repository__globus_is_personal=False)
4 changes: 3 additions & 1 deletion alyx/data/serializers.py
@@ -5,6 +5,7 @@
from .models import (DataRepositoryType, DataRepository, DataFormat, DatasetType,
Dataset, Download, FileRecord, Revision, Tag)
from .transfers import _get_session, _change_default_dataset
from alyx.base import BaseSerializerEnumField
from actions.models import Session
from subjects.models import Subject
from misc.models import LabMember
@@ -142,6 +143,7 @@ class DatasetSerializer(serializers.HyperlinkedModelSerializer):
default_dataset = serializers.BooleanField(required=False, allow_null=True)
public = serializers.ReadOnlyField()
protected = serializers.ReadOnlyField()
qc = BaseSerializerEnumField(required=False)
file_records = DatasetFileRecordsSerializer(read_only=True, many=True)

experiment_number = serializers.SerializerMethodField()
@@ -213,7 +215,7 @@ class Meta:
'session', 'file_size', 'hash', 'version',
'experiment_number', 'file_records',
'subject', 'date', 'number', 'auto_datetime', 'revision',
'default_dataset', 'protected', 'public', 'tags')
'default_dataset', 'protected', 'public', 'tags', 'qc')
extra_kwargs = {
'subject': {'write_only': True},
'date': {'write_only': True},
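BaseSerializerEnumField is imported from alyx.base but its body is not part of this diff. A plausible sketch of such a field — an assumption, not the actual implementation — rendering the stored integer as the enum name on read and accepting either names or numbers on write:

from rest_framework import serializers

class EnumField(serializers.ChoiceField):
    """Hypothetical enum-aware field: read -> name, write -> int value."""
    def __init__(self, enum_cls, **kwargs):
        self.enum_cls = enum_cls
        super().__init__(choices=[(e.value, e.name) for e in enum_cls], **kwargs)

    def to_representation(self, value):
        return self.enum_cls(value).name

    def to_internal_value(self, data):
        try:
            if isinstance(data, str) and not data.isdigit():
                return self.enum_cls[data.upper()].value
            return self.enum_cls(int(data)).value
        except (KeyError, ValueError):
            self.fail('invalid_choice', input=data)
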
2 changes: 2 additions & 0 deletions alyx/data/views.py
@@ -157,6 +157,7 @@ class DatasetFilter(BaseFilterSet):
protected = django_filters.BooleanFilter(method='filter_protected')
tag = django_filters.CharFilter('tags__name')
revision = django_filters.CharFilter('revision__name')
qc = django_filters.CharFilter(method='enum_field_filter')

class Meta:
model = Dataset
@@ -212,6 +213,7 @@ class DatasetList(generics.ListCreateAPIView):
- **tag**: tag name `/datasets?tag=repeated_site`
- **public**: only returns datasets that are public or not public
- **protected**: only returns datasets that are protected or not protected
- **qc**: only returns datasets with this QC value `/datasets?qc=PASS`
[===> dataset model reference](/admin/doc/models/data.dataset)
"""
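Client side, the same filter is reachable through ONE's Alyx client, which forwards keyword arguments as query parameters (hedged example; assumes ONE >= 2.7 and a reachable instance):

from one.api import ONE

one = ONE(base_url='https://alyx.example.org')
passing = one.alyx.rest('datasets', 'list', qc='PASS')  # QC name or integer code
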
2 changes: 1 addition & 1 deletion alyx/experiments/serializers.py
@@ -87,7 +87,7 @@ class Meta:
list_serializer_class = FilterDatasetSerializer
model = Dataset
fields = ('id', 'name', 'dataset_type', 'data_url', 'url', 'file_size',
'hash', 'version', 'collection')
'hash', 'version', 'collection', 'qc')


class ChronicProbeInsertionListSerializer(serializers.ModelSerializer):
24 changes: 21 additions & 3 deletions alyx/experiments/tests_rest.py
@@ -344,9 +344,9 @@ def test_dataset_filters(self):
tag, _ = Tag.objects.get_or_create(name='tag_test')

d1 = Dataset.objects.create(session=self.session, name='spikes.times.npy',
dataset_type=dtype1, collection='alf/probe_00')
dataset_type=dtype1, collection='alf/probe_00', qc=30)
Dataset.objects.create(session=self.session, name='clusters.amps.npy',
dataset_type=dtype2, collection='alf/probe_00')
dataset_type=dtype2, collection='alf/probe_00', qc=40)
d1.tags.add(tag)
d1.save()

@@ -368,10 +368,28 @@ def test_dataset_filters(self):
d = self.ar(self.client.get(reverse('probeinsertion-list') + q))
self.assertEqual(len(d), 1)
self.assertEqual(probe['id'], d[0]['id'])

q = '?datasets=clusters.amps'
self.assertFalse(self.ar(self.client.get(reverse('probeinsertion-list') + q)))

# test dataset + qc filters
q = '?datasets=spikes.times.npy,clusters.amps.npy&dataset_qc_lte=FAIL'
d = self.ar(self.client.get(reverse('probeinsertion-list') + q))
self.assertEqual(len(d), 1, 'Expect insertion returned as all dsets match QC')
q = '?datasets=spikes.times.npy,clusters.amps.npy&dataset_qc_lte=WARNING'
d = self.ar(self.client.get(reverse('probeinsertion-list') + q))
self.assertEqual(len(d), 0, "Expect none returned as one dset doesn't match QC")
q = '?datasets=spikes.times.npy&dataset_qc_lte=30' # QC code should also work
d = self.ar(self.client.get(reverse('probeinsertion-list') + q))
self.assertEqual(len(d), 1, 'Expect insertion returned as searched dset matches QC')

# test qc alone
q = '?dataset_qc_lte=WARNING'
d = self.ar(self.client.get(reverse('probeinsertion-list') + q))
self.assertEqual(len(d), 1, 'Expect insertion returned as at least 1 dset matches QC')
q = '?dataset_qc_lte=10' # PASS
d = self.ar(self.client.get(reverse('probeinsertion-list') + q))
self.assertEqual(len(d), 0, 'Expect none returned as no dset matches QC')

# test filtering by tag
q = '?tag=tag_test'
d = self.ar(self.client.get(reverse('probeinsertion-list') + q))
13 changes: 12 additions & 1 deletion alyx/experiments/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging

from one.alf.spec import QC
from rest_framework import generics
from django_filters.rest_framework import CharFilter, UUIDFilter, NumberFilter
from django.db.models import Count, Q
@@ -73,6 +74,7 @@ class ProbeInsertionFilter(BaseFilterSet):
model = CharFilter('model__name')
dataset_types = CharFilter(field_name='dataset_types', method='filter_dataset_types')
datasets = CharFilter(field_name='datasets', method='filter_datasets')
dataset_qc_lte = CharFilter(field_name='dataset_qc', method='filter_dataset_qc_lte')
lab = CharFilter(field_name='session__lab__name', lookup_expr='iexact')
project = CharFilter(field_name='session__project__name', lookup_expr='icontains')
task_protocol = CharFilter(field_name='session__task_protocol', lookup_expr='icontains')
@@ -110,13 +112,21 @@ def filter_dataset_types(self, queryset, _, value):
return queryset

def filter_datasets(self, queryset, _, value):
qc = QC.validate(self.request.query_params.get('dataset_qc_lte', QC.FAIL))
dsets = value.split(',')
queryset = queryset.filter(datasets__name__in=dsets)
queryset = queryset.filter(datasets__name__in=dsets, datasets__qc__lte=qc)
queryset = queryset.annotate(
dsets_count=Count('datasets', distinct=True))
queryset = queryset.filter(dsets_count__gte=len(dsets))
return queryset

def filter_dataset_qc_lte(self, queryset, _, value):
# If filtering on datasets too, `filter_datasets` handles both QC and Datasets
if 'datasets' in self.request.query_params:
return queryset
qc = QC.validate(value)
return queryset.filter(datasets__qc__lte=qc)

class Meta:
model = ProbeInsertion
exclude = ['json']
@@ -139,6 +149,7 @@ class ProbeInsertionList(generics.ListCreateAPIView):
- **tag**: tag name (icontains)
- **dataset_types**: dataset type(s)
- **datasets**: datasets name(s)
- **dataset_qc_lte**: dataset QC value, e.g. PASS, WARNING, FAIL, CRITICAL
- **atlas_name**: returns a session if any channel name icontains
the value: `/insertions?brain_region=visual cortex`
- **atlas_acronym**: returns a session if any of its channels name exactly
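QC.validate (from one.alf.spec) normalises the incoming query value to a QC member before the comparison, which is why callers can pass either a name ('WARNING') or a code ('30'). A short illustration, assuming that coercion behaviour:

from one.alf.spec import QC

assert QC.validate('WARNING') is QC.WARNING
assert QC.validate('30') is QC.WARNING  # numeric strings are accepted too
assert QC.validate(QC.FAIL) is QC.FAIL  # members pass straight through
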
9 changes: 7 additions & 2 deletions alyx/misc/management/commands/one_cache.py
@@ -16,6 +16,8 @@
import pyarrow as pa
from tqdm import tqdm
from one.alf.cache import _metadata
from one.util import QC_TYPE
from one.alf.spec import QC
from one.remote.aws import get_s3_virtual_host

from django.db import connection
@@ -30,7 +32,7 @@
from experiments.models import ProbeInsertion

logger = logging.getLogger(__name__)
ONE_API_VERSION = '1.13.0' # Minimum compatible ONE api version
ONE_API_VERSION = '2.7.0' # Minimum compatible ONE api version


def measure_time(func):
@@ -382,7 +384,7 @@ def generate_datasets_frame(tags=None, batch_size=100_000) -> pd.DataFrame:
fields = (
'id', 'name', 'file_size', 'hash', 'collection', 'revision__name', 'default_dataset',
'session__id', 'session__start_time__date', 'session__number',
'session__subject__nickname', 'session__lab__name', 'exists_flatiron', 'exists_aws'
'session__subject__nickname', 'session__lab__name', 'exists_flatiron', 'exists_aws', 'qc'
)
fields_map = {'session__id': 'eid', 'default_dataset': 'default_revision'}

@@ -411,6 +413,9 @@ def generate_datasets_frame(tags=None, batch_size=100_000) -> pd.DataFrame:
df[['id', 'eid']] = df[['id', 'eid']].astype(str)
df = df.set_index(['eid', 'id'])

# Convert QC enum int to pandas category
df['qc'] = pd.Categorical([QC(i).name for i in df['qc']], dtype=QC_TYPE)

all_df = pd.concat([all_df, df], ignore_index=False, copy=False)

logger.debug(f'Final datasets frame = {getsizeof(all_df) / 1024 ** 2:.1f} MiB')
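QC_TYPE, imported from one.util, is an ordered pandas CategoricalDtype over the QC names. A stand-alone sketch of the conversion step above, with the dtype reconstructed under that assumption:

import pandas as pd
from enum import IntEnum

class QC(IntEnum):  # stand-in for one.alf.spec.QC
    NOT_SET = 0
    PASS = 10
    WARNING = 30
    FAIL = 40
    CRITICAL = 50

QC_TYPE = pd.CategoricalDtype(categories=[e.name for e in sorted(QC)], ordered=True)
qc_column = pd.Categorical([QC(i).name for i in [0, 40, 30]], dtype=QC_TYPE)
# ['NOT_SET', 'FAIL', 'WARNING'], ordered NOT_SET < PASS < WARNING < FAIL < CRITICAL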