Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Magic #71

Merged
merged 37 commits into from
Aug 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
acc3db8
simplifying some datasets
maxme1 Dec 13, 2023
5d6ad30
started poking around
maxme1 Jan 10, 2024
9ddb09d
Merge branch 'dev' into magic
maxme1 Apr 15, 2024
f40d675
converted more datasets
maxme1 Apr 26, 2024
cf8b5e7
magic kits
May 14, 2024
a305f80
removed more magic
maxme1 May 14, 2024
ff51475
totalsegmentator
maxme1 May 15, 2024
22ebc5d
midrc
maxme1 May 15, 2024
7185acb
mood
maxme1 May 20, 2024
5ead92b
fields
maxme1 May 20, 2024
3df8846
cancer500 and nlst
maxme1 Jun 4, 2024
d7f1d9c
typo
maxme1 Jun 4, 2024
9e54818
ids
maxme1 Jun 5, 2024
30db1d3
bimcv
maxme1 Jun 5, 2024
253c05b
typo
maxme1 Jun 12, 2024
326c99f
lidc
evgenyasoboleva Jun 18, 2024
1cb527e
removed deprecated fields
maxme1 Jun 19, 2024
a6cab40
check on init
maxme1 Jun 19, 2024
a1c9f6e
added type annotations
evgenyasoboleva Jun 19, 2024
5556d59
Merge branch 'magic' of https://github.com/neuro-ml/amid into magic
evgenyasoboleva Jun 19, 2024
76ab86a
added more type annotations
evgenyasoboleva Jun 19, 2024
daad7ac
typo
maxme1 Jun 19, 2024
a6bac86
removed an unneeded branch
maxme1 Jun 19, 2024
631dea4
more datasets converted
maxme1 Jun 23, 2024
f1debea
more migration
maxme1 Jun 23, 2024
4ba7f4c
more migration
maxme1 Jun 23, 2024
f20c25a
removed the checksum class
maxme1 Jun 23, 2024
2b23aa0
mapping
maxme1 Jun 23, 2024
0aeb205
name clash
maxme1 Jun 23, 2024
bff6163
typo
maxme1 Jun 25, 2024
3197ea0
fields
maxme1 Jul 1, 2024
0b1f603
better errors
maxme1 Jul 2, 2024
2db8b8a
fixed typos
maxme1 Jul 2, 2024
71e3dc1
removed unneeded reqs
maxme1 Jul 2, 2024
47fde6c
removed unneeded imports
maxme1 Jul 3, 2024
58c30c7
Merge remote-tracking branch 'refs/remotes/origin/master' into magic
Aug 4, 2024
652e3a9
bumped
Aug 4, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2022-2023 NeuroML Group
Copyright (c) 2022-2024 NeuroML Group

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
80 changes: 37 additions & 43 deletions README.md

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion amid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from .egd import EGD
from .flare2022 import FLARE2022
from .hcp import HCP
from .internals import CacheColumns, CacheToDisk
from .lidc import LIDC
from .lits import LiTS
from .liver_medseg import LiverMedseg
Expand Down
2 changes: 1 addition & 1 deletion amid/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.13.0'
__version__ = '0.14.0'
166 changes: 88 additions & 78 deletions amid/amos/dataset.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,33 @@
from functools import lru_cache
from pathlib import Path
from functools import cached_property
from zipfile import ZipFile

import nibabel
import numpy as np
import pandas as pd
from connectome import Source, Transform, meta
from connectome.interface.nodes import Silent
from jboc import collect, composed

from ..internals import licenses, normalize
from ..internals import Dataset, field, licenses, register
from ..utils import open_nii_gz_file, unpack
from .utils import label


ARCHIVE_NAME_SEG = 'amos22.zip'
ARCHIVE_ROOT_NAME = 'amos22'
ERRORS = ['5514', '5437'] # these ids are damaged in the zip archives


# TODO: add MRI


class AMOSBase(Source):
@register(
body_region='Abdomen',
license=licenses.CC_BY_40,
link='https://zenodo.org/record/7262581',
modality=('CT', 'MRI'),
raw_data_size='23G', # TODO: update size with unlabelled
prep_data_size='89,5G',
task='Supervised multi-modality abdominal multi-organ segmentation',
)
class AMOS(Dataset):
"""
AMOS provides 500 CT and 100 MRI scans collected from multi-center, multi-vendor, multi-modality, multi-phase,
multi-disease patients, each with voxel-level annotations of 15 abdominal organs, providing challenging examples
Expand Down Expand Up @@ -52,64 +60,61 @@ class AMOSBase(Source):
versatile medical image segmentation [Data set]. Zenodo. https://doi.org/10.5281/zenodo.7262581
"""

_root: str = None

def _base(_root: Silent):
return Path(_root)

@meta
def ids(_id2split, _ids_unlabelled):
labelled = sorted(_id2split)
unlabelled = sorted(_ids_unlabelled)
@property
def ids(self):
    """All case identifiers: the sorted labelled ids followed by the sorted unlabelled ids."""
    # the two groups are sorted independently and then concatenated, labelled first
    return sorted(self._id2split) + sorted(self._ids_unlabelled)

def image(i, _id2split, _base, _archive_name):
@field
def image(self, i):
"""Corresponding 3D image."""
if i in ERRORS:
return None # this image is damaged in the archive

archive_name, archive_root = _archive_name
if i in _id2split:
archive_name, archive_root = self._archive_name(i)
if i in self._id2split:
archive_name = ARCHIVE_NAME_SEG
archive_root = ARCHIVE_ROOT_NAME
file = f'images{_id2split[i]}/amos_{i}.nii.gz'
file = f'images{self._id2split[i]}/amos_{i}.nii.gz'
else:
file = f'amos_{i}.nii.gz'

with unpack(_base / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked):
with unpack(self.root / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked):
if is_unpacked:
return np.asarray(nibabel.load(unpacked).dataobj)
else:
with open_nii_gz_file(unpacked) as image:
return np.asarray(image.dataobj)

def affine(i, _id2split, _base, _archive_name):
@field
def affine(self, i):
"""The 4x4 matrix that gives the image's spatial orientation."""
if i in ERRORS:
return None # this image is damaged in the archive
archive_name, archive_root = _archive_name
if i in _id2split:
archive_name, archive_root = self._archive_name(i)
if i in self._id2split:
archive_name = ARCHIVE_NAME_SEG
archive_root = ARCHIVE_ROOT_NAME
file = f'images{_id2split[i]}/amos_{i}.nii.gz'
file = f'images{self._id2split[i]}/amos_{i}.nii.gz'
else:
file = f'amos_{i}.nii.gz'

with unpack(_base / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked):
with unpack(self.root / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked):
if is_unpacked:
return nibabel.load(unpacked).affine
else:
with open_nii_gz_file(unpacked) as image:
return image.affine

def mask(i, _id2split, _base):
if i in _id2split:
file = f'labels{_id2split[i]}/amos_{i}.nii.gz'
else:
@field
def mask(self, i):
if i not in self._id2split:
return

file = f'labels{self._id2split[i]}/amos_{i}.nii.gz'
try:
with unpack(_base / ARCHIVE_NAME_SEG, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
with unpack(self.root / ARCHIVE_NAME_SEG, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
if is_unpacked:
return np.asarray(nibabel.load(unpacked).dataobj)
else:
Expand All @@ -118,54 +123,71 @@ def mask(i, _id2split, _base):
except FileNotFoundError:
return

def image_modality(i):
@field
def image_modality(self, i):
    """Returns image modality, `CT` or `MRI`."""
    # ids whose integer value lies in (500, 600] are reported as MRI, everything else as CT
    return 'MRI' if 500 < int(i) <= 600 else 'CT'

# labels

birth_date = label("Patient's Birth Date")
sex = label("Patient's Sex")
age = label("Patient's Age")
manufacturer_model = label("Manufacturer's Model Name")
manufacturer = label('Manufacturer')
acquisition_date = label('Acquisition Date')
site = label('Site')

@lru_cache(None)
def _id2split(_base):
id2split = {}

with ZipFile(_base / ARCHIVE_NAME_SEG) as zf:
@field
def birth_date(self, i):
    """Value of the "Patient's Birth Date" metadata column for case `i`, looked up via `_label`."""
    column = "Patient's Birth Date"
    return self._label(i, column)

@field
def sex(self, i):
    """Value of the "Patient's Sex" metadata column for case `i`, looked up via `_label`."""
    column = "Patient's Sex"
    return self._label(i, column)

@field
def age(self, i):
    """Value of the "Patient's Age" metadata column for case `i`, looked up via `_label`."""
    column = "Patient's Age"
    return self._label(i, column)

@field
def manufacturer_model(self, i):
    """Value of the "Manufacturer's Model Name" metadata column for case `i`, looked up via `_label`."""
    column = "Manufacturer's Model Name"
    return self._label(i, column)

@field
def manufacturer(self, i):
    """Value of the 'Manufacturer' metadata column for case `i`, looked up via `_label`."""
    column = 'Manufacturer'
    return self._label(i, column)

@field
def acquisition_date(self, i):
    """Value of the 'Acquisition Date' metadata column for case `i`, looked up via `_label`."""
    column = 'Acquisition Date'
    return self._label(i, column)

@field
def site(self, i):
    """Value of the 'Site' metadata column for case `i`, looked up via `_label`."""
    column = 'Site'
    return self._label(i, column)

@cached_property
@composed(dict)
def _id2split(self):
with ZipFile(self.root / ARCHIVE_NAME_SEG) as zf:
for x in zf.namelist():
if (len(x.strip('/').split('/')) == 3) and x.endswith('.nii.gz'):
file, split = x.split('/')[-1], x.split('/')[-2][-2:]
id_ = file.split('.')[0].split('_')[-1]

id2split[id_] = split
yield id_, split

return id2split

def _ids_unlabelled(_base):
ids_unlabelled = []
@cached_property
@collect
def _ids_unlabelled(self):
for archive in [
'amos22_unlabeled_ct_5000_5399.zip',
'amos22_unlabeled_ct_5400_5899.zip',
'amos22_unlabeled_ct_5900_6199.zip',
'amos22_unlabeled_ct_6200_6899.zip',
]:
with ZipFile(_base / archive) as zf:
with ZipFile(self.root / archive) as zf:
for x in zf.namelist():
if x.endswith('.nii.gz'):
file = x.split('/')[-1]
id_ = file.split('.')[0].split('_')[-1]
ids_unlabelled.append(id_)
return ids_unlabelled
yield id_

@lru_cache(None)
def _meta(_base):
@cached_property
def _meta(self):
files = [
'labeled_data_meta_0000_0599.csv',
'unlabeled_data_meta_5400_5899.csv',
Expand All @@ -175,11 +197,12 @@ def _meta(_base):

dfs = []
for file in files:
with unpack(_base, file) as (unpacked, _):
with unpack(self.root, file) as (unpacked, _):
dfs.append(pd.read_csv(unpacked))
return pd.concat(dfs)

def _archive_name(i):
@staticmethod
def _archive_name(self, i):
if 5000 <= int(i) < 5400:
return 'amos22_unlabeled_ct_5000_5399.zip', 'amos_unlabeled_ct_5000_5399'
elif 5400 <= int(i) < 5900:
Expand All @@ -189,24 +212,11 @@ def _archive_name(i):
else:
return 'amos22_unlabeled_ct_6200_6899.zip', 'amos22_unlabeled_6200_6899'

def _label(self, i, column):
# ambiguous data in meta
if int(i) in [500, 600]:
return None
elif int(i) not in self._meta['amos_id']:
return None

class SpacingFromAffine(Transform):
__inherit__ = True

def spacing(affine):
return nibabel.affines.voxel_sizes(affine)


AMOS = normalize(
AMOSBase,
'AMOS',
'amos',
body_region='Abdomen',
license=licenses.CC_BY_40,
link='https://zenodo.org/record/7262581',
modality=('CT', 'MRI'),
raw_data_size='23G', # TODO: update size with unlabelled
prep_data_size='89,5G',
task='Supervised multi-modality abdominal multi-organ segmentation',
normalizers=[SpacingFromAffine()],
)
return self._meta[self._meta['amos_id'] == int(i)][column].item()
17 changes: 0 additions & 17 deletions amid/amos/utils.py

This file was deleted.

Loading
Loading