Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Amos #66

Merged
merged 11 commits into from
Oct 17, 2023
Merged

Amos #66

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ print(entry.split, entry.patient)

| Name | Entries | Body region | Modality |
|:-----------------------------------------------------------------------------------------------------------------------------------|----------:|:------------------------------------|:-----------------------------------------------------------------------|
| <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.amos.dataset.AMOS">AMOS</a> | 600 | Abdomen | CT, MRI |
| <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.amos.dataset.AMOS">AMOS</a> | 2465 | Abdomen | CT, MRI |
| <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.bimcv.BIMCVCovid19">BIMCVCovid19</a> | 16335 | Chest | CT |
| <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.brats2021.BraTS2021">BraTS2021</a> | 5880 | Head | MRI T1, MRI T1Gd, MRI T2, MRI T2-FLAIR |
| <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.cc359.dataset.CC359">CC359</a> | 359 | Head | MRI T1 |
Expand Down
109 changes: 87 additions & 22 deletions amid/amos/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
from .utils import label


ARCHIVE_NAME = 'amos22.zip'
ARCHIVE_NAME_SEG = 'amos22.zip'
ARCHIVE_ROOT_NAME = 'amos22'
ERRORS = ['5514', '5437'] # these ids are damaged in the zip archives
# TODO: add MRI


class AMOSBase(Source):
Expand Down Expand Up @@ -56,42 +58,71 @@ def _base(_root: Silent):
return Path(_root)

@meta
def ids(_id2split):
return sorted(_id2split)

def image(i, _id2split, _base):
file = f'images{_id2split[i]}/amos_{i}.nii.gz'

with unpack(_base / ARCHIVE_NAME, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
def ids(_id2split, _ids_unlabelled):
labelled = sorted(_id2split)
unlabelled = sorted(_ids_unlabelled)
return labelled + unlabelled

def image(i, _id2split, _base, _archive_name):
    """Corresponding 3D image, or ``None`` for ids listed in ``ERRORS``."""
    # Ids in ERRORS are damaged inside the zip archives, so nothing is read.
    if i in ERRORS:
        return None

    # Start from the per-id archive location; ids present in `_id2split`
    # are redirected to the segmentation archive below.
    source, root = _archive_name
    if i not in _id2split:
        member = f'amos_{i}.nii.gz'
    else:
        source, root = ARCHIVE_NAME_SEG, ARCHIVE_ROOT_NAME
        member = f'images{_id2split[i]}/amos_{i}.nii.gz'

    with unpack(_base / source, member, root, '.zip') as (unpacked, is_unpacked):
        # NOTE(review): presumably `is_unpacked` means `unpacked` is a regular
        # extracted file rather than a stream from the zip - confirm in `unpack`.
        if not is_unpacked:
            with open_nii_gz_file(unpacked) as nii:
                return np.asarray(nii.dataobj)
        return np.asarray(nibabel.load(unpacked).dataobj)

def affine(i, _id2split, _base):
"""The 4x4 matrix that gives the image's spatial orientation"""
file = f'images{_id2split[i]}/amos_{i}.nii.gz'

with unpack(_base / ARCHIVE_NAME, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
def affine(i, _id2split, _base, _archive_name):
    """The 4x4 matrix that gives the image's spatial orientation, or ``None`` for ids in ``ERRORS``."""
    # Ids in ERRORS are damaged inside the zip archives, so nothing is read.
    if i in ERRORS:
        return None

    # Start from the per-id archive location; ids present in `_id2split`
    # are redirected to the segmentation archive below.
    source, root = _archive_name
    if i not in _id2split:
        member = f'amos_{i}.nii.gz'
    else:
        source, root = ARCHIVE_NAME_SEG, ARCHIVE_ROOT_NAME
        member = f'images{_id2split[i]}/amos_{i}.nii.gz'

    with unpack(_base / source, member, root, '.zip') as (unpacked, is_unpacked):
        # NOTE(review): presumably `is_unpacked` means `unpacked` is a regular
        # extracted file rather than a stream from the zip - confirm in `unpack`.
        if not is_unpacked:
            with open_nii_gz_file(unpacked) as nii:
                return nii.affine
        return nibabel.load(unpacked).affine

def mask(i, _id2split, _base):
file = f'labels{_id2split[i]}/amos_{i}.nii.gz'
if i in _id2split:
file = f'labels{_id2split[i]}/amos_{i}.nii.gz'
else:
return

try:
with unpack(_base / ARCHIVE_NAME, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
with unpack(_base / ARCHIVE_NAME_SEG, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
if is_unpacked:
return np.asarray(nibabel.load(unpacked).dataobj)
else:
with open_nii_gz_file(unpacked) as image:
return np.asarray(image.dataobj)
except FileNotFoundError:
return None
return

def image_modality(i):
    """Returns image modality, `CT` or `MRI`."""
    # Ids strictly above 500 and up to 600 (inclusive) are reported as MRI;
    # every other id is reported as CT.
    return 'MRI' if 500 < int(i) <= 600 else 'CT'

# labels

Expand All @@ -107,7 +138,7 @@ def mask(i, _id2split, _base):
def _id2split(_base):
id2split = {}

with ZipFile(_base / ARCHIVE_NAME) as zf:
with ZipFile(_base / ARCHIVE_NAME_SEG) as zf:
for x in zf.namelist():
if (len(x.strip('/').split('/')) == 3) and x.endswith('.nii.gz'):
file, split = x.split('/')[-1], x.split('/')[-2][-2:]
Expand All @@ -117,12 +148,46 @@ def _id2split(_base):

return id2split

def _ids_unlabelled(_base):
    """Collect the ids of all ``.nii.gz`` members of the four unlabelled CT archives.

    The id is the text after the last underscore of the member's file name,
    taken before the first dot. Ids are returned in archive order, then in
    the order the members are listed inside each archive.
    """
    archives = (
        'amos22_unlabeled_ct_5000_5399.zip',
        'amos22_unlabeled_ct_5400_5899.zip',
        'amos22_unlabeled_ct_5900_6199.zip',
        'amos22_unlabeled_ct_6200_6899.zip',
    )

    collected = []
    for archive in archives:
        with ZipFile(_base / archive) as zf:
            collected.extend(
                # 'dir/amos_5000.nii.gz' -> 'amos_5000.nii.gz' -> 'amos_5000' -> '5000'
                member.split('/')[-1].split('.')[0].split('_')[-1]
                for member in zf.namelist()
                if member.endswith('.nii.gz')
            )
    return collected

@lru_cache(None)
def _meta(_base):
file = 'labeled_data_meta_0000_0599.csv'

with unpack(_base, file) as (unpacked, _):
return pd.read_csv(unpacked)
files = [
'labeled_data_meta_0000_0599.csv',
'unlabeled_data_meta_5400_5899.csv',
'unlabeled_data_meta_5000_5399.csv',
'unlabeled_data_meta_5900_6199.csv',
]

dfs = []
for file in files:
with unpack(_base, file) as (unpacked, _):
dfs.append(pd.read_csv(unpacked))
return pd.concat(dfs)

def _archive_name(i):
    """Return ``(archive file name, root folder name)`` of the unlabelled CT archive for id ``i``.

    Any id that falls outside the first three ranges - including ids below
    5000 - is mapped to the last archive.
    """
    number = int(i)

    # (inclusive lower bound, exclusive upper bound, (archive, root folder)).
    # NOTE(review): the root folder names are not uniformly prefixed; they are
    # kept verbatim and presumably mirror the real archive layouts - confirm.
    ranges = (
        (5000, 5400, ('amos22_unlabeled_ct_5000_5399.zip', 'amos_unlabeled_ct_5000_5399')),
        (5400, 5900, ('amos22_unlabeled_ct_5400_5899.zip', 'amos_unlabeled_ct_5400_5899')),
        (5900, 6200, ('amos22_unlabeled_ct_5900_6199.zip', 'amos22_unlabeled_ct_5900_6199')),
    )
    for lower, upper, location in ranges:
        if lower <= number < upper:
            return location

    return 'amos22_unlabeled_ct_6200_6899.zip', 'amos22_unlabeled_6200_6899'


class SpacingFromAffine(Transform):
Expand All @@ -140,7 +205,7 @@ def spacing(affine):
license=licenses.CC_BY_40,
link='https://zenodo.org/record/7262581',
modality=('CT', 'MRI'),
raw_data_size='23G',
raw_data_size='23G', # TODO: update size with unlabelled
prep_data_size='89,5G',
task='Supervised multi-modality abdominal multi-organ segmentation',
normalizers=[SpacingFromAffine()],
Expand Down
2 changes: 2 additions & 0 deletions amid/amos/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ def loader(column, i, _meta):
# ambiguous data in meta
if int(i) in [500, 600]:
return None
elif int(i) not in _meta['amos_id']:
return None

return _meta[_meta['amos_id'] == int(i)][column].item()

Expand Down
2 changes: 1 addition & 1 deletion amid/data/amos.hash
Original file line number Diff line number Diff line change
@@ -1 +1 @@
T:9311589f6b781685cc82e84bc605f7ea679cc2bf68a278b0de823ef57b1d7d9af42692059f19c51c31ca3f247bf0f9281d8487bdbb88feefd973b1cf62b54a6d
T:6db6448dc4627ae833ad594f50152357582fac226c933b88bb754f1db422d1780b81d3375c2707d83023b52ff228112ffe6379e41debcbbedf0b1fb36c544eee