diff --git a/README.md b/README.md
index 924c712a..82e3fe58 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ print(entry.split, entry.patient)
| Name | Entries | Body region | Modality |
|:-----------------------------------------------------------------------------------------------------------------------------------|----------:|:------------------------------------|:-----------------------------------------------------------------------|
-| AMOS | 600 | Abdomen | CT, MRI |
+| AMOS | 2465 | Abdomen | CT, MRI |
| BIMCVCovid19 | 16335 | Chest | CT |
| BraTS2021 | 5880 | Head | MRI T1, MRI T1Gd, MRI T2, MRI T2-FLAIR |
| CC359 | 359 | Head | MRI T1 |
diff --git a/amid/amos/dataset.py b/amid/amos/dataset.py
index 24086793..9c567828 100644
--- a/amid/amos/dataset.py
+++ b/amid/amos/dataset.py
@@ -13,8 +13,10 @@
from .utils import label
-ARCHIVE_NAME = 'amos22.zip'
+ARCHIVE_NAME_SEG = 'amos22.zip'
ARCHIVE_ROOT_NAME = 'amos22'
+ERRORS = ['5514', '5437'] # these ids are damaged in the zip archives
+# TODO: add MRI
class AMOSBase(Source):
@@ -56,24 +58,44 @@ def _base(_root: Silent):
return Path(_root)
@meta
- def ids(_id2split):
- return sorted(_id2split)
-
- def image(i, _id2split, _base):
- file = f'images{_id2split[i]}/amos_{i}.nii.gz'
-
- with unpack(_base / ARCHIVE_NAME, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
+ def ids(_id2split, _ids_unlabelled):
+ labelled = sorted(_id2split)
+ unlabelled = sorted(_ids_unlabelled)
+ return labelled + unlabelled
+
+ def image(i, _id2split, _base, _archive_name):
+ """Corresponding 3D image."""
+ if i in ERRORS:
+ return None # this image is damaged in the archive
+
+ archive_name, archive_root = _archive_name
+ if i in _id2split:
+ archive_name = ARCHIVE_NAME_SEG
+ archive_root = ARCHIVE_ROOT_NAME
+ file = f'images{_id2split[i]}/amos_{i}.nii.gz'
+ else:
+ file = f'amos_{i}.nii.gz'
+
+ with unpack(_base / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked):
if is_unpacked:
return np.asarray(nibabel.load(unpacked).dataobj)
else:
with open_nii_gz_file(unpacked) as image:
return np.asarray(image.dataobj)
- def affine(i, _id2split, _base):
- """The 4x4 matrix that gives the image's spatial orientation"""
- file = f'images{_id2split[i]}/amos_{i}.nii.gz'
-
- with unpack(_base / ARCHIVE_NAME, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
+ def affine(i, _id2split, _base, _archive_name):
+ """The 4x4 matrix that gives the image's spatial orientation."""
+ if i in ERRORS:
+ return None # this image is damaged in the archive
+ archive_name, archive_root = _archive_name
+ if i in _id2split:
+ archive_name = ARCHIVE_NAME_SEG
+ archive_root = ARCHIVE_ROOT_NAME
+ file = f'images{_id2split[i]}/amos_{i}.nii.gz'
+ else:
+ file = f'amos_{i}.nii.gz'
+
+ with unpack(_base / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked):
if is_unpacked:
return nibabel.load(unpacked).affine
else:
@@ -81,17 +103,26 @@ def affine(i, _id2split, _base):
return image.affine
def mask(i, _id2split, _base):
- file = f'labels{_id2split[i]}/amos_{i}.nii.gz'
+ if i in _id2split:
+ file = f'labels{_id2split[i]}/amos_{i}.nii.gz'
+ else:
+ return
try:
- with unpack(_base / ARCHIVE_NAME, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
+ with unpack(_base / ARCHIVE_NAME_SEG, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
if is_unpacked:
return np.asarray(nibabel.load(unpacked).dataobj)
else:
with open_nii_gz_file(unpacked) as image:
return np.asarray(image.dataobj)
except FileNotFoundError:
- return None
+ return
+
+ def image_modality(i):
+ """Returns image modality, `CT` or `MRI`."""
+ if 500 < int(i) <= 600:
+ return 'MRI'
+ return 'CT'
# labels
@@ -107,7 +138,7 @@ def mask(i, _id2split, _base):
def _id2split(_base):
id2split = {}
- with ZipFile(_base / ARCHIVE_NAME) as zf:
+ with ZipFile(_base / ARCHIVE_NAME_SEG) as zf:
for x in zf.namelist():
if (len(x.strip('/').split('/')) == 3) and x.endswith('.nii.gz'):
file, split = x.split('/')[-1], x.split('/')[-2][-2:]
@@ -117,12 +148,46 @@ def _id2split(_base):
return id2split
+ def _ids_unlabelled(_base):
+ ids_unlabelled = []
+ for archive in [
+ 'amos22_unlabeled_ct_5000_5399.zip',
+ 'amos22_unlabeled_ct_5400_5899.zip',
+ 'amos22_unlabeled_ct_5900_6199.zip',
+ 'amos22_unlabeled_ct_6200_6899.zip',
+ ]:
+ with ZipFile(_base / archive) as zf:
+ for x in zf.namelist():
+ if x.endswith('.nii.gz'):
+ file = x.split('/')[-1]
+ id_ = file.split('.')[0].split('_')[-1]
+ ids_unlabelled.append(id_)
+ return ids_unlabelled
+
@lru_cache(None)
def _meta(_base):
- file = 'labeled_data_meta_0000_0599.csv'
-
- with unpack(_base, file) as (unpacked, _):
- return pd.read_csv(unpacked)
+ files = [
+ 'labeled_data_meta_0000_0599.csv',
+ 'unlabeled_data_meta_5400_5899.csv',
+ 'unlabeled_data_meta_5000_5399.csv',
+ 'unlabeled_data_meta_5900_6199.csv',
+ ]
+
+ dfs = []
+ for file in files:
+ with unpack(_base, file) as (unpacked, _):
+ dfs.append(pd.read_csv(unpacked))
+ return pd.concat(dfs)
+
+ def _archive_name(i):
+ if 5000 <= int(i) < 5400:
+ return 'amos22_unlabeled_ct_5000_5399.zip', 'amos_unlabeled_ct_5000_5399'
+ elif 5400 <= int(i) < 5900:
+ return 'amos22_unlabeled_ct_5400_5899.zip', 'amos_unlabeled_ct_5400_5899'
+ elif 5900 <= int(i) < 6200:
+ return 'amos22_unlabeled_ct_5900_6199.zip', 'amos22_unlabeled_ct_5900_6199'
+ else:
+ return 'amos22_unlabeled_ct_6200_6899.zip', 'amos22_unlabeled_6200_6899'
class SpacingFromAffine(Transform):
@@ -140,7 +205,7 @@ def spacing(affine):
license=licenses.CC_BY_40,
link='https://zenodo.org/record/7262581',
modality=('CT', 'MRI'),
- raw_data_size='23G',
+ raw_data_size='23G', # TODO: update size with unlabelled
prep_data_size='89,5G',
task='Supervised multi-modality abdominal multi-organ segmentation',
normalizers=[SpacingFromAffine()],
diff --git a/amid/amos/utils.py b/amid/amos/utils.py
index 5bd5fb14..9902f20b 100644
--- a/amid/amos/utils.py
+++ b/amid/amos/utils.py
@@ -7,6 +7,8 @@ def loader(column, i, _meta):
# ambiguous data in meta
if int(i) in [500, 600]:
return None
+    elif int(i) not in _meta['amos_id'].values:  # `in` on a Series tests the index, so compare against the values
+        return None
return _meta[_meta['amos_id'] == int(i)][column].item()
diff --git a/amid/data/amos.hash b/amid/data/amos.hash
index 4a31b6f1..d2d30959 100644
--- a/amid/data/amos.hash
+++ b/amid/data/amos.hash
@@ -1 +1 @@
-T:9311589f6b781685cc82e84bc605f7ea679cc2bf68a278b0de823ef57b1d7d9af42692059f19c51c31ca3f247bf0f9281d8487bdbb88feefd973b1cf62b54a6d
\ No newline at end of file
+T:6db6448dc4627ae833ad594f50152357582fac226c933b88bb754f1db422d1780b81d3375c2707d83023b52ff228112ffe6379e41debcbbedf0b1fb36c544eee
\ No newline at end of file