From aa359d1e57aba54cf6ba4d7ea9b7a0c7011fc20f Mon Sep 17 00:00:00 2001 From: Alexey Shevtsov Date: Thu, 3 Oct 2024 17:03:51 +0300 Subject: [PATCH 1/9] added CURVAS dataset interface --- amid/curvas.py | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 amid/curvas.py diff --git a/amid/curvas.py b/amid/curvas.py new file mode 100644 index 0000000..f2e61ad --- /dev/null +++ b/amid/curvas.py @@ -0,0 +1,105 @@ +import gzip +import zipfile +from zipfile import ZipFile + +import nibabel +import numpy as np + +from .internals import Dataset, field, register + + +@register( + body_region='Abdomen', + license=None, + link='https://zenodo.org/records/13767408', + modality='CT', + # TODO: prep_data_size='G', + raw_data_size='30G', + task='Abdominal organ pathologies segmentation', +) +class CURVAS(Dataset): + """ + Pancreas, liver and kidney cysts segmentation from multi-rater annotated data. + + The dataset was used at the MICCAI 2024 CURVAS challenge. + + Parameters + ---------- + root : str, Path, optional + path to the folder containing the raw downloaded archives. + If not provided, the cache is assumed to be already populated. + + Notes + ----- + Download link: https://zenodo.org/records/13767408 + + The `root` folder should contain the three downloaded .zip archives, namely: + `training_set.zip`, `validation_set.zip` and `testing_set.zip`. + + Examples + -------- + >>> # Place the downloaded folders in any folder and pass the path to the constructor: + >>> ds = CURVAS(root='/path/to/downloaded/data/folder/') + >>> print(len(ds.ids)) + # 90 + >>> print(ds.image(ds.ids[5]).shape) + # (512, 512, 1045) + >>> print(ds.mask(ds.ids[35]).shape) + # (512, 512, 992) + + """ + + @property + def ids(self): + def _extract(split): + archive = self.root / f'{split}_set.zip' + with ZipFile(archive) as zf: + namelist = [x for x in zf.namelist() if len(x.rstrip('/').split('/')) == 2] + ids = [f'{x.split("/")[1]}-{split}' for x in namelist] + return ids + + return sorted([ + *_extract('training'), # 20 Training cases + *_extract('validation'), # 5 Validation cases + *_extract('testing'), # 65 Testing cases + ]) + + def _file(self, i, obj): + uid, split = i.split('-') + + archive = self.root / f'{split}_set.zip' + with ZipFile(archive) as zf: + file = f'{split}_set/{uid}/{obj}.nii.gz' + return zipfile.Path(archive, file) + + raise ValueError(f'Id "{i}" not found') + + @field + def image(self, i) -> np.ndarray: + with self._file(i, 'image').open('rb') as opened: + with gzip.GzipFile(fileobj=opened) as nii: + nii = nibabel.FileHolder(fileobj=nii) + image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) + return np.asarray(image.dataobj) + + @field + def affine(self, i) -> np.ndarray: + """The 4x4 matrix that gives the image's spatial orientation""" + with self._file(i, 'image').open('rb') as opened: + with gzip.GzipFile(fileobj=opened) as nii: + nii = nibabel.FileHolder(fileobj=nii) + image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) + return image.affine + + @field + def masks(self, i) -> dict: + masks = {} + for x in range(1, 4): + with self._file(i, f'annotation_{x}').open('rb') as opened: + with gzip.GzipFile(fileobj=opened) as nii: + nii = nibabel.FileHolder(fileobj=nii) + image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) + + masks[f'annotation_{x}'] = np.asarray(image.dataobj) + + return masks From 85ce9ea7e6dc15ec6cfa842fc61ca035ec6a7acc Mon Sep 17 00:00:00 2001 From: Alexey Shevtsov Date: Thu, 3 Oct 2024 17:09:10 +0300 Subject: [PATCH 2/9] simplified --- amid/curvas.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/amid/curvas.py b/amid/curvas.py index f2e61ad..c825d4f 100644 --- a/amid/curvas.py +++ b/amid/curvas.py @@ -33,7 +33,7 @@ class CURVAS(Dataset): ----- Download link: https://zenodo.org/records/13767408 - The `root` folder should contain the three downloaded .zip archives, namely: + The `root` folder should contain the three downloaded .zip archives, namely: `training_set.zip`, `validation_set.zip` and `testing_set.zip`. Examples @@ -68,11 +68,9 @@ def _file(self, i, obj): uid, split = i.split('-') archive = self.root / f'{split}_set.zip' - with ZipFile(archive) as zf: - file = f'{split}_set/{uid}/{obj}.nii.gz' - return zipfile.Path(archive, file) + file = f'{split}_set/{uid}/{obj}.nii.gz' - raise ValueError(f'Id "{i}" not found') + return zipfile.Path(archive, file) @field def image(self, i) -> np.ndarray: From 2c1674e80a1253365a67e5bf7de0912b9259c735 Mon Sep 17 00:00:00 2001 From: Alexey Shevtsov Date: Thu, 3 Oct 2024 19:16:04 +0300 Subject: [PATCH 3/9] reformatted --- amid/curvas.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/amid/curvas.py b/amid/curvas.py index c825d4f..a73e036 100644 --- a/amid/curvas.py +++ b/amid/curvas.py @@ -58,11 +58,13 @@ def _extract(split): ids = [f'{x.split("/")[1]}-{split}' for x in namelist] return ids - return sorted([ - *_extract('training'), # 20 Training cases - *_extract('validation'), # 5 Validation cases - *_extract('testing'), # 65 Testing cases - ]) + return sorted( + [ + *_extract('training'), # 20 Training cases + *_extract('validation'), # 5 Validation cases + *_extract('testing'), # 65 Testing cases + ] + ) def _file(self, i, obj): uid, split = i.split('-') From 5e7da7634c98af5890684bbfdab054564034a892 Mon Sep 17 00:00:00 2001 From: Alexey Shevtsov Date: Thu, 3 Oct 2024 19:37:08 +0300 Subject: [PATCH 4/9] final touch --- amid/curvas.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/amid/curvas.py b/amid/curvas.py index a73e036..35bd331 100644 --- a/amid/curvas.py +++ b/amid/curvas.py @@ -5,15 +5,15 @@ import nibabel import numpy as np -from .internals import Dataset, field, register +from .internals import Dataset, field, licenses, register @register( body_region='Abdomen', - license=None, + license=licenses.CC_BY_40, link='https://zenodo.org/records/13767408', modality='CT', - # TODO: prep_data_size='G', + prep_data_size='30G', raw_data_size='30G', task='Abdominal organ pathologies segmentation', ) From 7047767c86b0d1f2bd3d32e9af1e80de935a1f8b Mon Sep 17 00:00:00 2001 From: Alexey Shevtsov Date: Thu, 3 Oct 2024 19:39:05 +0300 Subject: [PATCH 5/9] docs --- README.md | 1 + docs/datasets-api.md | 2 ++ docs/datasets.md | 1 + 3 files changed, 4 insertions(+) diff --git a/README.md b/README.md index a9c5e91..d517ce2 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ print(entry.split, entry.patient) | CLDetection2023 | 400 | Head | X-ray | | CRLM | 197 | Abdomen | CT, SEG | | CT_ICH | 75 | Head | CT | +| CURVAS | 90 | Abdomen | CT | | CrossMoDA | 484 | Head | MRI T1c, MRI T2hr | | DeepLesion | 20094 | Abdomen, Thorax | CT | | EGD | 3096 | Head | FLAIR, MRI T1, MRI T1GD, MRI T2 | diff --git a/docs/datasets-api.md b/docs/datasets-api.md index 745b983..0d57ae7 100644 --- a/docs/datasets-api.md +++ b/docs/datasets-api.md @@ -14,6 +14,8 @@ ::: amid.ct_ich.CT_ICH +::: amid.curvas.CURVAS + ::: amid.crossmoda.CrossMoDA ::: amid.deeplesion.DeepLesion diff --git a/docs/datasets.md b/docs/datasets.md index 1afeeef..b4ab379 100644 --- a/docs/datasets.md +++ b/docs/datasets.md @@ -9,6 +9,7 @@ | CLDetection2023 | 400 | Head | CC BY-NC 4.0 | X-ray | 1.8G | 1.5G | Keypoint detection | Source | | CRLM | 197 | Abdomen | CC BY 4.0 | CT, SEG | 11G | 11G | Segmentation, Classification | Source | | CT_ICH | 75 | Head | PhysioNet Restricted Health Data License 1.5.0 | CT | 661M | 2,8G | Intracranial hemorrhage segmentation | Source | +| CURVAS | 90 | Abdomen | CC BY 4.0 | CT | 30G | 30G | Abdominal organ pathologies segmentation | Source | | CrossMoDA | 484 | Head | CC BY-NC-SA 4.0 | MRI T1c, MRI T2hr | 8,96G | 17G | Segmentation, Classification, Domain Adaptation | Source | | DeepLesion | 20094 | Abdomen, Thorax | | CT | 259G | 259G | Localisation, Detection, Classification | Source | | EGD | 3096 | Head | EGD data license | FLAIR, MRI T1, MRI T1GD, MRI T2 | 107,49G | 40G | Segmentation | Source | From 64c3db3eb9d42d36c6a58a18ec73a20f24c3762a Mon Sep 17 00:00:00 2001 From: Alexey Shevtsov Date: Thu, 3 Oct 2024 19:41:51 +0300 Subject: [PATCH 6/9] removed populate from docs --- docs/CONTRIBUTING.md | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index b948d8e..2ba8df8 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -101,25 +101,9 @@ id_ = dataset.ids[0] print(dataset.image(id_).shape) ``` -6\. Populate the dataset: - -```shell -amid populate LiTS /shared/data/LiTS -``` - -!!! tip - Use the option `--n-jobs` to speed up the process. - -!!! tip - Use the option `--help` for a more detailed information on this command. - -7\. If there is no error, the file `amid/data/lits.hash` will appear (the name depends on `short_name` given to `normalize`). - -8\. Check the codestyle using the `lint.sh` script in the repository's root and make changes if flake8 is not happy: +6\. Check the codestyle using the `lint.sh` script in the repository's root and make changes if flake8 is not happy: ```shell pip install -r lint-requirements.txt # only for the first time ./lint.sh -``` - -9\. Commit all the files you added, including the `*.hash` one. +``` \ No newline at end of file From d89a2584bbcfb0249ad33818738ca1bda9a34601 Mon Sep 17 00:00:00 2001 From: Alexey Shevtsov Date: Fri, 4 Oct 2024 16:54:08 +0300 Subject: [PATCH 7/9] space-aware datatypes --- amid/curvas.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/amid/curvas.py b/amid/curvas.py index 35bd331..6108197 100644 --- a/amid/curvas.py +++ b/amid/curvas.py @@ -1,5 +1,6 @@ import gzip import zipfile +from typing import Dict from zipfile import ZipFile import nibabel @@ -80,7 +81,7 @@ def image(self, i) -> np.ndarray: with gzip.GzipFile(fileobj=opened) as nii: nii = nibabel.FileHolder(fileobj=nii) image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) - return np.asarray(image.dataobj) + return np.asarray(image.dataobj).astype(np.int16) @field def affine(self, i) -> np.ndarray: @@ -92,7 +93,7 @@ def affine(self, i) -> np.ndarray: return image.affine @field - def masks(self, i) -> dict: + def masks(self, i) -> Dict[str, np.ndarray]: masks = {} for x in range(1, 4): with self._file(i, f'annotation_{x}').open('rb') as opened: @@ -100,6 +101,6 @@ def masks(self, i) -> dict: nii = nibabel.FileHolder(fileobj=nii) image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) - masks[f'annotation_{x}'] = np.asarray(image.dataobj) + masks[f'annotation_{x}'] = np.asarray(image.dataobj).astype(np.uint8) return masks From bfa11f55c06b1270d1de8e15c285e0fadb6e8989 Mon Sep 17 00:00:00 2001 From: Alexey Shevtsov Date: Fri, 4 Oct 2024 18:45:56 +0300 Subject: [PATCH 8/9] oi innit bruv --- amid/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/amid/__init__.py b/amid/__init__.py index da29d9b..425261c 100644 --- a/amid/__init__.py +++ b/amid/__init__.py @@ -10,6 +10,7 @@ from .covid_1110 import MoscowCovid1110 from .crlm import CRLM from .crossmoda import CrossMoDA +from .curvas import CURVAS from .ct_ich import CT_ICH from .deeplesion import DeepLesion from .egd import EGD From 0fffbf0cb936dd175403680bde7b00daf6259c21 Mon Sep 17 00:00:00 2001 From: Alexey Shevtsov Date: Fri, 4 Oct 2024 18:47:56 +0300 Subject: [PATCH 9/9] order --- amid/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amid/__init__.py b/amid/__init__.py index 425261c..b26e169 100644 --- a/amid/__init__.py +++ b/amid/__init__.py @@ -10,8 +10,8 @@ from .covid_1110 import MoscowCovid1110 from .crlm import CRLM from .crossmoda import CrossMoDA -from .curvas import CURVAS from .ct_ich import CT_ICH +from .curvas import CURVAS from .deeplesion import DeepLesion from .egd import EGD from .flare2022 import FLARE2022