From aa359d1e57aba54cf6ba4d7ea9b7a0c7011fc20f Mon Sep 17 00:00:00 2001
From: Alexey Shevtsov <a.shevtsov@ira-labs.com>
Date: Thu, 3 Oct 2024 17:03:51 +0300
Subject: [PATCH 1/9] added CURVAS dataset interface

---
 amid/curvas.py | 105 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 amid/curvas.py

diff --git a/amid/curvas.py b/amid/curvas.py
new file mode 100644
index 0000000..f2e61ad
--- /dev/null
+++ b/amid/curvas.py
@@ -0,0 +1,105 @@
+import gzip
+import zipfile
+from zipfile import ZipFile
+
+import nibabel
+import numpy as np
+
+from .internals import Dataset, field, register
+
+
+@register(
+    body_region='Abdomen',
+    license=None,
+    link='https://zenodo.org/records/13767408',
+    modality='CT',
+    # TODO: prep_data_size='G',
+    raw_data_size='30G',
+    task='Abdominal organ pathologies segmentation',
+)
+class CURVAS(Dataset):
+    """
+    Pancreas, liver and kidney cysts segmentation from multi-rater annotated data.
+
+    The dataset was used at the MICCAI 2024 CURVAS challenge.
+
+    Parameters
+    ----------
+    root : str, Path, optional
+        path to the folder containing the raw downloaded archives.
+        If not provided, the cache is assumed to be already populated.
+
+    Notes
+    -----
+    Download link: https://zenodo.org/records/13767408
+
+    The `root` folder should contain the three downloaded .zip archives, namely: 
+    `training_set.zip`, `validation_set.zip` and `testing_set.zip`.
+
+    Examples
+    --------
+    >>> # Place the downloaded archives in any folder and pass the path to the constructor:
+    >>> ds = CURVAS(root='/path/to/downloaded/data/folder/')
+    >>> print(len(ds.ids))
+    # 90
+    >>> print(ds.image(ds.ids[5]).shape)
+    # (512, 512, 1045)
+    >>> print(ds.masks(ds.ids[35])['annotation_1'].shape)
+    # (512, 512, 992)
+
+    """
+
+    @property
+    def ids(self):
+        def _extract(split):
+            archive = self.root / f'{split}_set.zip'
+            with ZipFile(archive) as zf:
+                namelist = [x for x in zf.namelist() if len(x.rstrip('/').split('/')) == 2]
+                ids = [f'{x.split("/")[1]}-{split}' for x in namelist]
+                return ids
+
+        return sorted([
+            *_extract('training'),    # 20 Training   cases
+            *_extract('validation'),  # 5  Validation cases
+            *_extract('testing'),     # 65 Testing    cases
+        ])
+
+    def _file(self, i, obj):
+        uid, split = i.split('-')
+
+        archive = self.root / f'{split}_set.zip'
+        with ZipFile(archive) as zf:
+            file = f'{split}_set/{uid}/{obj}.nii.gz'
+            return zipfile.Path(archive, file)
+
+        raise ValueError(f'Id "{i}" not found')
+
+    @field
+    def image(self, i) -> np.ndarray:
+        with self._file(i, 'image').open('rb') as opened:
+            with gzip.GzipFile(fileobj=opened) as nii:
+                nii = nibabel.FileHolder(fileobj=nii)
+                image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
+                return np.asarray(image.dataobj)
+
+    @field
+    def affine(self, i) -> np.ndarray:
+        """The 4x4 affine matrix mapping the image's voxel indices to world coordinates."""
+        with self._file(i, 'image').open('rb') as opened:
+            with gzip.GzipFile(fileobj=opened) as nii:
+                nii = nibabel.FileHolder(fileobj=nii)
+                image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
+                return image.affine
+
+    @field
+    def masks(self, i) -> dict:
+        masks = {}
+        for x in range(1, 4):
+            with self._file(i, f'annotation_{x}').open('rb') as opened:
+                with gzip.GzipFile(fileobj=opened) as nii:
+                    nii = nibabel.FileHolder(fileobj=nii)
+                    image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
+
+                    masks[f'annotation_{x}'] = np.asarray(image.dataobj)
+
+        return masks

From 85ce9ea7e6dc15ec6cfa842fc61ca035ec6a7acc Mon Sep 17 00:00:00 2001
From: Alexey Shevtsov <a.shevtsov@ira-labs.com>
Date: Thu, 3 Oct 2024 17:09:10 +0300
Subject: [PATCH 2/9] simplified

---
 amid/curvas.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/amid/curvas.py b/amid/curvas.py
index f2e61ad..c825d4f 100644
--- a/amid/curvas.py
+++ b/amid/curvas.py
@@ -33,7 +33,7 @@ class CURVAS(Dataset):
     -----
     Download link: https://zenodo.org/records/13767408
 
-    The `root` folder should contain the three downloaded .zip archives, namely: 
+    The `root` folder should contain the three downloaded .zip archives, namely:
     `training_set.zip`, `validation_set.zip` and `testing_set.zip`.
 
     Examples
@@ -68,11 +68,9 @@ def _file(self, i, obj):
         uid, split = i.split('-')
 
         archive = self.root / f'{split}_set.zip'
-        with ZipFile(archive) as zf:
-            file = f'{split}_set/{uid}/{obj}.nii.gz'
-            return zipfile.Path(archive, file)
+        file = f'{split}_set/{uid}/{obj}.nii.gz'
 
-        raise ValueError(f'Id "{i}" not found')
+        return zipfile.Path(archive, file)
 
     @field
     def image(self, i) -> np.ndarray:

From 2c1674e80a1253365a67e5bf7de0912b9259c735 Mon Sep 17 00:00:00 2001
From: Alexey Shevtsov <a.shevtsov@ira-labs.com>
Date: Thu, 3 Oct 2024 19:16:04 +0300
Subject: [PATCH 3/9] reformatted

---
 amid/curvas.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/amid/curvas.py b/amid/curvas.py
index c825d4f..a73e036 100644
--- a/amid/curvas.py
+++ b/amid/curvas.py
@@ -58,11 +58,13 @@ def _extract(split):
                 ids = [f'{x.split("/")[1]}-{split}' for x in namelist]
                 return ids
 
-        return sorted([
-            *_extract('training'),    # 20 Training   cases
-            *_extract('validation'),  # 5  Validation cases
-            *_extract('testing'),     # 65 Testing    cases
-        ])
+        return sorted(
+            [
+                *_extract('training'),  # 20 Training   cases
+                *_extract('validation'),  # 5  Validation cases
+                *_extract('testing'),  # 65 Testing    cases
+            ]
+        )
 
     def _file(self, i, obj):
         uid, split = i.split('-')

From 5e7da7634c98af5890684bbfdab054564034a892 Mon Sep 17 00:00:00 2001
From: Alexey Shevtsov <a.shevtsov@ira-labs.com>
Date: Thu, 3 Oct 2024 19:37:08 +0300
Subject: [PATCH 4/9] set CURVAS license and prepared data size

---
 amid/curvas.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/amid/curvas.py b/amid/curvas.py
index a73e036..35bd331 100644
--- a/amid/curvas.py
+++ b/amid/curvas.py
@@ -5,15 +5,15 @@
 import nibabel
 import numpy as np
 
-from .internals import Dataset, field, register
+from .internals import Dataset, field, licenses, register
 
 
 @register(
     body_region='Abdomen',
-    license=None,
+    license=licenses.CC_BY_40,
     link='https://zenodo.org/records/13767408',
     modality='CT',
-    # TODO: prep_data_size='G',
+    prep_data_size='30G',
     raw_data_size='30G',
     task='Abdominal organ pathologies segmentation',
 )

From 7047767c86b0d1f2bd3d32e9af1e80de935a1f8b Mon Sep 17 00:00:00 2001
From: Alexey Shevtsov <a.shevtsov@ira-labs.com>
Date: Thu, 3 Oct 2024 19:39:05 +0300
Subject: [PATCH 5/9] docs

---
 README.md            | 1 +
 docs/datasets-api.md | 2 ++
 docs/datasets.md     | 1 +
 3 files changed, 4 insertions(+)

diff --git a/README.md b/README.md
index a9c5e91..d517ce2 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,7 @@ print(entry.split, entry.patient)
 | <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.cl_detection.CLDetection2023">CLDetection2023</a>               |       400 | Head                                | X-ray                                                                  |
 | <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.crlm.CRLM">CRLM</a>                                             |       197 | Abdomen                             | CT, SEG                                                                |
 | <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.ct_ich.CT_ICH">CT_ICH</a>                                       |        75 | Head                                | CT                                                                     |
+| <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.curvas.CURVAS">CURVAS</a>                                       |        90 | Abdomen                             | CT                                                                     |
 | <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.crossmoda.CrossMoDA">CrossMoDA</a>                              |       484 | Head                                | MRI T1c, MRI T2hr                                                      |
 | <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.deeplesion.DeepLesion">DeepLesion</a>                           |     20094 | Abdomen, Thorax                     | CT                                                                     |
 | <a href="https://neuro-ml.github.io/amid/latest/datasets-api/#amid.egd.EGD">EGD</a>                                                |      3096 | Head                                | FLAIR, MRI T1, MRI T1GD, MRI T2                                        |
diff --git a/docs/datasets-api.md b/docs/datasets-api.md
index 745b983..0d57ae7 100644
--- a/docs/datasets-api.md
+++ b/docs/datasets-api.md
@@ -14,6 +14,8 @@
 
 ::: amid.ct_ich.CT_ICH
 
+::: amid.curvas.CURVAS
+
 ::: amid.crossmoda.CrossMoDA
 
 ::: amid.deeplesion.DeepLesion
diff --git a/docs/datasets.md b/docs/datasets.md
index 1afeeef..b4ab379 100644
--- a/docs/datasets.md
+++ b/docs/datasets.md
@@ -9,6 +9,7 @@
 | <a href="https://neuro-ml.github.io/amid/0.14.0/datasets-api/#amid.cl_detection.CLDetection2023">CLDetection2023</a>               |       400 | Head                                | <a href="https://creativecommons.org/licenses/by-nc/4.0/">CC BY-NC 4.0</a>                                                                            | X-ray                                                                  | 1.8G             | 1.5G            | Keypoint detection                                           | <a href="https://github.com/cwwang1979/CL-detection2023/">Source</a>                                                                      |
 | <a href="https://neuro-ml.github.io/amid/0.14.0/datasets-api/#amid.crlm.CRLM">CRLM</a>                                             |       197 | Abdomen                             | <a href="https://creativecommons.org/licenses/by/4.0/">CC BY 4.0</a>                                                                                  | CT, SEG                                                                | 11G              | 11G             | Segmentation, Classification                                 | <a href="https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=89096268#89096268412b832037484784bd78caf58e052641">Source</a> |
 | <a href="https://neuro-ml.github.io/amid/0.14.0/datasets-api/#amid.ct_ich.CT_ICH">CT_ICH</a>                                       |        75 | Head                                | <a href="https://www.physionet.org/about/licenses/physionet-restricted-health-data-license-150/">PhysioNet Restricted Health Data License 1.5.0</a>   | CT                                                                     | 661M             | 2,8G            | Intracranial hemorrhage segmentation                         | <a href="https://physionet.org/content/ct-ich/1.3.1/">Source</a>                                                                          |
+| <a href="https://neuro-ml.github.io/amid/0.14.0/datasets-api/#amid.curvas.CURVAS">CURVAS</a>                                       |        90 | Abdomen                             | <a href="https://creativecommons.org/licenses/by/4.0/">CC BY 4.0</a>                                                                                  | CT                                                                     | 30G              | 30G             | Abdominal organ pathologies segmentation                     | <a href="https://zenodo.org/records/13767408">Source</a>                                                                                  |
 | <a href="https://neuro-ml.github.io/amid/0.14.0/datasets-api/#amid.crossmoda.CrossMoDA">CrossMoDA</a>                              |       484 | Head                                | <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/">CC BY-NC-SA 4.0</a>                                                                      | MRI T1c, MRI T2hr                                                      | 8,96G            | 17G             | Segmentation, Classification, Domain Adaptation              | <a href="https://zenodo.org/record/6504722#.YsgwnNJByV4">Source</a>                                                                       |
 | <a href="https://neuro-ml.github.io/amid/0.14.0/datasets-api/#amid.deeplesion.DeepLesion">DeepLesion</a>                           |     20094 | Abdomen, Thorax                     |                                                                                                                                                       | CT                                                                     | 259G             | 259G            | Localisation, Detection, Classification                      | <a href="https://nihcc.app.box.com/v/DeepLesion">Source</a>                                                                               |
 | <a href="https://neuro-ml.github.io/amid/0.14.0/datasets-api/#amid.egd.EGD">EGD</a>                                                |      3096 | Head                                | EGD data license                                                                                                                                      | FLAIR, MRI T1, MRI T1GD, MRI T2                                        | 107,49G          | 40G             | Segmentation                                                 | <a href="https://xnat.bmia.nl/data/archive/projects/egd">Source</a>                                                                       |

From 64c3db3eb9d42d36c6a58a18ec73a20f24c3762a Mon Sep 17 00:00:00 2001
From: Alexey Shevtsov <a.shevtsov@ira-labs.com>
Date: Thu, 3 Oct 2024 19:41:51 +0300
Subject: [PATCH 6/9] removed populate from docs

---
 docs/CONTRIBUTING.md | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index b948d8e..2ba8df8 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -101,25 +101,9 @@ id_ = dataset.ids[0]
 print(dataset.image(id_).shape)
 ```
 
-6\. Populate the dataset:
-
-```shell
-amid populate LiTS /shared/data/LiTS
-```
-
-!!! tip 
-    Use the option `--n-jobs` to speed up the process.
-
-!!! tip
-    Use the option `--help` for a more detailed information on this command.
-
-7\. If there is no error, the file `amid/data/lits.hash` will appear (the name depends on `short_name` given to `normalize`).
-
-8\. Check the codestyle using the `lint.sh` script in the repository's root and make changes if flake8 is not happy:
+6\. Check the codestyle using the `lint.sh` script in the repository's root and make changes if flake8 is not happy:
 
 ```shell
 pip install -r lint-requirements.txt # only for the first time
 ./lint.sh
-```
-
-9\. Commit all the files you added, including the `*.hash` one.
+```
\ No newline at end of file

From d89a2584bbcfb0249ad33818738ca1bda9a34601 Mon Sep 17 00:00:00 2001
From: Alexey Shevtsov <a.shevtsov@ira-labs.com>
Date: Fri, 4 Oct 2024 16:54:08 +0300
Subject: [PATCH 7/9] space-aware datatypes

---
 amid/curvas.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/amid/curvas.py b/amid/curvas.py
index 35bd331..6108197 100644
--- a/amid/curvas.py
+++ b/amid/curvas.py
@@ -1,5 +1,6 @@
 import gzip
 import zipfile
+from typing import Dict
 from zipfile import ZipFile
 
 import nibabel
@@ -80,7 +81,7 @@ def image(self, i) -> np.ndarray:
             with gzip.GzipFile(fileobj=opened) as nii:
                 nii = nibabel.FileHolder(fileobj=nii)
                 image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
-                return np.asarray(image.dataobj)
+                return np.asarray(image.dataobj).astype(np.int16)
 
     @field
     def affine(self, i) -> np.ndarray:
@@ -92,7 +93,7 @@ def affine(self, i) -> np.ndarray:
                 return image.affine
 
     @field
-    def masks(self, i) -> dict:
+    def masks(self, i) -> Dict[str, np.ndarray]:
         masks = {}
         for x in range(1, 4):
             with self._file(i, f'annotation_{x}').open('rb') as opened:
@@ -100,6 +101,6 @@ def masks(self, i) -> dict:
                     nii = nibabel.FileHolder(fileobj=nii)
                     image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
 
-                    masks[f'annotation_{x}'] = np.asarray(image.dataobj)
+                    masks[f'annotation_{x}'] = np.asarray(image.dataobj).astype(np.uint8)
 
         return masks

From bfa11f55c06b1270d1de8e15c285e0fadb6e8989 Mon Sep 17 00:00:00 2001
From: Alexey Shevtsov <a.shevtsov@ira-labs.com>
Date: Fri, 4 Oct 2024 18:45:56 +0300
Subject: [PATCH 8/9] export CURVAS from the package __init__

---
 amid/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/amid/__init__.py b/amid/__init__.py
index da29d9b..425261c 100644
--- a/amid/__init__.py
+++ b/amid/__init__.py
@@ -10,6 +10,7 @@
 from .covid_1110 import MoscowCovid1110
 from .crlm import CRLM
 from .crossmoda import CrossMoDA
+from .curvas import CURVAS
 from .ct_ich import CT_ICH
 from .deeplesion import DeepLesion
 from .egd import EGD

From 0fffbf0cb936dd175403680bde7b00daf6259c21 Mon Sep 17 00:00:00 2001
From: Alexey Shevtsov <a.shevtsov@ira-labs.com>
Date: Fri, 4 Oct 2024 18:47:56 +0300
Subject: [PATCH 9/9] fix CURVAS import order in __init__

---
 amid/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/amid/__init__.py b/amid/__init__.py
index 425261c..b26e169 100644
--- a/amid/__init__.py
+++ b/amid/__init__.py
@@ -10,8 +10,8 @@
 from .covid_1110 import MoscowCovid1110
 from .crlm import CRLM
 from .crossmoda import CrossMoDA
-from .curvas import CURVAS
 from .ct_ich import CT_ICH
+from .curvas import CURVAS
 from .deeplesion import DeepLesion
 from .egd import EGD
 from .flare2022 import FLARE2022