From ae2be07fd059f8458b58391b9c5a3189dbd888ff Mon Sep 17 00:00:00 2001 From: Franklin Ogidi <41602287+fcogidi@users.noreply.github.com> Date: Wed, 22 Jan 2025 17:00:32 -0500 Subject: [PATCH] Update versions for pre-commit hooks --- .pre-commit-config.yaml | 10 +++++----- mmlearn/cli/_instantiators.py | 6 +++--- mmlearn/cli/run.py | 18 +++++++++--------- mmlearn/datasets/chexpert.py | 12 ++++++------ mmlearn/datasets/core/data_collator.py | 2 +- mmlearn/datasets/librispeech.py | 6 +++--- mmlearn/datasets/nihcxr.py | 6 +++--- mmlearn/modules/encoders/vision.py | 6 +++--- mmlearn/tasks/contrastive_pretraining.py | 6 +++--- .../med_benchmarking/datasets/mimiciv_cxr.py | 6 +++--- projects/med_benchmarking/datasets/pmcoa.py | 6 +++--- projects/med_benchmarking/datasets/quilt.py | 6 +++--- projects/med_benchmarking/datasets/roco.py | 6 +++--- tests/datasets/test_example.py | 2 +- 14 files changed, 49 insertions(+), 49 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8793107..6ad619f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,13 +16,13 @@ repos: - id: check-toml - repo: https://github.com/python-poetry/poetry - rev: 1.8.4 + rev: 2.0.1 hooks: - id: poetry-check args: [--lock] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.2 + rev: v0.9.2 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] @@ -31,13 +31,13 @@ repos: types_or: [ python, pyi, jupyter ] - repo: https://github.com/crate-ci/typos - rev: v1.27.0 + rev: v1.29.4 hooks: - id: typos args: [] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.13.0 + rev: v1.14.1 hooks: - id: mypy entry: mypy @@ -46,7 +46,7 @@ repos: exclude: tests|projects - repo: https://github.com/nbQA-dev/nbQA - rev: 1.9.0 + rev: 1.9.1 hooks: - id: nbqa-ruff args: [--fix, --exit-non-zero-on-fix] diff --git a/mmlearn/cli/_instantiators.py b/mmlearn/cli/_instantiators.py index 8aa52b8..1ff90d5 100644 --- a/mmlearn/cli/_instantiators.py +++ b/mmlearn/cli/_instantiators.py @@ -103,9 +103,9 @@ def instantiate_sampler( kwargs.update(distributed_sampler_kwargs) sampler = hydra.utils.instantiate(cfg, **kwargs) - assert isinstance( - sampler, Sampler - ), f"Expected a `torch.utils.data.Sampler` object but got {type(sampler)}." + assert isinstance(sampler, Sampler), ( + f"Expected a `torch.utils.data.Sampler` object but got {type(sampler)}." + ) if sampler is None and requires_distributed_sampler: sampler = DistributedSampler(dataset, **distributed_sampler_kwargs) diff --git a/mmlearn/cli/run.py b/mmlearn/cli/run.py index fa09e18..9fd3e4d 100644 --- a/mmlearn/cli/run.py +++ b/mmlearn/cli/run.py @@ -56,9 +56,9 @@ def main(cfg: MMLearnConf) -> None: # noqa: PLR0912 trainer: Trainer = hydra.utils.instantiate( cfg.trainer, callbacks=callbacks, logger=loggers, _convert_="all" ) - assert isinstance( - trainer, Trainer - ), "Trainer must be an instance of `lightning.pytorch.trainer.Trainer`" + assert isinstance(trainer, Trainer), ( + "Trainer must be an instance of `lightning.pytorch.trainer.Trainer`" + ) if rank_zero_only.rank == 0 and loggers is not None: # update wandb config for trainer_logger in loggers: @@ -79,9 +79,9 @@ def main(cfg: MMLearnConf) -> None: # noqa: PLR0912 # prepare dataloaders if cfg.job_type == JobType.train: train_dataset = instantiate_datasets(cfg.datasets.train) - assert ( - train_dataset is not None - ), "Train dataset (`cfg.datasets.train`) is required for training." + assert train_dataset is not None, ( + "Train dataset (`cfg.datasets.train`) is required for training." + ) train_sampler = instantiate_sampler( cfg.dataloader.train.get("sampler"), @@ -109,9 +109,9 @@ def main(cfg: MMLearnConf) -> None: # noqa: PLR0912 ) else: test_dataset = instantiate_datasets(cfg.datasets.test) - assert ( - test_dataset is not None - ), "Test dataset (`cfg.datasets.test`) is required for evaluation." + assert test_dataset is not None, ( + "Test dataset (`cfg.datasets.test`) is required for evaluation." + ) test_sampler = instantiate_sampler( cfg.dataloader.test.get("sampler"), diff --git a/mmlearn/datasets/chexpert.py b/mmlearn/datasets/chexpert.py index 3405809..34bc480 100644 --- a/mmlearn/datasets/chexpert.py +++ b/mmlearn/datasets/chexpert.py @@ -66,12 +66,12 @@ def __init__( transform: Optional[Callable[[Image.Image], torch.Tensor]] = None, ) -> None: assert split in ["train", "valid"], f"split {split} is not available." - assert ( - labeler in ["chexpert", "chexbert", "vchexbert"] or labeler is None - ), f"labeler {labeler} is not available." - assert ( - callable(transform) or transform is None - ), "transform is not callable or None." + assert labeler in ["chexpert", "chexbert", "vchexbert"] or labeler is None, ( + f"labeler {labeler} is not available." + ) + assert callable(transform) or transform is None, ( + "transform is not callable or None." + ) if split == "valid": data_file = f"{split}_data.json" diff --git a/mmlearn/datasets/core/data_collator.py b/mmlearn/datasets/core/data_collator.py index 74e1a17..28df9c0 100644 --- a/mmlearn/datasets/core/data_collator.py +++ b/mmlearn/datasets/core/data_collator.py @@ -103,7 +103,7 @@ def _merge_examples(examples: list[Example]) -> dict[str, Any]: else: merged_examples[key] = [example[key]] - for key in merged_examples: + for key in merged_examples: # noqa: PLC0206 if isinstance(merged_examples[key][0], Example): merged_examples[key] = _merge_examples(merged_examples[key]) diff --git a/mmlearn/datasets/librispeech.py b/mmlearn/datasets/librispeech.py index 5a47920..6ac108e 100644 --- a/mmlearn/datasets/librispeech.py +++ b/mmlearn/datasets/librispeech.py @@ -107,9 +107,9 @@ def __len__(self) -> int: def __getitem__(self, idx: int) -> Example: """Return an example from the dataset.""" waveform, sample_rate, transcript, _, _, _ = self.dataset[idx] - assert ( - sample_rate == SAMPLE_RATE - ), f"Expected sample rate to be `16000`, got {sample_rate}." + assert sample_rate == SAMPLE_RATE, ( + f"Expected sample rate to be `16000`, got {sample_rate}." + ) waveform = pad_or_trim(waveform.flatten()) return Example( diff --git a/mmlearn/datasets/nihcxr.py b/mmlearn/datasets/nihcxr.py index 6e5ff4b..602e48f 100644 --- a/mmlearn/datasets/nihcxr.py +++ b/mmlearn/datasets/nihcxr.py @@ -62,9 +62,9 @@ def __init__( transform: Optional[Callable[[Image.Image], torch.Tensor]] = None, ) -> None: assert split in ["train", "test", "bbox"], f"split {split} is not available." - assert ( - callable(transform) or transform is None - ), "transform is not callable or None." + assert callable(transform) or transform is None, ( + "transform is not callable or None." + ) data_path = os.path.join(root_dir, split + "_data.json") diff --git a/mmlearn/modules/encoders/vision.py b/mmlearn/modules/encoders/vision.py index 7c67921..3e3bd38 100644 --- a/mmlearn/modules/encoders/vision.py +++ b/mmlearn/modules/encoders/vision.py @@ -512,9 +512,9 @@ def forward( masks: Union[torch.Tensor, list[torch.Tensor]], ) -> torch.Tensor: """Forward pass through the Vision Transformer Predictor.""" - assert (masks is not None) and ( - masks_x is not None - ), "Cannot run predictor without mask indices" + assert (masks is not None) and (masks_x is not None), ( + "Cannot run predictor without mask indices" + ) if not isinstance(masks_x, list): masks_x = [masks_x] diff --git a/mmlearn/tasks/contrastive_pretraining.py b/mmlearn/tasks/contrastive_pretraining.py index d97d179..8002eeb 100644 --- a/mmlearn/tasks/contrastive_pretraining.py +++ b/mmlearn/tasks/contrastive_pretraining.py @@ -277,9 +277,9 @@ def __init__( # noqa: PLR0912, PLR0915 Modalities.get_modality(modality_key) for modality_key in modality_encoder_mapping ] - assert ( - len(self._available_modalities) >= 2 - ), "Expected at least two modalities to be available. " + assert len(self._available_modalities) >= 2, ( + "Expected at least two modalities to be available. " + ) #: A :py:class:`~torch.nn.ModuleDict`, where the keys are the names of the #: modalities and the values are the encoder modules. diff --git a/projects/med_benchmarking/datasets/mimiciv_cxr.py b/projects/med_benchmarking/datasets/mimiciv_cxr.py index b7227dc..d171df5 100644 --- a/projects/med_benchmarking/datasets/mimiciv_cxr.py +++ b/projects/med_benchmarking/datasets/mimiciv_cxr.py @@ -160,9 +160,9 @@ def __getitem__(self, idx: int) -> Example: ) if tokens is not None: if isinstance(tokens, dict): # output of HFTokenizer - assert ( - Modalities.TEXT.name in tokens - ), f"Missing key `{Modalities.TEXT.name}` in tokens." + assert Modalities.TEXT.name in tokens, ( + f"Missing key `{Modalities.TEXT.name}` in tokens." + ) example.update(tokens) else: example[Modalities.TEXT.name] = tokens diff --git a/projects/med_benchmarking/datasets/pmcoa.py b/projects/med_benchmarking/datasets/pmcoa.py index aab6bc6..fbe72b2 100644 --- a/projects/med_benchmarking/datasets/pmcoa.py +++ b/projects/med_benchmarking/datasets/pmcoa.py @@ -123,9 +123,9 @@ def __getitem__(self, idx: int) -> Example: tokens = self.tokenizer(caption) if self.tokenizer is not None else None if tokens is not None: if isinstance(tokens, dict): # output of HFTokenizer - assert ( - Modalities.TEXT.name in tokens - ), f"Missing key `{Modalities.TEXT.name}` in tokens." + assert Modalities.TEXT.name in tokens, ( + f"Missing key `{Modalities.TEXT.name}` in tokens." + ) example.update(tokens) else: example[Modalities.TEXT.name] = tokens diff --git a/projects/med_benchmarking/datasets/quilt.py b/projects/med_benchmarking/datasets/quilt.py index 8d909a8..d04b714 100644 --- a/projects/med_benchmarking/datasets/quilt.py +++ b/projects/med_benchmarking/datasets/quilt.py @@ -158,9 +158,9 @@ def __getitem__(self, idx: int) -> Example: if tokens is not None: if isinstance(tokens, dict): # output of HFTokenizer - assert ( - Modalities.TEXT.name in tokens - ), f"Missing key `{Modalities.TEXT.name}` in tokens." + assert Modalities.TEXT.name in tokens, ( + f"Missing key `{Modalities.TEXT.name}` in tokens." + ) example.update(tokens) else: example[Modalities.TEXT.name] = tokens diff --git a/projects/med_benchmarking/datasets/roco.py b/projects/med_benchmarking/datasets/roco.py index bfb6397..abff727 100644 --- a/projects/med_benchmarking/datasets/roco.py +++ b/projects/med_benchmarking/datasets/roco.py @@ -100,9 +100,9 @@ def __getitem__(self, idx: int) -> Example: if tokens is not None: if isinstance(tokens, dict): # output of HFTokenizer - assert ( - Modalities.TEXT.name in tokens - ), f"Missing key `{Modalities.TEXT.name}` in tokens." + assert Modalities.TEXT.name in tokens, ( + f"Missing key `{Modalities.TEXT.name}` in tokens." + ) example.update(tokens) else: example[Modalities.TEXT.name] = tokens diff --git a/tests/datasets/test_example.py b/tests/datasets/test_example.py index 4acb975..4fccccb 100644 --- a/tests/datasets/test_example.py +++ b/tests/datasets/test_example.py @@ -97,7 +97,7 @@ def test_collate_example_list(): result = DefaultDataCollator()( [img_class_example, img_text_pair, audio_text_pair, nested_example], ) - for key in expected_result: + for key in expected_result: # noqa: PLC0206 assert key in result if isinstance(expected_result[key], torch.Tensor): assert torch.equal(result[key], expected_result[key])