From ba48d9ac5725fe3b82efe243cb23506b96d00899 Mon Sep 17 00:00:00 2001 From: Ilya Matiach Date: Fri, 1 Dec 2023 12:49:20 -0500 Subject: [PATCH] fix image downloader failing with automl format on deserialize due to missing label transformations --- .../CI-e2e-notebooks-text-vision.yml | 3 +- .github/workflows/CI-notebook-vision.yml | 9 ++-- .../CI-responsibleai-text-vision-pytest.yml | 5 +- .../rai_vision_insights.py | 14 +++-- .../tests/common_vision_utils.py | 10 ++++ .../tests/rai_vision_insights_validator.py | 49 +++++++++++------ .../tests/test_rai_vision_insights.py | 2 +- ...vision_insights_save_and_load_scenarios.py | 54 ++++++++++++++++++- 8 files changed, 117 insertions(+), 29 deletions(-) diff --git a/.github/workflows/CI-e2e-notebooks-text-vision.yml b/.github/workflows/CI-e2e-notebooks-text-vision.yml index 549dd0c53e..96ea625763 100644 --- a/.github/workflows/CI-e2e-notebooks-text-vision.yml +++ b/.github/workflows/CI-e2e-notebooks-text-vision.yml @@ -90,7 +90,7 @@ jobs: - name: Install pytorch with python 3.7 shell: bash -l {0} run: | - conda install --yes --quiet "pytorch==1.13.1" "torchvision<0.15" captum cpuonly -c pytorch + conda install --yes --quiet "pytorch==1.13.1" "torchvision<0.15" cpuonly -c pytorch - name: Setup tools shell: bash -l {0} @@ -102,6 +102,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | + pip install captum pip install -r requirements-dev.txt pip install -v -e . working-directory: raiwidgets diff --git a/.github/workflows/CI-notebook-vision.yml b/.github/workflows/CI-notebook-vision.yml index 7ea9aced42..665708a6ae 100644 --- a/.github/workflows/CI-notebook-vision.yml +++ b/.github/workflows/CI-notebook-vision.yml @@ -52,25 +52,25 @@ jobs: name: Install pytorch on non-MacOS with python 3.7 shell: bash -l {0} run: | - conda install --yes --quiet "pytorch==1.13.1" "torchvision<0.15" captum cpuonly -c pytorch + conda install --yes --quiet "pytorch==1.13.1" "torchvision<0.15" cpuonly -c pytorch - if: ${{ matrix.operatingSystem == 'macos-latest' && matrix.pythonVersion == '3.7' }} name: Install Anaconda packages on MacOS with python 3.7 shell: bash -l {0} run: | - conda install --yes --quiet "pytorch==1.13.1" "torchvision<0.15" captum -c pytorch + conda install --yes --quiet "pytorch==1.13.1" "torchvision<0.15" -c pytorch - if: ${{ matrix.operatingSystem != 'macos-latest' && matrix.pythonVersion != '3.7' }} name: Install pytorch on non-MacOS shell: bash -l {0} run: | - conda install --yes --quiet "pytorch<2.1,>1.13.1" "torchvision<0.16" captum cpuonly -c pytorch + conda install --yes --quiet "pytorch<2.1,>1.13.1" "torchvision<0.16" cpuonly -c pytorch - if: ${{ matrix.operatingSystem == 'macos-latest' && matrix.pythonVersion != '3.7' }} name: Install Anaconda packages on MacOS, which should not include cpuonly according to official docs shell: bash -l {0} run: | - conda install --yes --quiet "pytorch<2.1,>1.13.1" "torchvision<0.16" captum -c pytorch + conda install --yes --quiet "pytorch<2.1,>1.13.1" "torchvision<0.16" -c pytorch - name: Setup tools shell: bash -l {0} @@ -82,6 +82,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | + pip install captum pip install -r requirements-dev.txt pip install . working-directory: raiwidgets diff --git a/.github/workflows/CI-responsibleai-text-vision-pytest.yml b/.github/workflows/CI-responsibleai-text-vision-pytest.yml index 04ad932428..d2515a7091 100644 --- a/.github/workflows/CI-responsibleai-text-vision-pytest.yml +++ b/.github/workflows/CI-responsibleai-text-vision-pytest.yml @@ -57,13 +57,13 @@ jobs: name: Install pytorch on non-MacOS shell: bash -l {0} run: | - conda install --yes --quiet pytorch==1.13.1 "torchvision<0.15" captum cpuonly -c pytorch + conda install --yes --quiet pytorch==1.13.1 "torchvision<0.15" cpuonly -c pytorch - if: ${{ matrix.operatingSystem == 'macos-latest' }} name: Install Anaconda packages on MacOS, which should not include cpuonly according to official docs shell: bash -l {0} run: | - conda install --yes --quiet pytorch==1.13.1 "torchvision<0.15" captum -c pytorch + conda install --yes --quiet pytorch==1.13.1 "torchvision<0.15" -c pytorch - name: Setup tools shell: bash -l {0} @@ -75,6 +75,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | + pip install captum pip install -r requirements-dev.txt pip install . working-directory: ${{ matrix.packageDirectory }} diff --git a/responsibleai_vision/responsibleai_vision/rai_vision_insights/rai_vision_insights.py b/responsibleai_vision/responsibleai_vision/rai_vision_insights/rai_vision_insights.py index 971dbf4df3..a15cda5636 100644 --- a/responsibleai_vision/responsibleai_vision/rai_vision_insights/rai_vision_insights.py +++ b/responsibleai_vision/responsibleai_vision/rai_vision_insights/rai_vision_insights.py @@ -650,6 +650,8 @@ def _get_dataset(self): for _, image in enumerate(images): if isinstance(image, str): image = get_image_from_path(image, self.image_mode) + if isinstance(image, list): + image = np.array(image) s = io.BytesIO() # IMshow only accepts floats in range [0, 1] try: @@ -787,7 +789,8 @@ def _format_od_labels(self, y, class_names): object_labels_lst = [0] * len(class_names) for detection in image: # tracking number of same objects in the image - object_labels_lst[int(detection[0] - 1)] += 1 + object_index = int(detection[0] - 1) + object_labels_lst[object_index] += 1 formatted_labels.append(object_labels_lst) return formatted_labels @@ -855,11 +858,12 @@ def _save_ext_data(self, path): os.makedirs(mltable_directory, exist_ok=True) mltable_data_dict = {} if self.test_mltable_path: - mltable_dir = self.test_mltable_path.split('/')[-1] + test_mltable_path = Path(self.test_mltable_path) + mltable_dir = test_mltable_path.name mltable_data_dict[_TEST_MLTABLE_PATH] = mltable_dir test_dir = mltable_directory / mltable_dir shutil.copytree( - Path(self.test_mltable_path), test_dir + test_mltable_path, test_dir ) if mltable_data_dict: dict_path = mltable_directory / _MLTABLE_METADATA_FILENAME @@ -1095,12 +1099,14 @@ def _load_ext_data(inst, path): mltable_dict = {} with open(mltable_dict_path, 'r') as file: mltable_dict = json.load(file) - if mltable_dict.get(_TEST_MLTABLE_PATH, ''): inst.test_mltable_path = str(mltable_directory / mltable_dict[ _TEST_MLTABLE_PATH]) test_dataset = inst._image_downloader(inst.test_mltable_path) inst.test = test_dataset._images_df + if inst.task_type == ModelTask.OBJECT_DETECTION.value: + inst.test = transform_object_detection_labels( + inst.test, target_column, inst._classes) @staticmethod def _load_transformations(inst, path): diff --git a/responsibleai_vision/tests/common_vision_utils.py b/responsibleai_vision/tests/common_vision_utils.py index aab07f35a8..8255ae1bd9 100644 --- a/responsibleai_vision/tests/common_vision_utils.py +++ b/responsibleai_vision/tests/common_vision_utils.py @@ -437,6 +437,16 @@ def _get_model_path(self, path): return os.path.join(path, 'image-classification-model') +class ObjectDetectionPipelineSerializer(object): + def save(self, model, path): + pass + + def load(self, path): + return retrieve_fridge_object_detection_model( + load_fridge_weights=True + ) + + class DummyFlowersPipelineSerializer(object): def save(self, model, path): pass diff --git a/responsibleai_vision/tests/rai_vision_insights_validator.py b/responsibleai_vision/tests/rai_vision_insights_validator.py index b91e166e90..5af4af1b84 100644 --- a/responsibleai_vision/tests/rai_vision_insights_validator.py +++ b/responsibleai_vision/tests/rai_vision_insights_validator.py @@ -20,13 +20,15 @@ def validate_rai_vision_insights( test_data, target_column, task_type, - ignore_index=False + ignore_index=False, + ignore_test_data=False ): - rai_vision_test = rai_vision_insights.test - if ignore_index: - rai_vision_test = rai_vision_test.reset_index(drop=True) - test_data = test_data.reset_index(drop=True) - pd.testing.assert_frame_equal(rai_vision_test, test_data) + if not ignore_test_data: + rai_vision_test = rai_vision_insights.test + if ignore_index: + rai_vision_test = rai_vision_test.reset_index(drop=True) + test_data = test_data.reset_index(drop=True) + pd.testing.assert_frame_equal(rai_vision_test, test_data) assert rai_vision_insights.target_column == target_column assert rai_vision_insights.task_type == task_type @@ -38,7 +40,9 @@ def run_and_validate_serialization( class_names, label, serializer, - image_width=None + image_width=None, + ignore_test_data=False, + image_downloader=None ): """Run and validate serialization. @@ -57,17 +61,28 @@ def run_and_validate_serialization( :param image_width: Image width in inches :type image_width: int """ - rai_insights = RAIVisionInsights( - pred, test, label, - task_type=task_type, - classes=class_names, - serializer=serializer, - image_width=image_width) - with TemporaryDirectory() as tmpdir: save_1 = Path(tmpdir) / "first_save" save_2 = Path(tmpdir) / "second_save" + test_data_path = None + if image_downloader is not None: + test_data_path = str(Path(tmpdir) / "fake_downloaded_test_data") + dir_path = Path(test_data_path) + dir_path.mkdir(exist_ok=True, parents=True) + fake_file_path = dir_path / 'fake_file.txt' + with open(fake_file_path, 'w') as file: + file.write("fake content") + + rai_insights = RAIVisionInsights( + pred, test, label, + test_data_path=test_data_path, + task_type=task_type, + classes=class_names, + serializer=serializer, + image_width=image_width, + image_downloader=image_downloader) + # Save it rai_insights.save(save_1) assert len(os.listdir(save_1 / ManagerNames.EXPLAINER)) == 0 @@ -80,7 +95,11 @@ def run_and_validate_serialization( # Validate validate_rai_vision_insights( rai_2, test, - label, task_type) + label, task_type, + ignore_test_data=ignore_test_data) + + # Test calling get_data works + rai_2.get_data() # Save again rai_2.save(save_2) diff --git a/responsibleai_vision/tests/test_rai_vision_insights.py b/responsibleai_vision/tests/test_rai_vision_insights.py index 434aee6c2b..3dbc1048bc 100644 --- a/responsibleai_vision/tests/test_rai_vision_insights.py +++ b/responsibleai_vision/tests/test_rai_vision_insights.py @@ -146,7 +146,7 @@ def predict_proba(self, X): class_names = np.array(['can', 'carton', 'milk_bottle', 'water_bottle']) # test case where there are different numbers of objects in labels - data = data.iloc[[1, 50, 120]] + data = data.iloc[[1, 50, 120]].reset_index(drop=True) run_rai_insights(wrapped_model, data, ImageColumns.LABEL, task_type, class_names, test_error_analysis=True, diff --git a/responsibleai_vision/tests/test_rai_vision_insights_save_and_load_scenarios.py b/responsibleai_vision/tests/test_rai_vision_insights_save_and_load_scenarios.py index e1bd6881c6..2cdf7b2e44 100644 --- a/responsibleai_vision/tests/test_rai_vision_insights_save_and_load_scenarios.py +++ b/responsibleai_vision/tests/test_rai_vision_insights_save_and_load_scenarios.py @@ -4,20 +4,36 @@ import shutil from pathlib import Path from tempfile import TemporaryDirectory +from unittest.mock import patch +import numpy as np import PIL import pytest from common_vision_utils import (DummyFlowersPipelineSerializer, ImageClassificationPipelineSerializer, + ObjectDetectionPipelineSerializer, create_dummy_model, create_image_classification_pipeline, - load_flowers_dataset, load_imagenet_dataset, - load_imagenet_labels) + load_flowers_dataset, + load_fridge_object_detection_dataset, + load_imagenet_dataset, load_imagenet_labels, + retrieve_fridge_object_detection_model) from rai_vision_insights_validator import run_and_validate_serialization from responsibleai_vision import ModelTask, RAIVisionInsights from responsibleai_vision.common.constants import ImageColumns +FRIDGE_CLASS_NAMES = np.array(['can', 'carton', + 'milk_bottle', 'water_bottle']) + + +class FakeImageDownloader: + def __init__(self, test_mltable_path): + self._images_df = self.get_data() + + def get_data(self): + return None + class TestRAIVisionInsightsSaveAndLoadScenarios(object): @@ -78,3 +94,37 @@ def test_loading_rai_insights_without_model_file(self): with pytest.raises(OSError, match=match_msg): without_model_rai_insights = RAIVisionInsights.load(save_path) assert without_model_rai_insights.model is None + + @pytest.mark.parametrize('automl_format', [True, False]) + def test_rai_insights_object_detection(self, automl_format): + data = load_fridge_object_detection_dataset(automl_format) + model = retrieve_fridge_object_detection_model( + load_fridge_weights=True + ) + task_type = ModelTask.OBJECT_DETECTION + test = data[:3] + label = ImageColumns.LABEL + serializer = ObjectDetectionPipelineSerializer() + + run_and_validate_serialization( + model, test, task_type, FRIDGE_CLASS_NAMES, label, + serializer, ignore_test_data=True) + + def test_rai_insights_image_downloader_object_detection(self): + data = load_fridge_object_detection_dataset(True) + model = retrieve_fridge_object_detection_model( + load_fridge_weights=True + ) + task_type = ModelTask.OBJECT_DETECTION + test = data[:3] + label = ImageColumns.LABEL + serializer = ObjectDetectionPipelineSerializer() + + get_data = ('test_rai_vision_insights_save_and_load_scenarios' + '.FakeImageDownloader.get_data') + with patch(get_data) as mock_images_df: + mock_images_df.return_value = test.copy() + run_and_validate_serialization( + model, test, task_type, FRIDGE_CLASS_NAMES, label, + serializer, ignore_test_data=True, + image_downloader=FakeImageDownloader)