Skip to content

Commit

Permalink
fix error when extracting exif metadata features from images in RAI Vision dashboard
Browse files Browse the repository at this point in the history
  • Loading branch information
imatiach-msft committed Dec 14, 2023
1 parent 0700cdc commit 065f3b3
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 16 deletions.
3 changes: 2 additions & 1 deletion responsibleai_vision/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,5 @@ opencv-python

fastai
mlflow
pydantic<2.0.0
pydantic<2.0.0
piexif
12 changes: 12 additions & 0 deletions responsibleai_vision/responsibleai_vision/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
from enum import Enum


class ExtractedFeatures(str, Enum):
    """Names of the features extracted from images.

    Members also subclass ``str``, so each member compares equal to its
    plain string value.
    """

    MEAN_PIXEL_VALUE = "mean_pixel_value"


class ModelTask(str, Enum):
"""Provide model task constants.
Expand All @@ -28,6 +34,12 @@ class ImageColumns(str, Enum):
IMAGE_DETAILS = 'image_details'


class ImageModes(str, Enum):
    """Image mode constants.

    Members also subclass ``str``, so each member compares equal to its
    plain string value.
    """

    RGB = "RGB"


class ExplainabilityLiterals:
"""Parameters for explainability method names."""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
ExplainabilityDefaults,
ImageColumns,
MLFlowSchemaLiterals,
ModelTask)
ModelTask,
ImageModes)
from responsibleai_vision.managers.error_analysis_manager import \
ErrorAnalysisManager
from responsibleai_vision.managers.explainer_manager import ExplainerManager
Expand Down Expand Up @@ -135,7 +136,7 @@ def __init__(self, model: Any,
classes: Optional[np.ndarray] = None,
serializer: Optional[Any] = None,
maximum_rows_for_test: int = 5000,
image_mode: str = "RGB",
image_mode: str = ImageModes.RGB,
test_data_path: Optional[str] = None,
transformations: Optional[Any] = None,
image_downloader: Optional[Any] = None,
Expand Down Expand Up @@ -267,7 +268,7 @@ def __init__(self, model: Any,
serializer)

ext_test, ext_features = extract_features(
self.test, self.target_column, self.task_type,
self.test, self.target_column,
self.image_mode,
self._feature_metadata)
self._ext_test = ext_test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,21 @@
from PIL import Image
from PIL.ExifTags import TAGS
from tqdm import tqdm
import warnings

from responsibleai.feature_metadata import FeatureMetadata
from responsibleai_vision.utils.image_reader import (
get_all_exif_feature_names, get_image_from_path,
get_image_pointer_from_path)
from responsibleai_vision.common.constants import ExtractedFeatures


# Name of the mean pixel value feature, taken from the shared constants
MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
# Maximum number of characters kept from an extracted metadata value;
# longer values are cut to this length and suffixed with '...'
MAX_CUSTOM_LEN = 100


def extract_features(image_dataset: pd.DataFrame,
target_column: str, task_type: str,
target_column: str,
image_mode: str = None,
feature_metadata: Optional[FeatureMetadata] = None):
'''Extract tabular data features from the image dataset.
Expand All @@ -27,8 +33,6 @@ def extract_features(image_dataset: pd.DataFrame,
:param target_column: The name of the label column or list of columns.
This is a list of columns for multilabel models.
:type target_column: str or list[str]
:param task_type: The type of task to be performed.
:type task_type: str
:param image_mode: The mode to open the image in.
See pillow documentation for all modes:
https://pillow.readthedocs.io/en/stable/handbook/concepts.html
Expand All @@ -45,7 +49,7 @@ def extract_features(image_dataset: pd.DataFrame,
if feature_metadata and feature_metadata.categorical_features is None:
feature_metadata.categorical_features = []
exif_feature_names = get_all_exif_feature_names(image_dataset)
feature_names = ["mean_pixel_value"] + exif_feature_names
feature_names = [MEAN_PIXEL_VALUE] + exif_feature_names

# append all feature names other than target column and label
column_names = image_dataset.columns
Expand All @@ -58,6 +62,7 @@ def extract_features(image_dataset: pd.DataFrame,
continue
feature_names.append(column_names[j])

# tags already warned about; must be a set (not a dict) because new
# entries are recorded with .add(), which dict does not provide
blacklisted_tags = set()
# append all features
for i in tqdm(range(image_dataset.shape[0])):
image = image_dataset.iloc[i, 0]
Expand All @@ -81,9 +86,26 @@ def extract_features(image_dataset: pd.DataFrame,
# decode bytes
if isinstance(data, bytes):
data = data.decode()
if len(data) > MAX_CUSTOM_LEN:
data = data[:MAX_CUSTOM_LEN] + '...'
if isinstance(data, str):
feature_metadata.categorical_features.append(str(tag))
row_feature_values[feature_names.index(tag)] = data
if not feature_metadata:
feature_metadata = FeatureMetadata()
feature_metadata.categorical_features = []
if tag in feature_names:
feature_metadata.categorical_features.append(
str(tag))
tag_index = feature_names.index(tag)
row_feature_values[tag_index] = data
else:
# in theory this should now never happen with
# latest code, but adding this check for safety
if tag not in blacklisted_tags:
blacklisted_tags.add(tag)
warnings.warn(
f'Exif tag {tag} could not be found '
'in the feature names. Ignoring tag '
'from extracted metadata.')
elif isinstance(data, int) or isinstance(data, float):
row_feature_values[feature_names.index(tag)] = data

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ def get_all_exif_feature_names(image_dataset):
data = exifdata.get(tag_id)
if isinstance(data, str) or \
isinstance(data, int) or \
isinstance(data, float):
isinstance(data, float) or \
isinstance(data, bytes):
exif_feature_names.add(tag)
return list(exif_feature_names)

Expand Down
11 changes: 10 additions & 1 deletion responsibleai_vision/tests/common_vision_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import numpy as np
import pandas as pd
import piexif
import shap
import torch
import torch.nn as nn
Expand Down Expand Up @@ -172,7 +173,7 @@ def retrieve_unzip_file(download_url, data_file):
os.remove(data_file)


def load_fridge_dataset():
def load_fridge_dataset(add_extra_mixed_metadata=False):
# create data folder if it doesn't exist.
os.makedirs("data", exist_ok=True)

Expand All @@ -186,6 +187,14 @@ def load_fridge_dataset():
for folder in os.listdir("./data/fridgeObjects"):
for file in os.listdir("./data/fridgeObjects/" + folder):
image_path = "./data/fridgeObjects/" + folder + "/" + file
if add_extra_mixed_metadata and file.endswith("1.jpg"):
with Image.open(image_path) as im:
exif_dict = piexif.load(im.info['exif'])
comment = 'Extra metadata for {}'.format(file).encode()
exif_dict['0th'][piexif.ImageIFD.XPComment] = comment
exif_dict['1st'][piexif.ImageIFD.XPComment] = comment
exif_bytes = piexif.dump(exif_dict)
im.save(image_path, exif=exif_bytes)
data = data.append({IMAGE: image_path, LABEL: folder},
ignore_index=True)
return data
Expand Down
71 changes: 71 additions & 0 deletions responsibleai_vision/tests/test_feature_extractors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

import numpy as np
from common_vision_utils import (load_fridge_object_detection_dataset, load_flowers_dataset, load_imagenet_dataset, load_fridge_dataset)
from responsibleai_vision import ModelTask
from responsibleai_vision.common.constants import (ExplainabilityDefaults,
ExtractedFeatures,
ImageColumns,
ImageModes)
from responsibleai_vision.utils.feature_extractors import extract_features


# Name of the mean pixel value feature; the tests below expect it to be
# the first extracted feature name
MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
# Metadata feature names the tests below expect from the fridge datasets
FRIDGE_METADATA_FEATURES = [
'Make', 'ResolutionUnit', 'ImageLength', 'ExifOffset', 'Model',
'GPSInfo', 'ImageWidth', 'DateTime', 'YCbCrPositioning',
'Software', 'Orientation'
]


def validate_extracted_features(extracted_features, feature_names,
                                expected_feature_names, data):
    """Check the extracted features against the expected feature names.

    :param extracted_features: The extracted feature rows, expected to
        hold one row per row of data.
    :type extracted_features: list
    :param feature_names: The names of the extracted features.
    :type feature_names: list[str]
    :param expected_feature_names: The expected feature names. The first
        entry must match the first extracted name; the remaining names
        may appear in any order.
    :type expected_feature_names: list[str]
    :param data: The dataset the features were extracted from.
    :raises AssertionError: If any of the checks fails.
    """
    assert len(extracted_features) == len(data), (
        f'expected {len(data)} feature rows, '
        f'got {len(extracted_features)}')
    assert feature_names[0] == expected_feature_names[0], (
        f'first feature should be {expected_feature_names[0]!r}, '
        f'got {feature_names[0]!r}')
    # the order of the remaining names is not fixed, so only check
    # membership and then compare the overall counts
    for name in feature_names[1:]:
        assert name in expected_feature_names, (
            f'unexpected feature name {name!r}')
    assert len(feature_names) == len(expected_feature_names), (
        f'expected {len(expected_feature_names)} feature names, '
        f'got {len(feature_names)}')
    assert len(extracted_features[0]) == len(feature_names), (
        f'first row has {len(extracted_features[0])} values for '
        f'{len(feature_names)} feature names')


def extract_dataset_features(data):
    """Extract features from data using the label column and RGB mode.

    :param data: The image dataset to extract features from.
    :return: The result of extract_features, called without any
        feature metadata.
    """
    return extract_features(
        image_dataset=data,
        target_column=ImageColumns.LABEL,
        image_mode=ImageModes.RGB,
        feature_metadata=None,
    )


class TestFeatureExtractors:
    """Feature extraction checks across the sample image datasets."""

    def test_extract_features_fridge_object_detection(self):
        """Fridge object detection data yields the fridge metadata."""
        dataset = load_fridge_object_detection_dataset(automl_format=False)
        features, names = extract_dataset_features(dataset)
        expected_names = [MEAN_PIXEL_VALUE, *FRIDGE_METADATA_FEATURES]
        validate_extracted_features(features, names,
                                    expected_names, dataset)

    def test_extract_features_fridge_metadata(self):
        """Fridge classification data yields the fridge metadata."""
        dataset = load_fridge_dataset()
        features, names = extract_dataset_features(dataset)
        expected_names = [MEAN_PIXEL_VALUE, *FRIDGE_METADATA_FEATURES]
        validate_extracted_features(features, names,
                                    expected_names, dataset)

    def test_extract_features_imagenet_metadata(self):
        """Imagenet data yields only the mean pixel value feature."""
        dataset = load_imagenet_dataset()
        features, names = extract_dataset_features(dataset)
        expected_names = [MEAN_PIXEL_VALUE]
        validate_extracted_features(features, names,
                                    expected_names, dataset)

    def test_extract_features_flowers_metadata(self):
        """Flowers data yields only the mean pixel value feature."""
        dataset = load_flowers_dataset(upscale=False)
        features, names = extract_dataset_features(dataset)
        expected_names = [MEAN_PIXEL_VALUE]
        validate_extracted_features(features, names,
                                    expected_names, dataset)

    def test_extract_features_mixed_exif_XPComment_metadata(self):
        """Extra XPComment metadata on some images adds one feature."""
        dataset = load_fridge_dataset(add_extra_mixed_metadata=True)
        features, names = extract_dataset_features(dataset)
        expected_names = [
            MEAN_PIXEL_VALUE,
            'XPComment',
            *FRIDGE_METADATA_FEATURES,
        ]
        validate_extracted_features(features, names,
                                    expected_names, dataset)
10 changes: 6 additions & 4 deletions responsibleai_vision/tests/test_rai_vision_insights.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
from responsibleai.feature_metadata import FeatureMetadata
from responsibleai_vision import ModelTask, RAIVisionInsights
from responsibleai_vision.common.constants import (ExplainabilityDefaults,
ImageColumns)
ImageColumns,
ImageModes)

DEFAULT_MAX_EVALS = ExplainabilityDefaults.DEFAULT_MAX_EVALS
DEFAULT_NUM_MASKS = ExplainabilityDefaults.DEFAULT_NUM_MASKS
Expand All @@ -40,7 +41,7 @@ def test_rai_insights_image_classification_imagenet(self):
task_type = ModelTask.IMAGE_CLASSIFICATION
class_names = load_imagenet_labels()
run_rai_insights(pred, data[:3], ImageColumns.LABEL,
task_type, class_names, image_mode='RGB')
task_type, class_names, image_mode=ImageModes.RGB)

@pytest.mark.parametrize('max_evals', [None, 10, 200])
def test_rai_insights_image_classification_max_evals(self, max_evals):
Expand All @@ -51,7 +52,7 @@ def test_rai_insights_image_classification_max_evals(self, max_evals):
# run on a single image to avoid running out of memory on
# test machines
run_rai_insights(pred, data[:1], ImageColumns.LABEL,
task_type, class_names, image_mode='RGB',
task_type, class_names, image_mode=ImageModes.RGB,
test_explainer=True, max_evals=max_evals)

@pytest.mark.parametrize('max_evals', [-100, -1, 0])
Expand All @@ -63,7 +64,8 @@ def test_rai_insights_invalid_max_evals(self, max_evals):
with pytest.raises(ValueError,
match="max_evals must be greater than 0"):
run_rai_insights(pred, data[:1], ImageColumns.LABEL,
task_type, class_names, image_mode='RGB',
task_type, class_names,
image_mode=ImageModes.RGB,
test_explainer=True, max_evals=max_evals)

def test_rai_insights_image_classification_fridge(self):
Expand Down

0 comments on commit 065f3b3

Please sign in to comment.