Skip to content

Commit

Permalink
fix duplicate categorical features added from image exif metadata in RAI Vision Dashboard
Browse files Browse the repository at this point in the history
  • Loading branch information
imatiach-msft committed Jan 4, 2024
1 parent 2556253 commit 470d176
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def extract_features(image_dataset: pd.DataFrame,
exifdata = im.getexif()
for tag_id in exifdata:
# get the tag name, instead of human unreadable tag id
tag = TAGS.get(tag_id, tag_id)
tag = str(TAGS.get(tag_id, tag_id))
data = exifdata.get(tag_id)
# decode bytes
if isinstance(data, bytes):
Expand All @@ -92,8 +92,8 @@ def extract_features(image_dataset: pd.DataFrame,
feature_metadata = FeatureMetadata()
feature_metadata.categorical_features = []
if tag in feature_names:
feature_metadata.categorical_features.append(
str(tag))
if tag not in feature_metadata.categorical_features:
feature_metadata.categorical_features.append(tag)
tag_index = feature_names.index(tag)
row_feature_values[tag_index] = data
else:
Expand Down
27 changes: 20 additions & 7 deletions responsibleai_vision/tests/test_feature_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from responsibleai_vision.common.constants import (ExtractedFeatures,
ImageColumns, ImageModes)
from responsibleai_vision.utils.feature_extractors import extract_features
from responsibleai.feature_metadata import FeatureMetadata

MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
FRIDGE_METADATA_FEATURES = [
Expand All @@ -18,17 +19,23 @@


def validate_extracted_features(extracted_features, feature_names,
expected_feature_names, data):
expected_feature_names, data,
feature_metadata=None):
assert len(extracted_features) == len(data)
assert feature_names[0] == expected_feature_names[0]
for i in range(1, len(feature_names)):
assert feature_names[i] in expected_feature_names
assert len(feature_names) == len(expected_feature_names)
assert len(extracted_features[0]) == len(feature_names)
if feature_metadata is not None:
assert len(feature_metadata.categorical_features) <= len(feature_names)
for categorical_feature in feature_metadata.categorical_features:
assert categorical_feature in feature_names


def extract_dataset_features(data):
return extract_features(data, ImageColumns.LABEL, ImageModes.RGB, None)
def extract_dataset_features(data, feature_metadata=None):
return extract_features(data, ImageColumns.LABEL, ImageModes.RGB,
feature_metadata=feature_metadata)


class TestFeatureExtractors(object):
Expand All @@ -55,15 +62,21 @@ def test_extract_features_imagenet_metadata(self):

def test_extract_features_flowers_metadata(self):
data = load_flowers_dataset(upscale=False)
extracted_features, feature_names = extract_dataset_features(data)
feature_metadata = FeatureMetadata()
extracted_features, feature_names = extract_dataset_features(
data, feature_metadata=feature_metadata)
expected_feature_names = [MEAN_PIXEL_VALUE]
validate_extracted_features(extracted_features, feature_names,
expected_feature_names, data)
expected_feature_names, data,
feature_metadata)

def test_extract_features_mixed_exif_XPComment_metadata(self):
data = load_fridge_dataset(add_extra_mixed_metadata=True)
extracted_features, feature_names = extract_dataset_features(data)
feature_metadata = FeatureMetadata()
extracted_features, feature_names = extract_dataset_features(
data, feature_metadata=feature_metadata)
expected_feature_names = [MEAN_PIXEL_VALUE, 'XPComment']
expected_feature_names += FRIDGE_METADATA_FEATURES
validate_extracted_features(extracted_features, feature_names,
expected_feature_names, data)
expected_feature_names, data,
feature_metadata)

0 comments on commit 470d176

Please sign in to comment.