[MIEB] Add image classification and zero shot classification tasks (#1101)

* fix task metadata

* use overrideable column names

* add CIFAR datasets

* add caltech101 dataset

* add FGVC aircraft dataset

* add food 101 dataset

* add OxfordPets dataset

* remove comments

* correct cifar100 path

* update cifar100 classification results

* cifar zero shot results

* add caltech101 zero shot

* matching CLIP paper implementation (see the illustrative zero-shot sketch after the changed-files summary below)

* add aircraft and food zero shot

* add oxford pets zero shot
isaac-chung authored Jul 20, 2024
1 parent b8561b8 commit 3f888fa
Showing 29 changed files with 1,076 additions and 13 deletions.
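
The zero-shot classification task files referenced in the commit message are among the files not expanded in this view. As an illustrative sketch only (not the evaluator added by this PR): the "matching CLIP paper implementation" bullet refers to the CLIP recipe of embedding one text prompt per class name (e.g. "a photo of a {label}.") and taking the softmax over image-text similarities. The sketch below shows that recipe with Hugging Face transformers rather than mteb; the checkpoint and test image URL are placeholder assumptions, not part of this commit.

```python
# Sketch of CLIP-style zero-shot classification: prompt template per class,
# softmax over image-text similarity scores. Placeholder checkpoint and image.
import requests
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# CIFAR-10 class names wrapped in the CLIP-paper prompt template.
labels = ["airplane", "automobile", "bird", "cat", "deer",
          "dog", "frog", "horse", "ship", "truck"]
prompts = [f"a photo of a {label}." for label in labels]

image = Image.open(
    requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw
)

inputs = processor(text=prompts, images=image, return_tensors="pt", padding=True)
with torch.no_grad():
    logits = model(**inputs).logits_per_image  # shape: (1, num_prompts)
print(labels[logits.softmax(dim=-1).argmax().item()])
```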
23 changes: 13 additions & 10 deletions mteb/abstasks/Image/AbsTaskImageClassification.py
@@ -29,6 +29,9 @@ class AbsTaskImageClassification(AbsTask):
label: int
"""

+ image_column_name: str = "image"
+ label_column_name: str = "label"

def __init__(
self,
method: str = "logReg",
@@ -44,12 +47,12 @@ def __init__(
self.n_experiments: int = ( # type: ignore
n_experiments
if n_experiments is not None
- else self.metadata_dict.get("n_experiments", 10)
+ else self.metadata_dict.get("n_experiments", 5)
)
self.samples_per_label: int = ( # type: ignore
samples_per_label
if samples_per_label is not None
- else self.metadata_dict.get("samples_per_label", 8)
+ else self.metadata_dict.get("samples_per_label", 16)
)

# kNN parameters
@@ -126,8 +129,8 @@ def _evaluate_subset(
)
# Bootstrap `self.samples_per_label` samples per label for each split
X_sampled, y_sampled, idxs = self._undersample_data(
train_split["image"], # type: ignore
train_split["label"], # type: ignore
train_split[self.image_column_name], # type: ignore
train_split[self.label_column_name], # type: ignore
self.samples_per_label,
idxs,
)
@@ -136,8 +139,8 @@
evaluator = ImagekNNClassificationEvaluator(
X_sampled,
y_sampled,
eval_split["image"], # type: ignore
eval_split["label"], # type: ignore
eval_split[self.image_column_name], # type: ignore
eval_split[self.label_column_name], # type: ignore
task_name=self.metadata.name,
encode_kwargs=encode_kwargs,
**params,
@@ -146,8 +149,8 @@
evaluator = ImagekNNClassificationEvaluatorPytorch(
X_sampled,
y_sampled,
eval_split["image"], # type: ignore
eval_split["label"], # type: ignore
eval_split[self.image_column_name], # type: ignore
eval_split[self.label_column_name], # type: ignore
task_name=self.metadata.name,
encode_kwargs=encode_kwargs,
**params,
@@ -156,8 +159,8 @@
evaluator = ImagelogRegClassificationEvaluator(
X_sampled,
y_sampled,
eval_split["image"], # type: ignore
eval_split["label"], # type: ignore
eval_split[self.image_column_name], # type: ignore
eval_split[self.label_column_name], # type: ignore
task_name=self.metadata.name,
encode_kwargs=encode_kwargs,
**params,
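
Net effect of the hunks above: hard-coded "image"/"label" column access is replaced by the class-level image_column_name / label_column_name attributes, so a task subclass only declares the column names its Hugging Face dataset actually uses. A minimal sketch (hypothetical task, task metadata omitted for brevity):

```python
from mteb.abstasks import AbsTaskImageClassification


class MyImageClassification(AbsTaskImageClassification):
    # The underlying dataset stores images under "img" and labels under
    # "fine_label", so override the new class attributes instead of
    # renaming the dataset columns.
    image_column_name: str = "img"
    label_column_name: str = "fine_label"
```

The CIFAR and FGVCAircraft tasks added later in this commit use exactly this mechanism.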
3 changes: 3 additions & 0 deletions mteb/abstasks/TaskMetadata.py
@@ -39,6 +39,8 @@
"Reasoning as Retrieval",
"Rendered Texts Understanding",
"Image Text Retrieval",
"Object recognition",
"Scene recognition",
"Caption Pairing",
]

@@ -56,6 +58,7 @@
"Poetry",
"Religious",
"Reviews",
"Scene",
"Social",
"Spoken",
"Subtitles",
5 changes: 5 additions & 0 deletions mteb/tasks/Image/ImageClassification/__init__.py
@@ -1,3 +1,8 @@
from __future__ import annotations

+ from .eng.Caltech101Classification import *
+ from .eng.CIFAR import *
+ from .eng.FGVCAircraftClassification import *
+ from .eng.Food101Classification import *
from .eng.OxfordFlowersClassification import *
+ from .eng.OxfordPetsClassification import *
88 changes: 88 additions & 0 deletions mteb/tasks/Image/ImageClassification/eng/CIFAR.py
@@ -0,0 +1,88 @@
from __future__ import annotations

from mteb.abstasks.TaskMetadata import TaskMetadata

from .....abstasks import AbsTaskImageClassification


class CIFAR10Classification(AbsTaskImageClassification):
metadata = TaskMetadata(
name="CIFAR10",
description="Classifying images from 10 classes.",
reference="https://huggingface.co/datasets/uoft-cs/cifar10",
dataset={
"path": "uoft-cs/cifar10",
"revision": "0b2714987fa478483af9968de7c934580d0bb9a2",
},
type="Classification",
category="s2s",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=(
"2008-01-01",
"2009-01-01",
), # Estimated range for the collection of the images
domains=["Web"],
task_subtypes=["Object recognition"],
license="Not specified",
socioeconomic_status="mixed",
annotations_creators="derived",
dialect=[],
modalities=["image"],
sample_creation="created",
bibtex_citation=""" @TECHREPORT{Krizhevsky09learningmultiple,
author = {Alex Krizhevsky},
title = {Learning multiple layers of features from tiny images},
institution = {},
year = {2009}
}
""",
descriptive_stats={
"n_samples": {"test": 10000},
"avg_character_length": {"test": 431.4},
},
)
image_column_name: str = "img"


class CIFAR100Classification(AbsTaskImageClassification):
metadata = TaskMetadata(
name="CIFAR100",
description="Classifying images from 100 classes.",
reference="https://huggingface.co/datasets/uoft-cs/cifar100",
dataset={
"path": "uoft-cs/cifar100",
"revision": "aadb3af77e9048adbea6b47c21a81e47dd092ae5",
},
type="Classification",
category="s2s",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=(
"2008-01-01",
"2009-01-01",
), # Estimated range for the collection of the images
domains=["Web"],
task_subtypes=["Object recognition"],
license="Not specified",
socioeconomic_status="mixed",
annotations_creators="derived",
dialect=[],
modalities=["image"],
sample_creation="created",
bibtex_citation=""" @TECHREPORT{Krizhevsky09learningmultiple,
author = {Alex Krizhevsky},
title = {Learning multiple layers of features from tiny images},
institution = {},
year = {2009}
}
""",
descriptive_stats={
"n_samples": {"test": 10000},
"avg_character_length": {"test": 431.4},
},
)
image_column_name: str = "img"
label_column_name: str = "fine_label"
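
Once registered, the new tasks can be selected by their metadata names. A hedged sketch, assuming the standard mteb.get_tasks / mteb.MTEB entry points also cover these MIEB image tasks and that `model` is an image-capable encoder wrapper (not part of this commit):

```python
import mteb

# Select the newly added classification tasks by name.
tasks = mteb.get_tasks(tasks=["CIFAR10", "CIFAR100", "Caltech101", "FGVCAircraft"])
evaluation = mteb.MTEB(tasks=tasks)

# `model` must implement the image-encoding interface the image evaluators
# expect; it is left out here because model wrappers are outside this commit.
# results = evaluation.run(model, output_folder="results")
```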
50 changes: 50 additions & 0 deletions mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py
@@ -0,0 +1,50 @@
from __future__ import annotations

from mteb.abstasks.TaskMetadata import TaskMetadata

from .....abstasks import AbsTaskImageClassification


class Caltech101Classification(AbsTaskImageClassification):
metadata = TaskMetadata(
name="Caltech101",
description="Classifying images of 101 widely varied objects.",
reference="https://ieeexplore.ieee.org/document/1384978",
dataset={
"path": "HuggingFaceM4/Caltech-101",
"name": "with_background_category",
"revision": "851374102055782c84f89b1b4e9d128a6568847b",
},
type="Classification",
category="s2s",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=(
"2003-01-01",
"2004-01-01",
), # Estimated range for the collection of the images
domains=["Encyclopaedic"],
task_subtypes=["Object recognition"],
license="Not specified",
socioeconomic_status="mixed",
annotations_creators="derived",
dialect=[],
modalities=["image"],
sample_creation="created",
bibtex_citation="""@INPROCEEDINGS{1384978,
author={Li Fei-Fei and Fergus, R. and Perona, P.},
booktitle={2004 Conference on Computer Vision and Pattern Recognition Workshop},
title={Learning Generative Visual Models from Few Training Examples: An Incremental Bayesian Approach Tested on 101 Object Categories},
year={2004},
volume={},
number={},
pages={178-178},
keywords={Bayesian methods;Testing;Humans;Maximum likelihood estimation;Assembly;Shape;Machine vision;Image recognition;Parameter estimation;Image databases},
doi={10.1109/CVPR.2004.383}}
""",
descriptive_stats={
"n_samples": {"test": 6084},
"avg_character_length": {"test": 431.4},
},
)
49 changes: 49 additions & 0 deletions mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py
@@ -0,0 +1,49 @@
from __future__ import annotations

from mteb.abstasks.TaskMetadata import TaskMetadata

from .....abstasks import AbsTaskImageClassification


class FGVCAircraftClassification(AbsTaskImageClassification):
metadata = TaskMetadata(
name="FGVCAircraft",
description="Classifying aircraft images from 41 manufacturers and 102 variants.",
reference="https://arxiv.org/abs/1306.5151",
dataset={
"path": "HuggingFaceM4/FGVC-Aircraft",
"revision": "91860adfc9a09aabca5cddb5247442109b38e213",
},
type="Classification",
category="s2s",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=(
"2009-01-01",
"2010-01-01",
), # Estimated range for the collection of the images
domains=["Encyclopaedic"],
task_subtypes=["Object recognition"],
license="Not specified",
socioeconomic_status="mixed",
annotations_creators="derived",
dialect=[],
modalities=["image"],
sample_creation="created",
bibtex_citation="""@misc{maji2013finegrainedvisualclassificationaircraft,
title={Fine-Grained Visual Classification of Aircraft},
author={Subhransu Maji and Esa Rahtu and Juho Kannala and Matthew Blaschko and Andrea Vedaldi},
year={2013},
eprint={1306.5151},
archivePrefix={arXiv},
primaryClass={cs.CV},
url={https://arxiv.org/abs/1306.5151},
}
""",
descriptive_stats={
"n_samples": {"test": 3333},
"avg_character_length": {"test": 431.4},
},
)
label_column_name: str = "variant" ## could be family, manufacturer, or variant. Variant has the higher number of classes.
45 changes: 45 additions & 0 deletions mteb/tasks/Image/ImageClassification/eng/Food101Classification.py
@@ -0,0 +1,45 @@
from __future__ import annotations

from mteb.abstasks.TaskMetadata import TaskMetadata

from .....abstasks import AbsTaskImageClassification


class Food101Classification(AbsTaskImageClassification):
metadata = TaskMetadata(
name="Food101Classification",
description="Classifying food.",
reference="https://huggingface.co/datasets/ethz/food101",
dataset={
"path": "ethz/food101",
"revision": "e06acf2a88084f04bce4d4a525165d68e0a36c38",
},
type="Classification",
category="s2s",
eval_splits=["validation"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=(
"2013-01-01",
"2014-01-01",
), # Estimated range for the collection of the images
domains=["Web"],
task_subtypes=["Object recognition"],
license="Not specified",
socioeconomic_status="mixed",
annotations_creators="derived",
dialect=[],
modalities=["image"],
sample_creation="created",
bibtex_citation=""" @inproceedings{bossard14,
title = {Food-101 -- Mining Discriminative Components with Random Forests},
author = {Bossard, Lukas and Guillaumin, Matthieu and Van Gool, Luc},
booktitle = {European Conference on Computer Vision},
year = {2014}
}
""",
descriptive_stats={
"n_samples": {"validation": 25300},
"avg_character_length": {"validation": 431.4},
},
)
1 change: 0 additions & 1 deletion mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py
@@ -23,7 +23,6 @@ class OxfordFlowersClassification(AbsTaskImageClassification):
"2012-01-01",
"2015-12-31",
), # Estimated range for the collection of reviews
form=["written"],
domains=["Reviews"],
task_subtypes=["Sentiment/Hate speech"],
license="Not specified",
48 changes: 48 additions & 0 deletions mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py
@@ -0,0 +1,48 @@
from __future__ import annotations

from mteb.abstasks.TaskMetadata import TaskMetadata

from .....abstasks import AbsTaskImageClassification


class OxfordPetsClassification(AbsTaskImageClassification):
metadata = TaskMetadata(
name="OxfordPets",
description="Classifying animal images.",
reference="https://arxiv.org/abs/1306.5151",
dataset={
"path": "isaacchung/OxfordPets",
"revision": "557b480fae8d69247be74d9503b378a09425096f",
},
type="Classification",
category="s2s",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=(
"2009-01-01",
"2010-01-01",
), # Estimated range for the collection of the images
domains=["Encyclopaedic"],
task_subtypes=["Object recognition"],
license="Not specified",
socioeconomic_status="mixed",
annotations_creators="derived",
dialect=[],
modalities=["image"],
sample_creation="created",
bibtex_citation="""@misc{maji2013finegrainedvisualclassificationaircraft,
title={Fine-Grained Visual Classification of Aircraft},
author={Subhransu Maji and Esa Rahtu and Juho Kannala and Matthew Blaschko and Andrea Vedaldi},
year={2013},
eprint={1306.5151},
archivePrefix={arXiv},
primaryClass={cs.CV},
url={https://arxiv.org/abs/1306.5151},
}
""",
descriptive_stats={
"n_samples": {"test": 3669},
"avg_character_length": {"test": 431.4},
},
)
1 change: 0 additions & 1 deletion mteb/tasks/Image/T2IRetrieval/eng/MSCOCOT2IRetrieval.py
@@ -21,7 +21,6 @@ class MSCOCOT2IRetrieval(AbsTaskT2IRetrieval):
eval_langs=["eng-Latn"],
main_score="ndcg_at_10",
date=("2018-01-01", "2018-12-31"),
form=["written"],
domains=["Encyclopaedic"],
task_subtypes=["Image Text Retrieval"],
license="CC BY-SA 4.0",