-
Notifications
You must be signed in to change notification settings - Fork 247
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[MIEB] Add image classification and zero-shot classification tasks (#1101)
* fix task metadata
* use overridable column names
* add CIFAR datasets
* add Caltech101 dataset
* add FGVC Aircraft dataset
* add Food101 dataset
* add OxfordPets dataset
* remove comments
* correct CIFAR100 path
* update CIFAR100 classification results
* CIFAR zero-shot results
* add Caltech101 zero-shot
* match CLIP paper implementation
* add Aircraft and Food zero-shot
* add OxfordPets zero-shot
- Loading branch information
1 parent
b8561b8
commit 3f888fa
Showing
29 changed files
with
1,076 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,8 @@ | ||
from __future__ import annotations | ||
|
||
from .eng.Caltech101Classification import * | ||
from .eng.CIFAR import * | ||
from .eng.FGVCAircraftClassification import * | ||
from .eng.Food101Classification import * | ||
from .eng.OxfordFlowersClassification import * | ||
from .eng.OxfordPetsClassification import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
from __future__ import annotations | ||
|
||
from mteb.abstasks.TaskMetadata import TaskMetadata | ||
|
||
from .....abstasks import AbsTaskImageClassification | ||
|
||
|
||
class CIFAR10Classification(AbsTaskImageClassification):
    """Image classification task over CIFAR-10 (10 classes).

    Evaluated on the ``test`` split with accuracy as the main score.
    """

    metadata = TaskMetadata(
        name="CIFAR10",
        description="Classifying images from 10 classes.",
        reference="https://huggingface.co/datasets/uoft-cs/cifar10",
        dataset={
            "path": "uoft-cs/cifar10",
            # Pinned commit so results stay reproducible.
            "revision": "0b2714987fa478483af9968de7c934580d0bb9a2",
        },
        type="Classification",
        category="s2s",
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=(
            "2008-01-01",
            "2009-01-01",
        ),  # Estimated range for when the images were collected
        domains=["Web"],
        task_subtypes=["Object recognition"],
        license="Not specified",
        socioeconomic_status="mixed",
        annotations_creators="derived",
        dialect=[],
        modalities=["image"],
        sample_creation="created",
        bibtex_citation=""" @TECHREPORT{Krizhevsky09learningmultiple,
            author = {Alex Krizhevsky},
            title = {Learning multiple layers of features from tiny images},
            institution = {},
            year = {2009}
        }
        """,
        descriptive_stats={
            "n_samples": {"test": 10000},
            # NOTE(review): a character length is not meaningful for images;
            # this value looks like a placeholder carried over from a text
            # task -- TODO confirm or replace with an image statistic.
            "avg_character_length": {"test": 431.4},
        },
    )
    # Dataset column that holds the image; presumably overrides the
    # base-class default -- confirm against AbsTaskImageClassification.
    image_column_name: str = "img"
|
||
|
||
class CIFAR100Classification(AbsTaskImageClassification):
    """Image classification task over CIFAR-100 (100 classes).

    Evaluated on the ``test`` split with accuracy as the main score.
    """

    metadata = TaskMetadata(
        name="CIFAR100",
        description="Classifying images from 100 classes.",
        reference="https://huggingface.co/datasets/uoft-cs/cifar100",
        dataset={
            "path": "uoft-cs/cifar100",
            # Pinned commit so results stay reproducible.
            "revision": "aadb3af77e9048adbea6b47c21a81e47dd092ae5",
        },
        type="Classification",
        category="s2s",
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=(
            "2008-01-01",
            "2009-01-01",
        ),  # Estimated range for when the images were collected
        domains=["Web"],
        task_subtypes=["Object recognition"],
        license="Not specified",
        socioeconomic_status="mixed",
        annotations_creators="derived",
        dialect=[],
        modalities=["image"],
        sample_creation="created",
        bibtex_citation=""" @TECHREPORT{Krizhevsky09learningmultiple,
            author = {Alex Krizhevsky},
            title = {Learning multiple layers of features from tiny images},
            institution = {},
            year = {2009}
        }
        """,
        descriptive_stats={
            "n_samples": {"test": 10000},
            # NOTE(review): a character length is not meaningful for images;
            # this value looks like a placeholder carried over from a text
            # task -- TODO confirm or replace with an image statistic.
            "avg_character_length": {"test": 431.4},
        },
    )
    # Dataset column that holds the image; presumably overrides the
    # base-class default -- confirm against AbsTaskImageClassification.
    image_column_name: str = "img"
    # Use the fine-grained label column so the task matches the 100 classes
    # promised in the description.
    label_column_name: str = "fine_label"
50 changes: 50 additions & 0 deletions
50
mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from __future__ import annotations | ||
|
||
from mteb.abstasks.TaskMetadata import TaskMetadata | ||
|
||
from .....abstasks import AbsTaskImageClassification | ||
|
||
|
||
class Caltech101Classification(AbsTaskImageClassification):
    """Image classification task over Caltech-101 object categories.

    Evaluated on the ``test`` split with accuracy as the main score.
    """

    metadata = TaskMetadata(
        name="Caltech101",
        description="Classifying images of 101 widely varied objects.",
        reference="https://ieeexplore.ieee.org/document/1384978",
        dataset={
            "path": "HuggingFaceM4/Caltech-101",
            # NOTE(review): this config name suggests an extra background
            # class on top of the 101 object categories -- confirm the label
            # count matches the description.
            "name": "with_background_category",
            # Pinned commit so results stay reproducible.
            "revision": "851374102055782c84f89b1b4e9d128a6568847b",
        },
        type="Classification",
        category="s2s",
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=(
            "2003-01-01",
            "2004-01-01",
        ),  # Estimated range for when the images were collected
        domains=["Encyclopaedic"],
        task_subtypes=["Object recognition"],
        license="Not specified",
        socioeconomic_status="mixed",
        annotations_creators="derived",
        dialect=[],
        modalities=["image"],
        sample_creation="created",
        bibtex_citation="""@INPROCEEDINGS{1384978,
            author={Li Fei-Fei and Fergus, R. and Perona, P.},
            booktitle={2004 Conference on Computer Vision and Pattern Recognition Workshop},
            title={Learning Generative Visual Models from Few Training Examples: An Incremental Bayesian Approach Tested on 101 Object Categories},
            year={2004},
            volume={},
            number={},
            pages={178-178},
            keywords={Bayesian methods;Testing;Humans;Maximum likelihood estimation;Assembly;Shape;Machine vision;Image recognition;Parameter estimation;Image databases},
            doi={10.1109/CVPR.2004.383}}
        """,
        descriptive_stats={
            "n_samples": {"test": 6084},
            # NOTE(review): a character length is not meaningful for images;
            # this value looks like a placeholder carried over from a text
            # task -- TODO confirm or replace with an image statistic.
            "avg_character_length": {"test": 431.4},
        },
    )
49 changes: 49 additions & 0 deletions
49
mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from __future__ import annotations | ||
|
||
from mteb.abstasks.TaskMetadata import TaskMetadata | ||
|
||
from .....abstasks import AbsTaskImageClassification | ||
|
||
|
||
class FGVCAircraftClassification(AbsTaskImageClassification):
    """Fine-grained aircraft classification task over FGVC-Aircraft.

    Evaluated on the ``test`` split with accuracy as the main score, using
    the ``variant`` column as the label.
    """

    metadata = TaskMetadata(
        name="FGVCAircraft",
        description="Classifying aircraft images from 41 manufacturers and 102 variants.",
        reference="https://arxiv.org/abs/1306.5151",
        dataset={
            "path": "HuggingFaceM4/FGVC-Aircraft",
            # Pinned commit so results stay reproducible.
            "revision": "91860adfc9a09aabca5cddb5247442109b38e213",
        },
        type="Classification",
        category="s2s",
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=(
            "2009-01-01",
            "2010-01-01",
        ),  # Estimated range for when the images were collected
        domains=["Encyclopaedic"],
        task_subtypes=["Object recognition"],
        license="Not specified",
        socioeconomic_status="mixed",
        annotations_creators="derived",
        dialect=[],
        modalities=["image"],
        sample_creation="created",
        bibtex_citation="""@misc{maji2013finegrainedvisualclassificationaircraft,
            title={Fine-Grained Visual Classification of Aircraft},
            author={Subhransu Maji and Esa Rahtu and Juho Kannala and Matthew Blaschko and Andrea Vedaldi},
            year={2013},
            eprint={1306.5151},
            archivePrefix={arXiv},
            primaryClass={cs.CV},
            url={https://arxiv.org/abs/1306.5151},
        }
        """,
        descriptive_stats={
            "n_samples": {"test": 3333},
            # NOTE(review): a character length is not meaningful for images;
            # this value looks like a placeholder carried over from a text
            # task -- TODO confirm or replace with an image statistic.
            "avg_character_length": {"test": 431.4},
        },
    )
    # Could be "family", "manufacturer", or "variant"; "variant" is used
    # because it has the most classes.
    label_column_name: str = "variant"
45 changes: 45 additions & 0 deletions
45
mteb/tasks/Image/ImageClassification/eng/Food101Classification.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from __future__ import annotations | ||
|
||
from mteb.abstasks.TaskMetadata import TaskMetadata | ||
|
||
from .....abstasks import AbsTaskImageClassification | ||
|
||
|
||
class Food101Classification(AbsTaskImageClassification):
    """Food image classification task over Food-101.

    Evaluated on the ``validation`` split with accuracy as the main score.
    """

    metadata = TaskMetadata(
        name="Food101Classification",
        description="Classifying food.",
        reference="https://huggingface.co/datasets/ethz/food101",
        dataset={
            "path": "ethz/food101",
            # Pinned commit so results stay reproducible.
            "revision": "e06acf2a88084f04bce4d4a525165d68e0a36c38",
        },
        type="Classification",
        category="s2s",
        # Evaluation uses "validation" rather than "test"; presumably the Hub
        # dataset exposes no "test" split -- confirm.
        eval_splits=["validation"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=(
            "2013-01-01",
            "2014-01-01",
        ),  # Estimated range for when the images were collected
        domains=["Web"],
        task_subtypes=["Object recognition"],
        license="Not specified",
        socioeconomic_status="mixed",
        annotations_creators="derived",
        dialect=[],
        modalities=["image"],
        sample_creation="created",
        bibtex_citation=""" @inproceedings{bossard14,
            title = {Food-101 -- Mining Discriminative Components with Random Forests},
            author = {Bossard, Lukas and Guillaumin, Matthieu and Van Gool, Luc},
            booktitle = {European Conference on Computer Vision},
            year = {2014}
        }
        """,
        descriptive_stats={
            "n_samples": {"validation": 25300},
            # NOTE(review): a character length is not meaningful for images;
            # this value looks like a placeholder carried over from a text
            # task -- TODO confirm or replace with an image statistic.
            "avg_character_length": {"validation": 431.4},
        },
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
from __future__ import annotations | ||
|
||
from mteb.abstasks.TaskMetadata import TaskMetadata | ||
|
||
from .....abstasks import AbsTaskImageClassification | ||
|
||
|
||
class OxfordPetsClassification(AbsTaskImageClassification):
    """Pet image classification task over the Oxford-IIIT Pet dataset.

    Evaluated on the ``test`` split with accuracy as the main score.
    """

    metadata = TaskMetadata(
        name="OxfordPets",
        description="Classifying animal images.",
        # Points at the Oxford-IIIT Pet dataset page; the previous value was
        # the FGVC-Aircraft arXiv link, copied from the aircraft task.
        reference="https://www.robots.ox.ac.uk/~vgg/data/pets/",
        dataset={
            "path": "isaacchung/OxfordPets",
            # Pinned commit so results stay reproducible.
            "revision": "557b480fae8d69247be74d9503b378a09425096f",
        },
        type="Classification",
        category="s2s",
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        # NOTE(review): this range matches the aircraft task and may have
        # been copied with it; the dataset paper is from 2012 -- confirm.
        date=(
            "2009-01-01",
            "2010-01-01",
        ),  # Estimated range for when the images were collected
        domains=["Encyclopaedic"],
        task_subtypes=["Object recognition"],
        license="Not specified",
        socioeconomic_status="mixed",
        annotations_creators="derived",
        dialect=[],
        modalities=["image"],
        sample_creation="created",
        # Cites the Oxford-IIIT Pet paper (Parkhi et al., CVPR 2012) rather
        # than the FGVC-Aircraft paper that was here before.
        bibtex_citation="""@INPROCEEDINGS{6248092,
            author={Parkhi, Omkar M and Vedaldi, Andrea and Zisserman, Andrew and Jawahar, C. V.},
            booktitle={2012 IEEE Conference on Computer Vision and Pattern Recognition},
            title={Cats and dogs},
            year={2012},
            pages={3498-3505},
            doi={10.1109/CVPR.2012.6248092}}
        """,
        descriptive_stats={
            "n_samples": {"test": 3669},
            # NOTE(review): a character length is not meaningful for images;
            # this value looks like a placeholder carried over from a text
            # task -- TODO confirm or replace with an image statistic.
            "avg_character_length": {"test": 431.4},
        },
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.