From 948dd47a50e8754f47cd6bc48f34c6225b30417e Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Sun, 29 Sep 2024 15:59:49 +0300
Subject: [PATCH 1/7] add datasets

---
 mteb/tasks/Reranking/__init__.py              |   1 +
 .../Reranking/multilingual/ESCIReranking.py   |  86 +++++++++++
 mteb/tasks/Retrieval/__init__.py              |   2 +
 mteb/tasks/Retrieval/jpn/JaqketRetrieval.py   |  46 ++++++
 .../Retrieval/multilingual/MrTidyRetrieval.py | 137 ++++++++++++++++++
 5 files changed, 272 insertions(+)
 create mode 100644 mteb/tasks/Reranking/multilingual/ESCIReranking.py
 create mode 100644 mteb/tasks/Retrieval/jpn/JaqketRetrieval.py
 create mode 100644 mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py

diff --git a/mteb/tasks/Reranking/__init__.py b/mteb/tasks/Reranking/__init__.py
index f96985d458..a4b302a17f 100644
--- a/mteb/tasks/Reranking/__init__.py
+++ b/mteb/tasks/Reranking/__init__.py
@@ -8,6 +8,7 @@
 from .fra.AlloprofReranking import *
 from .fra.SyntecReranking import *
 from .jpn.MMarcoReranking import *
+from .multilingual.ESCIReranking import *
 from .multilingual.MIRACLReranking import *
 from .multilingual.WikipediaRerankingMultilingual import *
 from .rus.RuBQReranking import *
diff --git a/mteb/tasks/Reranking/multilingual/ESCIReranking.py b/mteb/tasks/Reranking/multilingual/ESCIReranking.py
new file mode 100644
index 0000000000..39b7aa17c6
--- /dev/null
+++ b/mteb/tasks/Reranking/multilingual/ESCIReranking.py
@@ -0,0 +1,86 @@
+from __future__ import annotations
+
+import logging
+
+from mteb.abstasks.AbsTaskReranking import AbsTaskReranking
+from mteb.abstasks.MultilingualTask import MultilingualTask
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+logger = logging.getLogger(__name__)
+
+_EVAL_SPLIT = "test"
+_LANGUAGES = {
+    "us": ["eng-Latn"],
+    "es": ["spa-Latn"],
+    "jp": ["jpn-Jpan"],
+}
+
+_CITATION = """@article{reddy2022shopping,
+    title={Shopping Queries Dataset: A Large-Scale {ESCI} Benchmark for Improving Product Search},
+    author={Chandan K. Reddy and Lluís Màrquez and Fran Valero and Nikhil Rao and Hugo Zaragoza and Sambaran Bandyopadhyay and Arnab Biswas and Anlu Xing and Karthik Subbian},
+    year={2022},
+    eprint={2206.06588},
+    archivePrefix={arXiv}
+}"""
+
+
+class ESCIReranking(MultilingualTask, AbsTaskReranking):
+    metadata = TaskMetadata(
+        name="ESCIReranking",
+        description="",
+        reference="https://github.com/amazon-science/esci-data/",
+        dataset={
+            "path": "mteb/esci",
+            "revision": "237f74be0503482b4e8bc1b83778c7a87ea93fd8",
+        },
+        type="Reranking",
+        category="s2p",
+        modalities=["text"],
+        eval_splits=[_EVAL_SPLIT],
+        eval_langs=_LANGUAGES,
+        main_score="NDCG@10",
+        date=("2022-06-14", "2022-06-14"),
+        domains=["Written"],
+        task_subtypes=[],
+        license="apache-2.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="created",
+        bibtex_citation=_CITATION,
+        descriptive_stats={
+            "test": {
+                "num_samples": 29285,
+                "num_positive": 29285,
+                "num_negative": 29285,
+                "avg_query_len": 19.691890046098685,
+                "avg_positive_len": 9.268089465596722,
+                "avg_negative_len": 1.5105002561038074,
+                "hf_subset_descriptive_stats": {
+                    "us": {
+                        "num_samples": 21296,
+                        "num_positive": 21296,
+                        "num_negative": 21296,
+                        "avg_query_len": 21.440833959429,
+                        "avg_positive_len": 8.892515026296017,
+                        "avg_negative_len": 1.1956705484598047,
+                    },
+                    "es": {
+                        "num_samples": 3703,
+                        "num_positive": 3703,
+                        "num_negative": 3703,
+                        "avg_query_len": 20.681609505806104,
+                        "avg_positive_len": 10.561706724277613,
+                        "avg_negative_len": 2.749932487172563,
+                    },
+                    "jp": {
+                        "num_samples": 4286,
+                        "num_positive": 4286,
+                        "num_negative": 4286,
+                        "avg_query_len": 10.146756882874476,
+                        "avg_positive_len": 10.016565562295847,
+                        "avg_negative_len": 2.003966402239851,
+                    },
+                },
+            }
+        },
+    )
diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py
index 3975cd9bd3..a25eec33b4 100644
--- a/mteb/tasks/Retrieval/__init__.py
+++ b/mteb/tasks/Retrieval/__init__.py
@@ -94,6 +94,7 @@
 from .fra.SyntecRetrieval import *
 from .hun.HunSum2 import *
 from .jpn.JaGovFaqsRetrieval import *
+from .jpn.JaqketRetrieval import *
 from .jpn.JaQuADRetrieval import *
 from .jpn.NLPJournalAbsIntroRetrieval import *
 from .jpn.NLPJournalTitleAbsRetrieval import *
@@ -107,6 +108,7 @@
 from .multilingual.MintakaRetrieval import *
 from .multilingual.MIRACLRetrieval import *
 from .multilingual.MLQARetrieval import *
+from .multilingual.MrTidyRetrieval import *
 from .multilingual.MultiLongDocRetrieval import *
 from .multilingual.NeuCLIR2022Retrieval import *
 from .multilingual.NeuCLIR2023Retrieval import *
diff --git a/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py b/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py
new file mode 100644
index 0000000000..6c3d6a86ca
--- /dev/null
+++ b/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class JaqketRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="JaqketRetrieval",
+        dataset={
+            "path": "mteb/jaqket",
+            "revision": "3a5b92dad489a61e664c05ed2175bc9220230199",
+        },
+        description="JAQKET (JApanese Questions on Knowledge of EnTities) is a QA dataset that is created based on quiz questions.",
+        reference="https://github.com/kumapo/JAQKET-dataset",
+        type="Retrieval",
+        category="s2p",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["jpn-Jpan"],
+        main_score="ndcg_at_10",
+        date=("2023-10-09", "2023-10-09"),
+        domains=["Encyclopaedic", "Non-fiction", "Written"],
+        task_subtypes=["Question answering"],
+        license="cc-by-sa-4.0",
+        annotations_creators="human-annotated",
+        dialect=None,
+        sample_creation="found",
+        bibtex_citation="""@InProceedings{Kurihara_nlp2020,
+author =  "鈴木正敏 and 鈴木潤 and 松田耕史 and ⻄田京介 and 井之上直也",
+title =   "JAQKET: クイズを題材にした日本語 QA データセットの構築",
+booktitle =   "言語処理学会第26回年次大会",
+year =    "2020",
+url = "https://www.anlp.jp/proceedings/annual_meeting/2020/pdf_dir/P2-24.pdf",
+note= "in Japanese"
+}""",
+        descriptive_stats={
+            "test": {
+                "average_document_length": 3747.995228882333,
+                "average_query_length": 50.70611835506519,
+                "num_documents": 114229,
+                "num_queries": 997,
+                "average_relevant_docs_per_query": 1.0,
+            }
+        },
+    )
diff --git a/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py b/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py
new file mode 100644
index 0000000000..afb76f2e48
--- /dev/null
+++ b/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py
@@ -0,0 +1,137 @@
+from __future__ import annotations
+
+import logging
+
+import datasets
+
+from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval
+from mteb.abstasks.MultilingualTask import MultilingualTask
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+_EVAL_LANGS = {
+    "bengali": ["ben-Beng"],
+    "english": ["eng-Latn"],
+    "finnish": ["fin-Latn"],
+    "russian": ["rus-Cyrl"],
+    "korean": ["kor-Kore"],
+    "japanese": ["jpn-Jpan"],
+    "telugu": ["tel-Telu"],
+    "thai": ["tha-Thai"],
+    "swahili": ["swa-Latn"],
+    "arabic": ["ara-Arab"],
+    "indonesian": ["ind-Latn"],
+}
+_EVAL_SPLIT = "test"
+
+logger = logging.getLogger(__name__)
+
+
+def _load_code_search_code_retrieval(
+    path: str, langs: list, splits: str, cache_dir: str = None, revision: str = None
+):
+    corpus = {lang: {split: {} for split in splits} for lang in langs}
+    queries = {lang: {split: {} for split in splits} for lang in langs}
+    relevant_docs = {lang: {split: {} for split in splits} for lang in langs}
+
+    # Purpose: load the "<lang>-qrels", "<lang>-corpus" and "<lang>-queries"
+    # configs of the Hub dataset `path` for every language in `langs`.
+    #
+    # Arguments:
+    #   path: dataset identifier handed to datasets.load_dataset.
+    #   langs: language config prefixes to load.
+    #   splits: iterable of split names; it only shapes the returned dicts.
+    #   cache_dir, revision: forwarded unchanged to datasets.load_dataset.
+    #
+    # Returns (corpus, queries, relevant_docs), each indexed as [lang][split]:
+    #   corpus[lang][split][doc_id] = {"title": ..., "text": ...}
+    #   queries[lang][split][query_id] = query text
+    #   relevant_docs[lang][split][query_id][doc_id] = score
+    #
+    # Only _EVAL_SPLIT is filled in; any other name in `splits` keeps its empty
+    # dict, and a `splits` without _EVAL_SPLIT fails with KeyError below.
+    split = _EVAL_SPLIT
+
+    def _load_config(config_name: str, hf_split: str):
+        # Fetch one config at the pinned revision and select a single split.
+        return datasets.load_dataset(
+            path,
+            name=config_name,
+            cache_dir=cache_dir,
+            revision=revision,
+            trust_remote_code=True,
+        )[hf_split]
+
+    for lang in langs:
+        # Relevance judgements: query id -> {document id: score}.
+        lang_qrels = relevant_docs[lang][split]
+        for row in _load_config(f"{lang}-qrels", split):
+            lang_qrels.setdefault(row["query-id"], {})[row["corpus-id"]] = row["score"]
+
+        # The corpus is always read from the "train" split of its config, but
+        # it is stored under the evaluation split.
+        lang_corpus = corpus[lang][split]
+        for row in _load_config(f"{lang}-corpus", "train"):
+            lang_corpus[row["_id"]] = {"title": row["title"], "text": row["text"]}
+
+        # Queries: query id -> query text.
+        lang_queries = queries[lang][split]
+        for row in _load_config(f"{lang}-queries", split):
+            lang_queries[row["_id"]] = row["text"]
+
+        # Log the number of queries for this language. len(queries) would count
+        # languages, because `queries` is keyed by language at the top level.
+        logger.info("Loaded %d %s Queries.", len(lang_queries), split.upper())
+
+    return corpus, queries, relevant_docs
+
+
+class MrTidyRetrieval(MultilingualTask, AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="MrTidyRetrieval",
+        description="Mr. TyDi is a multi-lingual benchmark dataset built on TyDi, covering eleven typologically diverse languages. It is designed for monolingual retrieval, specifically to evaluate ranking with learned dense representations.",
+        reference="https://huggingface.co/datasets/castorini/mr-tydi",
+        dataset={
+            "path": "mteb/mrtidy",
+            "revision": "fc24a3ce8f09746410daee3d5cd823ff7a0675b7",
+        },
+        type="Retrieval",
+        category="s2p",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=_EVAL_LANGS,
+        main_score="map",
+        date=("2023-11-01", "2024-05-15"),
+        domains=["Encyclopaedic", "Written"],
+        task_subtypes=[],
+        license="cc-by-sa-3.0",
+        annotations_creators="human-annotated",
+        dialect=[],
+        sample_creation="found",
+        bibtex_citation="""@article{mrtydi,
+              title={{Mr. TyDi}: A Multi-lingual Benchmark for Dense Retrieval}, 
+              author={Xinyu Zhang and Xueguang Ma and Peng Shi and Jimmy Lin},
+              year={2021},
+              journal={arXiv:2108.08787},
+        }""",
+        descriptive_stats={},
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs = (
+            _load_code_search_code_retrieval(
+                path=self.metadata_dict["dataset"]["path"],
+                langs=self.hf_subsets,
+                splits=self.metadata_dict["eval_splits"],
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata_dict["dataset"]["revision"],
+            )
+        )
+
+        self.data_loaded = True
+
+
+if __name__ == "__main__":
+    print(MrTidyRetrieval().calculate_metadata_metrics())

From ce5c7462f8a9a27d3b4bc8eb21035424e60e42e3 Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Sun, 29 Sep 2024 17:30:30 +0300
Subject: [PATCH 2/7] fix metrics

---
 .../Reranking/multilingual/ESCIReranking.py   |  2 +-
 .../Retrieval/multilingual/MrTidyRetrieval.py | 22 +++++++------------
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/mteb/tasks/Reranking/multilingual/ESCIReranking.py b/mteb/tasks/Reranking/multilingual/ESCIReranking.py
index 39b7aa17c6..c3597c2fdf 100644
--- a/mteb/tasks/Reranking/multilingual/ESCIReranking.py
+++ b/mteb/tasks/Reranking/multilingual/ESCIReranking.py
@@ -38,7 +38,7 @@ class ESCIReranking(MultilingualTask, AbsTaskReranking):
         modalities=["text"],
         eval_splits=[_EVAL_SPLIT],
         eval_langs=_LANGUAGES,
-        main_score="NDCG@10",
+        main_score="map",
         date=("2022-06-14", "2022-06-14"),
         domains=["Written"],
         task_subtypes=[],
diff --git a/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py b/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py
index afb76f2e48..f7bf5f9dc8 100644
--- a/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py
+++ b/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py
@@ -26,7 +26,7 @@
 logger = logging.getLogger(__name__)
 
 
-def _load_code_search_code_retrieval(
+def _load_data_retrieval(
     path: str, langs: list, splits: str, cache_dir: str = None, revision: str = None
 ):
     corpus = {lang: {split: {} for split in splits} for lang in langs}
@@ -99,7 +99,7 @@ class MrTidyRetrieval(MultilingualTask, AbsTaskRetrieval):
         modalities=["text"],
         eval_splits=["test"],
         eval_langs=_EVAL_LANGS,
-        main_score="map",
+        main_score="ndcg_at_10",
         date=("2023-11-01", "2024-05-15"),
         domains=["Encyclopaedic", "Written"],
         task_subtypes=[],
@@ -120,18 +120,12 @@ def load_data(self, **kwargs):
         if self.data_loaded:
             return
 
-        self.corpus, self.queries, self.relevant_docs = (
-            _load_code_search_code_retrieval(
-                path=self.metadata_dict["dataset"]["path"],
-                langs=self.hf_subsets,
-                splits=self.metadata_dict["eval_splits"],
-                cache_dir=kwargs.get("cache_dir", None),
-                revision=self.metadata_dict["dataset"]["revision"],
-            )
+        self.corpus, self.queries, self.relevant_docs = _load_data_retrieval(
+            path=self.metadata_dict["dataset"]["path"],
+            langs=self.hf_subsets,
+            splits=self.metadata_dict["eval_splits"],
+            cache_dir=kwargs.get("cache_dir", None),
+            revision=self.metadata_dict["dataset"]["revision"],
         )
 
         self.data_loaded = True
-
-
-if __name__ == "__main__":
-    print(MrTidyRetrieval().calculate_metadata_metrics())

From 456df8214a8168a644e7278e6de1a5f8d7183355 Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Sun, 29 Sep 2024 18:17:10 +0300
Subject: [PATCH 3/7] add Touche2020v3

---
 .../Retrieval/eng/Touche2020Retrieval.py      | 44 ++++++++++++++++++-
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py b/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
index 2c9dc8df41..7ef2c1b7d9 100644
--- a/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
+++ b/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
@@ -1,9 +1,8 @@
 from __future__ import annotations
 
+from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval
 from mteb.abstasks.TaskMetadata import TaskMetadata
 
-from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval
-
 
 class Touche2020(AbsTaskRetrieval):
     metadata = TaskMetadata(
@@ -57,3 +56,44 @@ class Touche2020(AbsTaskRetrieval):
             },
         },
     )
+
+
+class Touche2020v3(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Touche2020v3",
+        description="Touché Task 1: Argument Retrieval for Controversial Questions",
+        reference="https://github.com/castorini/touche-error-analysis",
+        dataset={
+            "path": "mteb/webis-touche2020-v3",
+            "revision": "431886eaecc48f067a3975b70d0949ea2862463c",
+        },
+        type="Retrieval",
+        category="s2p",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=None,
+        domains=None,
+        task_subtypes=None,
+        license="cc-by-sa-4.0",
+        annotations_creators=None,
+        dialect=None,
+        sample_creation=None,
+        bibtex_citation="""@INPROCEEDINGS{Thakur_etal_SIGIR2024,
+   author = "Nandan Thakur and Luiz Bonifacio and Maik {Fr\"{o}be} and Alexander Bondarenko and Ehsan Kamalloo and Martin Potthast and Matthias Hagen and Jimmy Lin",
+   title = "Systematic Evaluation of Neural Retrieval Models on the {Touch\'{e}} 2020 Argument Retrieval Subset of {BEIR}",
+   booktitle = "Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval",
+   year = 2024,
+   address_ = "Washington, D.C."
+}""",
+        descriptive_stats={
+            "test": {
+                "average_document_length": 2096.391812518931,
+                "average_query_length": 43.42857142857143,
+                "num_documents": 303732,
+                "num_queries": 49,
+                "average_relevant_docs_per_query": 34.93877551020408,
+            }
+        },
+    )

From 38c0b56bb317ab1b894f47bd523619645421e3a8 Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Sun, 29 Sep 2024 22:45:33 +0300
Subject: [PATCH 4/7] fix metadata

---
 .../Retrieval/eng/Touche2020Retrieval.py      | 26 +++++++++----------
 mteb/tasks/Retrieval/jpn/JaqketRetrieval.py   |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py b/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
index 7ef2c1b7d9..a1277406f1 100644
--- a/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
+++ b/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
@@ -19,13 +19,13 @@ class Touche2020(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
-        domains=None,
-        task_subtypes=None,
-        license=None,
-        annotations_creators=None,
-        dialect=None,
-        sample_creation=None,
+        date=("2020-09-23", "2020-09-23"),
+        domains=["Academic"],
+        task_subtypes=["Question answering"],
+        license="cc-by-sa-4.0",
+        annotations_creators="human-annotated",
+        dialect=[],
+        sample_creation="found",
         bibtex_citation="""@dataset{potthast_2022_6862281,
   author       = {Potthast, Martin and
                   Gienapp, Lukas and
@@ -73,13 +73,13 @@ class Touche2020v3(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
-        domains=None,
-        task_subtypes=None,
+        date=("2020-09-23", "2020-09-23"),
+        domains=["Academic"],
+        task_subtypes=["Question answering"],
         license="cc-by-sa-4.0",
-        annotations_creators=None,
-        dialect=None,
-        sample_creation=None,
+        annotations_creators="human-annotated",
+        dialect=[],
+        sample_creation="found",
         bibtex_citation="""@INPROCEEDINGS{Thakur_etal_SIGIR2024,
    author = "Nandan Thakur and Luiz Bonifacio and Maik {Fr\"{o}be} and Alexander Bondarenko and Ehsan Kamalloo and Martin Potthast and Matthias Hagen and Jimmy Lin",
    title = "Systematic Evaluation of Neural Retrieval Models on the {Touch\'{e}} 2020 Argument Retrieval Subset of {BEIR}",
diff --git a/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py b/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py
index 6c3d6a86ca..0af7d06772 100644
--- a/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py
+++ b/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py
@@ -24,7 +24,7 @@ class JaqketRetrieval(AbsTaskRetrieval):
         task_subtypes=["Question answering"],
         license="cc-by-sa-4.0",
         annotations_creators="human-annotated",
-        dialect=None,
+        dialect=[],
         sample_creation="found",
         bibtex_citation="""@InProceedings{Kurihara_nlp2020,
 author =  "鈴木正敏 and 鈴木潤 and 松田耕史 and ⻄田京介 and 井之上直也",

From fbe737edb0750a9596ec5793266563a6d8a62fa3 Mon Sep 17 00:00:00 2001
From: Roman Solomatin <samoed.roman@gmail.com>
Date: Thu, 3 Oct 2024 14:15:02 +0300
Subject: [PATCH 5/7] Apply suggestions from code review

Co-authored-by: Kenneth Enevoldsen <kennethcenevoldsen@gmail.com>
---
 mteb/tasks/Retrieval/eng/Touche2020Retrieval.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py b/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
index a1277406f1..045ac083ab 100644
--- a/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
+++ b/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
@@ -58,9 +58,9 @@ class Touche2020(AbsTaskRetrieval):
     )
 
 
-class Touche2020v3(AbsTaskRetrieval):
+class Touche2020v3Retrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
-        name="Touche2020v3",
+        name="Touche2020v3Retrieval",
         description="Touché Task 1: Argument Retrieval for Controversial Questions",
         reference="https://github.com/castorini/touche-error-analysis",
         dataset={

From 1a56a469a1467ae3f9faad0b47fcf0a38e0204bf Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Thu, 3 Oct 2024 14:22:00 +0300
Subject: [PATCH 6/7] update task name and mark Touche2020 as superseded

---
 mteb/tasks/Retrieval/eng/Touche2020Retrieval.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py b/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
index 045ac083ab..01b955b19d 100644
--- a/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
+++ b/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py
@@ -5,6 +5,8 @@
 
 
 class Touche2020(AbsTaskRetrieval):
+    superseded_by = "Touche2020Retrieval.v3"
+
     metadata = TaskMetadata(
         name="Touche2020",
         description="Touché Task 1: Argument Retrieval for Controversial Questions",
@@ -60,7 +62,7 @@ class Touche2020(AbsTaskRetrieval):
 
 class Touche2020v3Retrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
-        name="Touche2020v3Retrieval",
+        name="Touche2020Retrieval.v3",
         description="Touché Task 1: Argument Retrieval for Controversial Questions",
         reference="https://github.com/castorini/touche-error-analysis",
         dataset={

From 049c914a58f21987a0c37122822b7e8ee9f4fc6b Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Thu, 3 Oct 2024 14:33:33 +0300
Subject: [PATCH 7/7] add benchmark class

---
 mteb/benchmarks/benchmarks.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py
index ccb266aacb..c40766045c 100644
--- a/mteb/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks.py
@@ -687,3 +687,37 @@ def __getitem__(self, index):
     reference=None,
     citation=None,
 )
+
+MTEB_JPN = Benchmark(
+    name="MTEB(jpn)",
+    tasks=get_tasks(
+        languages=["jpn"],
+        tasks=[
+            # clustering
+            "LivedoorNewsClustering.v2",
+            "MewsC16JaClustering",
+            # classification
+            "AmazonReviewsClassification",
+            "AmazonCounterfactualClassification",
+            "MassiveIntentClassification",
+            "MassiveScenarioClassification",
+            # STS
+            "JSTS",
+            "JSICK",
+            # pair classification
+            "PawsXPairClassification",
+            # retrieval
+            "JaqketRetrieval",
+            "MrTidyRetrieval",
+            "JaGovFaqsRetrieval",
+            "NLPJournalTitleAbsRetrieval",
+            "NLPJournalAbsIntroRetrieval",
+            "NLPJournalTitleIntroRetrieval",
+            # reranking
+            "ESCIReranking",
+        ],
+    ),
+    description="Main Japanese benchmarks from MTEB",
+    reference="https://github.com/sbintuitions/JMTEB",
+    citation=None,
+)