diff --git a/.gitignore b/.gitignore
index 4bf5d365..52ed372d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,163 @@
*.DS_Store
-.env*
-__pycache__
ChromaDB
-models
-.vscode
\ No newline at end of file
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env*
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
\ No newline at end of file
diff --git a/.python-version b/.python-version
deleted file mode 100644
index c8cfe395..00000000
--- a/.python-version
+++ /dev/null
@@ -1 +0,0 @@
-3.10
diff --git a/README.md b/README.md
index a85817af..03e6d2cf 100644
--- a/README.md
+++ b/README.md
@@ -1,93 +1,70 @@
-# MTEB Scripts
+# Scripts to run the French MTEB benchmark
-This repository contains scripts used for [MTEB](https://github.com/embeddings-benchmark/mteb) benchmarking. Some scripts rely on a results folder, which can be obtained via `git clone https://huggingface.co/datasets/mteb/results`.
+This folder contains the scripts used to generate the French tab results on the [MTEB](https://github.com/embeddings-benchmark/mteb) benchmark.
-
+Below are instructions to run the main scripts.
-- [MTEB Scripts](#mteb-scripts)
- - [Benchmark](#benchmark)
- - [Env Setup](#env-setup)
- - [Model setup](#model-setup)
- - [Download](#download)
- - [Load](#load)
+## Benchmark
-
+### Running on host using venv
-## Benchmark
+* Navigate to the repository root folder
+* Create your virtual env:
-Basic with Internet
-```python
-from mteb import MTEB
-from sentence_transformers import SentenceTransformer
-model_path = "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit"
-model_name = model_path.split("/")[-1].split("_")[-1]
-model = SentenceTransformer(model_path)
-evaluation = MTEB(tasks=["Banking77Classification"])
-evaluation.run(model, output_folder=f"results/{model_name}")
+```bash
+python3 -m venv .venv
```
-
-No Internet Access (Download data first)
-```python
-import os
-os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models"
-os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets"
-os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules"
-os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics"
-from mteb import MTEB
-from sentence_transformers import SentenceTransformer
-model_path = "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit"
-model_name = model_path.split("/")[-1].split("_")[-1]
-model = SentenceTransformer(model_path)
-evaluation = MTEB(tasks=["Banking77Classification"])
-evaluation.run(model, output_folder=f"results/{model_name}")
+* Activate it and install the requirements:
+```bash
+source .venv/bin/activate
+pip install -r requirements.txt
```
-
-
-## Env Setup
-
+* Run the benchmark:
```bash
-export CONDA_ENVS_PATH=$six_ALL_CCFRWORK/conda
-
-conda create -y -n hf-prod python=3.8
-conda activate hf-prod
+cd script_mteb_french
+python run_benchmark.py
+```
-# pt-1.10.1 / cuda 11.3
-conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
+By default the benchmark runs on sentence_transformer models but you can specify the type with the argument `--model_type`:
+```bash
+# default ['sentence_transformer']
+python run_benchmark.py
+# choosing other type ['voyage_ai']
+python run_benchmark.py --model_type voyage_ai
+# running on two types ['voyage_ai', 'sentence_transformer']
+python run_benchmark.py --model_type voyage_ai sentence_transformer
+```
-# Custom fork that uses offline datasets
-!pip install --upgrade git+https://github.com/Muennighoff/mteb.git@offlineaccess
-!pip install --upgrade git+https://github.com/Muennighoff/sentence-transformers.git@sgpt_poolings
-# If you want to run BEIR tasks
-!pip install --upgrade git+https://github.com/beir-cellar/beir.git
+You can also run the benchmark on one model only by specifying `--model_name`:
+```bash
+# default ['sentence_transformer'] -> all models of this type
+python run_benchmark.py
+# running on one model 'xlm-roberta-base'
+python run_benchmark.py --model_type sentence_transformer --model_name "xlm-roberta-base"
```
+Note that `model_name` must be one of the models available for the specified `model_type`.
-## Model setup
+You can run the benchmark on one task type in ["all", "classification", "clustering", "reranking", "retrieval", "pair_classification", "sts", "summarization", "bitextmining"]. The default is "all", which runs all tasks:
+```bash
+# running 'sentence_transformer' models on 'classification' task
+python run_benchmark.py --model_type sentence_transformer --task_type classification
+```
-### Download
+## Running using Docker
-```python
-import os
-import sentence_transformers
-os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/gpfswork/rech/six/commun/models"
-sentence_transformers_cache_dir = os.getenv("SENTENCE_TRANSFORMERS_HOME")
-model_repo="sentence-transformers/allenai-specter"
-revision="29f9f45ff2a85fe9dfe8ce2cef3d8ec4e65c5f37"
-model_path = os.path.join(sentence_transformers_cache_dir, model_repo.replace("/", "_"))
-model_path_tmp = sentence_transformers.util.snapshot_download(
- repo_id=model_repo,
- revision=revision,
- cache_dir=sentence_transformers_cache_dir,
- library_name="sentence-transformers",
- library_version=sentence_transformers.__version__,
- ignore_files=["flax_model.msgpack", "rust_model.ot", "tf_model.h5",],
-)
-os.rename(model_path_tmp, model_path)
+* Navigate to the repository root folder
+* Build the docker image:
+```bash
+docker build -t mtebscripts_image .
+```
+* Run the benchmark in the container as follows:
+```bash
+docker run -v $(pwd):/mtebscripts mtebscripts_image sh -c "cd script_mteb_french && python run_benchmark.py"
```
+If you want to use the GPU, make sure to add the `--gpus` option (e.g. `--gpus all`) to your run command, or `--runtime=nvidia` if you are using an older version of Docker.
-### Load
+Note: Because the volume is shared between the host and the container, the results will be available on the host once the run finishes.
-```python
-model = SentenceTransformer("/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit")
-```
+## Models' characteristics
+
+Additionally, you can find a script `get_model_specs.py` to compute models' characteristics (size, number of parameters, embedding dimension). You can run it similarly to the benchmark by substituting `run_benchmark.py` with `get_model_specs.py`.
diff --git a/README_orig.md b/README_orig.md
new file mode 100644
index 00000000..a85817af
--- /dev/null
+++ b/README_orig.md
@@ -0,0 +1,93 @@
+# MTEB Scripts
+
+This repository contains scripts used for [MTEB](https://github.com/embeddings-benchmark/mteb) benchmarking. Some scripts rely on a results folder, which can be obtained via `git clone https://huggingface.co/datasets/mteb/results`.
+
+
+
+- [MTEB Scripts](#mteb-scripts)
+ - [Benchmark](#benchmark)
+ - [Env Setup](#env-setup)
+ - [Model setup](#model-setup)
+ - [Download](#download)
+ - [Load](#load)
+
+
+
+## Benchmark
+
+Basic with Internet
+```python
+from mteb import MTEB
+from sentence_transformers import SentenceTransformer
+model_path = "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit"
+model_name = model_path.split("/")[-1].split("_")[-1]
+model = SentenceTransformer(model_path)
+evaluation = MTEB(tasks=["Banking77Classification"])
+evaluation.run(model, output_folder=f"results/{model_name}")
+```
+
+No Internet Access (Download data first)
+```python
+import os
+os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline
+os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline
+os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models"
+os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets"
+os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules"
+os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics"
+from mteb import MTEB
+from sentence_transformers import SentenceTransformer
+model_path = "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit"
+model_name = model_path.split("/")[-1].split("_")[-1]
+model = SentenceTransformer(model_path)
+evaluation = MTEB(tasks=["Banking77Classification"])
+evaluation.run(model, output_folder=f"results/{model_name}")
+```
+
+
+## Env Setup
+
+```bash
+export CONDA_ENVS_PATH=$six_ALL_CCFRWORK/conda
+
+conda create -y -n hf-prod python=3.8
+conda activate hf-prod
+
+# pt-1.10.1 / cuda 11.3
+conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
+
+# Custom fork that uses offline datasets
+!pip install --upgrade git+https://github.com/Muennighoff/mteb.git@offlineaccess
+!pip install --upgrade git+https://github.com/Muennighoff/sentence-transformers.git@sgpt_poolings
+# If you want to run BEIR tasks
+!pip install --upgrade git+https://github.com/beir-cellar/beir.git
+```
+
+## Model setup
+
+### Download
+
+```python
+import os
+import sentence_transformers
+os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/gpfswork/rech/six/commun/models"
+sentence_transformers_cache_dir = os.getenv("SENTENCE_TRANSFORMERS_HOME")
+model_repo="sentence-transformers/allenai-specter"
+revision="29f9f45ff2a85fe9dfe8ce2cef3d8ec4e65c5f37"
+model_path = os.path.join(sentence_transformers_cache_dir, model_repo.replace("/", "_"))
+model_path_tmp = sentence_transformers.util.snapshot_download(
+ repo_id=model_repo,
+ revision=revision,
+ cache_dir=sentence_transformers_cache_dir,
+ library_name="sentence-transformers",
+ library_version=sentence_transformers.__version__,
+ ignore_files=["flax_model.msgpack", "rust_model.ot", "tf_model.h5",],
+)
+os.rename(model_path_tmp, model_path)
+```
+
+### Load
+
+```python
+model = SentenceTransformer("/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit")
+```
diff --git a/script_mteb_french/results_analysis/models_characteristics.csv b/assets/models_characteristics.csv
similarity index 100%
rename from script_mteb_french/results_analysis/models_characteristics.csv
rename to assets/models_characteristics.csv
diff --git a/download_tasks.py b/download_tasks.py
deleted file mode 100644
index 0fc34286..00000000
--- a/download_tasks.py
+++ /dev/null
@@ -1,79 +0,0 @@
-"""Downloads all MTEB tasks"""
-
-TASK_LIST = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
- "ArguAna",
- "ClimateFEVER",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
- "SummEval",
-]
-
-import os
-os.environ["HF_DATASETS_OFFLINE"]="0" # 1 for offline
-os.environ["TRANSFORMERS_OFFLINE"]="0" # 1 for offline
-os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models"
-os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets"
-os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules"
-os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics"
-
-from mteb import MTEB
-evaluation = MTEB(tasks=TASK_LIST, task_langs=["en"])
-
-for task in evaluation.tasks:
- task.load_data()
-
- # Alternatively clone to desired place
- #path = "/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/" + task.description["hf_hub_name"]
- #from git import Repo
- #Repo.clone_from("https://huggingface.co/datasets/" + self.description["hf_hub_name"], path)
- #self.dataset = datasets.load_dataset(path, revision=self.description.get("revision", None))
diff --git a/script_mteb_french/src/AbstractEmbeddingFunction.py b/embedders/AbstractEmbeddingFunction.py
similarity index 100%
rename from script_mteb_french/src/AbstractEmbeddingFunction.py
rename to embedders/AbstractEmbeddingFunction.py
diff --git a/script_mteb_french/src/ChromaDBEmbedder.py b/embedders/ChromaDBEmbedder.py
similarity index 100%
rename from script_mteb_french/src/ChromaDBEmbedder.py
rename to embedders/ChromaDBEmbedder.py
diff --git a/script_mteb_french/src/CohereEmbeddingFunction.py b/embedders/CohereEmbeddingFunction.py
similarity index 100%
rename from script_mteb_french/src/CohereEmbeddingFunction.py
rename to embedders/CohereEmbeddingFunction.py
diff --git a/script_mteb_french/src/LaserEmbeddingFunction.py b/embedders/LaserEmbeddingFunction.py
similarity index 96%
rename from script_mteb_french/src/LaserEmbeddingFunction.py
rename to embedders/LaserEmbeddingFunction.py
index 79119dcf..e563dadc 100644
--- a/script_mteb_french/src/LaserEmbeddingFunction.py
+++ b/embedders/LaserEmbeddingFunction.py
@@ -33,7 +33,7 @@ def encode_documents(self, input: Documents) -> Embeddings:
@staticmethod
def _download_laser_models():
- MODELS_DOWNLOAD_FOLDER = "models"
+ MODELS_DOWNLOAD_FOLDER = "downloads"
if not os.path.exists(MODELS_DOWNLOAD_FOLDER):
os.mkdir(MODELS_DOWNLOAD_FOLDER)
diff --git a/script_mteb_french/src/MistralAIEmbeddingFunction.py b/embedders/MistralAIEmbeddingFunction.py
similarity index 100%
rename from script_mteb_french/src/MistralAIEmbeddingFunction.py
rename to embedders/MistralAIEmbeddingFunction.py
diff --git a/script_mteb_french/src/ModelConfig.py b/embedders/ModelConfig.py
similarity index 100%
rename from script_mteb_french/src/ModelConfig.py
rename to embedders/ModelConfig.py
diff --git a/script_mteb_french/src/OpenAIEmbeddingFunction.py b/embedders/OpenAIEmbeddingFunction.py
similarity index 100%
rename from script_mteb_french/src/OpenAIEmbeddingFunction.py
rename to embedders/OpenAIEmbeddingFunction.py
diff --git a/script_mteb_french/src/SentenceTransformerEmbeddingFunction.py b/embedders/SentenceTransformerEmbeddingFunction.py
similarity index 100%
rename from script_mteb_french/src/SentenceTransformerEmbeddingFunction.py
rename to embedders/SentenceTransformerEmbeddingFunction.py
diff --git a/script_mteb_french/src/UniversalSentenceEncoderEmbeddingFunction.py b/embedders/UniversalSentenceEncoderEmbeddingFunction.py
similarity index 100%
rename from script_mteb_french/src/UniversalSentenceEncoderEmbeddingFunction.py
rename to embedders/UniversalSentenceEncoderEmbeddingFunction.py
diff --git a/script_mteb_french/src/VoyageAIEmbeddingFunction.py b/embedders/VoyageAIEmbeddingFunction.py
similarity index 100%
rename from script_mteb_french/src/VoyageAIEmbeddingFunction.py
rename to embedders/VoyageAIEmbeddingFunction.py
diff --git a/script_mteb_french/src/__init__.py b/embedders/__init__.py
similarity index 100%
rename from script_mteb_french/src/__init__.py
rename to embedders/__init__.py
diff --git a/fix_results.py b/fix_results.py
deleted file mode 100644
index bb095429..00000000
--- a/fix_results.py
+++ /dev/null
@@ -1,64 +0,0 @@
-"""
-Iterates over json results for custom fixes
-Usage: python fix_results.py results_folder_path
-"""
-import glob
-import json
-import sys
-import os
-
-from mteb import MTEB
-
-results_folder = sys.argv[1]
-files = glob.glob(f'{results_folder.strip("/")}/*/*.json')
-
-print("Found files: ", files)
-
-for file_name in files:
- with open(file_name, 'r', encoding='utf-8') as f:
- results = json.load(f)
- if "dataset_version" in results:
- results.pop("dataset_version")
- if "mteb_version" not in results:
- results["mteb_version"] = "0.0.2"
- if "mteb_dataset_name" not in results:
- results["mteb_dataset_name"] = file_name.split("/")[-1].replace(".json", "")
- if "dataset_revision" not in results:
- print(file_name)
- mteb_desc = (
- MTEB(tasks=[file_name.split("/")[-1].replace(".json", "").replace("CQADupstackRetrieval", "CQADupstackAndroidRetrieval")])
- .tasks[0]
- .description
- )
- import huggingface_hub
- if "hf_hub_name" in mteb_desc:
- hf_hub_name = mteb_desc.get("hf_hub_name")
- else:
- hf_hub_name = "BeIR/" + mteb_desc.get("beir_name")
- if "cqadupstack" in hf_hub_name:
- hf_hub_name = "BeIR/cqadupstack-qrels"
- results["dataset_revision"] = huggingface_hub.hf_api.dataset_info(hf_hub_name).sha
-
- if "STS22" in file_name:
- for split, split_results in results.items():
- if isinstance(split_results, dict):
- for metric, score in split_results.items():
- if isinstance(score, dict):
- for sub_metric, sub_score in score.items():
- if isinstance(sub_score, dict):
- for sub_sub_metric, sub_sub_score in sub_score.items():
- results[split][metric][sub_metric][sub_sub_metric] = abs(sub_sub_score)
- else:
- results[split][metric][sub_metric] = abs(sub_score)
- else:
- results[split][metric] = abs(score)
- results.setdefault(split, {})
- # Merge MSMARCO dev & test split runs
- elif "MSMARCO." in file_name and os.path.exists(file_name.replace("MSMARCO.", "MSMARCO-test.")):
- with open(file_name.replace("MSMARCO.", "MSMARCO-test."), 'r', encoding='utf-8') as f:
- results_test = json.load(f)
- results["test"] = results_test["test"]
-
- with open(file_name, 'w', encoding='utf-8') as f:
- json.dump(results, f, indent=4)
-
diff --git a/results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringP2P.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringP2P.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringS2S.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringS2S.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/AlloprofReranking.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloprofReranking.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/AlloprofReranking.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloprofReranking.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/AlloprofRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloprofRetrieval.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/AlloprofRetrieval.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloprofRetrieval.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/AmazonReviewsClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AmazonReviewsClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/AmazonReviewsClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AmazonReviewsClassification.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/BSARDRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/BSARDRetrieval.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/BSARDRetrieval.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/BSARDRetrieval.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/DiaBLaBitextMining.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/DiaBLaBitextMining.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/DiaBLaBitextMining.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/DiaBLaBitextMining.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/FloresBitextMining.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/FloresBitextMining.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/FloresBitextMining.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/FloresBitextMining.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/HALClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/HALClusteringS2S.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/HALClusteringS2S.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/HALClusteringS2S.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringP2P.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringP2P.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringS2S.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringS2S.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/MTOPDomainClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MTOPDomainClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/MTOPDomainClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MTOPDomainClassification.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/MTOPIntentClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MTOPIntentClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/MTOPIntentClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MTOPIntentClassification.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClassification.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringP2P.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringS2S.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/MassiveIntentClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MassiveIntentClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/MassiveIntentClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MassiveIntentClassification.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/MassiveScenarioClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MassiveScenarioClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/MassiveScenarioClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MassiveScenarioClassification.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/MintakaRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MintakaRetrieval.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/MintakaRetrieval.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MintakaRetrieval.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/OpusparcusPC.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/OpusparcusPC.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/OpusparcusPC.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/OpusparcusPC.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/PawsX.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/PawsX.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/PawsX.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/PawsX.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/SICKFr.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SICKFr.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/SICKFr.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SICKFr.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/STS22.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/STS22.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/STS22.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/STS22.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/STSBenchmarkMultilingualSTS.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/SummEvalFr.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SummEvalFr.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/SummEvalFr.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SummEvalFr.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/SyntecReranking.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SyntecReranking.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/SyntecReranking.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SyntecReranking.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/SyntecRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SyntecRetrieval.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/SyntecRetrieval.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SyntecRetrieval.json
diff --git a/results/Cohere-embed-multilingual-light-v3.0/XPQARetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/XPQARetrieval.json
similarity index 100%
rename from results/Cohere-embed-multilingual-light-v3.0/XPQARetrieval.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/XPQARetrieval.json
diff --git a/results/Cohere-embed-multilingual-v3.0/AlloProfClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloProfClusteringP2P.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloProfClusteringP2P.json
diff --git a/results/Cohere-embed-multilingual-v3.0/AlloProfClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloProfClusteringS2S.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloProfClusteringS2S.json
diff --git a/results/Cohere-embed-multilingual-v3.0/AlloprofReranking.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloprofReranking.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/AlloprofReranking.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloprofReranking.json
diff --git a/results/Cohere-embed-multilingual-v3.0/AlloprofRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloprofRetrieval.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/AlloprofRetrieval.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloprofRetrieval.json
diff --git a/results/Cohere-embed-multilingual-v3.0/AmazonReviewsClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AmazonReviewsClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/AmazonReviewsClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AmazonReviewsClassification.json
diff --git a/results/Cohere-embed-multilingual-v3.0/BSARDRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/BSARDRetrieval.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/BSARDRetrieval.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/BSARDRetrieval.json
diff --git a/results/Cohere-embed-multilingual-v3.0/DiaBLaBitextMining.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/DiaBLaBitextMining.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/DiaBLaBitextMining.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/DiaBLaBitextMining.json
diff --git a/results/Cohere-embed-multilingual-v3.0/FloresBitextMining.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/FloresBitextMining.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/FloresBitextMining.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/FloresBitextMining.json
diff --git a/results/Cohere-embed-multilingual-v3.0/HALClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/HALClusteringS2S.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/HALClusteringS2S.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/HALClusteringS2S.json
diff --git a/results/Cohere-embed-multilingual-v3.0/MLSUMClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MLSUMClusteringP2P.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MLSUMClusteringP2P.json
diff --git a/results/Cohere-embed-multilingual-v3.0/MLSUMClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MLSUMClusteringS2S.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MLSUMClusteringS2S.json
diff --git a/results/Cohere-embed-multilingual-v3.0/MTOPDomainClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MTOPDomainClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/MTOPDomainClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MTOPDomainClassification.json
diff --git a/results/Cohere-embed-multilingual-v3.0/MTOPIntentClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MTOPIntentClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/MTOPIntentClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MTOPIntentClassification.json
diff --git a/results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClassification.json
diff --git a/results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringP2P.json
diff --git a/results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringS2S.json
diff --git a/results/Cohere-embed-multilingual-v3.0/MassiveIntentClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MassiveIntentClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/MassiveIntentClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MassiveIntentClassification.json
diff --git a/results/Cohere-embed-multilingual-v3.0/MassiveScenarioClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MassiveScenarioClassification.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/MassiveScenarioClassification.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MassiveScenarioClassification.json
diff --git a/results/Cohere-embed-multilingual-v3.0/MintakaRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MintakaRetrieval.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/MintakaRetrieval.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MintakaRetrieval.json
diff --git a/results/Cohere-embed-multilingual-v3.0/OpusparcusPC.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/OpusparcusPC.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/OpusparcusPC.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/OpusparcusPC.json
diff --git a/results/Cohere-embed-multilingual-v3.0/PawsX.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/PawsX.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/PawsX.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/PawsX.json
diff --git a/results/Cohere-embed-multilingual-v3.0/SICKFr.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SICKFr.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/SICKFr.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SICKFr.json
diff --git a/results/Cohere-embed-multilingual-v3.0/STS22.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/STS22.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/STS22.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/STS22.json
diff --git a/results/Cohere-embed-multilingual-v3.0/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/STSBenchmarkMultilingualSTS.json
diff --git a/results/Cohere-embed-multilingual-v3.0/SummEvalFr.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SummEvalFr.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/SummEvalFr.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SummEvalFr.json
diff --git a/results/Cohere-embed-multilingual-v3.0/SyntecReranking.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SyntecReranking.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/SyntecReranking.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SyntecReranking.json
diff --git a/results/Cohere-embed-multilingual-v3.0/SyntecRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SyntecRetrieval.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/SyntecRetrieval.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SyntecRetrieval.json
diff --git a/results/Cohere-embed-multilingual-v3.0/XPQARetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/XPQARetrieval.json
similarity index 100%
rename from results/Cohere-embed-multilingual-v3.0/XPQARetrieval.json
rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/XPQARetrieval.json
diff --git a/results/Geotrend/bert-base-10lang-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloProfClusteringP2P.json
diff --git a/results/Geotrend/bert-base-10lang-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloProfClusteringS2S.json
diff --git a/results/Geotrend/bert-base-10lang-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloprofReranking.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/AlloprofReranking.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloprofReranking.json
diff --git a/results/Geotrend/bert-base-10lang-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloprofRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/AlloprofRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloprofRetrieval.json
diff --git a/results/Geotrend/bert-base-10lang-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AmazonReviewsClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AmazonReviewsClassification.json
diff --git a/results/Geotrend/bert-base-10lang-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/BSARDRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/BSARDRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/BSARDRetrieval.json
diff --git a/results/Geotrend/bert-base-10lang-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/DiaBLaBitextMining.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/DiaBLaBitextMining.json
diff --git a/results/Geotrend/bert-base-10lang-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/FloresBitextMining.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/FloresBitextMining.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/FloresBitextMining.json
diff --git a/results/Geotrend/bert-base-10lang-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/HALClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/HALClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/HALClusteringS2S.json
diff --git a/results/Geotrend/bert-base-10lang-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MLSUMClusteringP2P.json
diff --git a/results/Geotrend/bert-base-10lang-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MLSUMClusteringS2S.json
diff --git a/results/Geotrend/bert-base-10lang-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MTOPDomainClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/MTOPDomainClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MTOPDomainClassification.json
diff --git a/results/Geotrend/bert-base-10lang-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MTOPIntentClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/MTOPIntentClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MTOPIntentClassification.json
diff --git a/results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClassification.json
diff --git a/results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringP2P.json
diff --git a/results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringS2S.json
diff --git a/results/Geotrend/bert-base-10lang-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MassiveIntentClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/MassiveIntentClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MassiveIntentClassification.json
diff --git a/results/Geotrend/bert-base-10lang-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MassiveScenarioClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MassiveScenarioClassification.json
diff --git a/results/Geotrend/bert-base-10lang-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MintakaRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/MintakaRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MintakaRetrieval.json
diff --git a/results/Geotrend/bert-base-10lang-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/OpusparcusPC.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/OpusparcusPC.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/OpusparcusPC.json
diff --git a/results/Geotrend/bert-base-10lang-cased/PawsX.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/PawsX.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/PawsX.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/PawsX.json
diff --git a/results/Geotrend/bert-base-10lang-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SICKFr.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/SICKFr.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SICKFr.json
diff --git a/results/Geotrend/bert-base-10lang-cased/STS22.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/STS22.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/STS22.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/STS22.json
diff --git a/results/Geotrend/bert-base-10lang-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/STSBenchmarkMultilingualSTS.json
diff --git a/results/Geotrend/bert-base-10lang-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SummEvalFr.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/SummEvalFr.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SummEvalFr.json
diff --git a/results/Geotrend/bert-base-10lang-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SyntecReranking.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/SyntecReranking.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SyntecReranking.json
diff --git a/results/Geotrend/bert-base-10lang-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SyntecRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/SyntecRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SyntecRetrieval.json
diff --git a/results/Geotrend/bert-base-10lang-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/XPQARetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-10lang-cased/XPQARetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/XPQARetrieval.json
diff --git a/results/Geotrend/bert-base-15lang-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloProfClusteringP2P.json
diff --git a/results/Geotrend/bert-base-15lang-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloProfClusteringS2S.json
diff --git a/results/Geotrend/bert-base-15lang-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloprofReranking.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/AlloprofReranking.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloprofReranking.json
diff --git a/results/Geotrend/bert-base-15lang-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloprofRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/AlloprofRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloprofRetrieval.json
diff --git a/results/Geotrend/bert-base-15lang-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AmazonReviewsClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AmazonReviewsClassification.json
diff --git a/results/Geotrend/bert-base-15lang-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/BSARDRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/BSARDRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/BSARDRetrieval.json
diff --git a/results/Geotrend/bert-base-15lang-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/DiaBLaBitextMining.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/DiaBLaBitextMining.json
diff --git a/results/Geotrend/bert-base-15lang-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/FloresBitextMining.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/FloresBitextMining.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/FloresBitextMining.json
diff --git a/results/Geotrend/bert-base-15lang-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/HALClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/HALClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/HALClusteringS2S.json
diff --git a/results/Geotrend/bert-base-15lang-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MLSUMClusteringP2P.json
diff --git a/results/Geotrend/bert-base-15lang-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MLSUMClusteringS2S.json
diff --git a/results/Geotrend/bert-base-15lang-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MTOPDomainClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/MTOPDomainClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MTOPDomainClassification.json
diff --git a/results/Geotrend/bert-base-15lang-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MTOPIntentClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/MTOPIntentClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MTOPIntentClassification.json
diff --git a/results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClassification.json
diff --git a/results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringP2P.json
diff --git a/results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringS2S.json
diff --git a/results/Geotrend/bert-base-15lang-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MassiveIntentClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/MassiveIntentClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MassiveIntentClassification.json
diff --git a/results/Geotrend/bert-base-15lang-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MassiveScenarioClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MassiveScenarioClassification.json
diff --git a/results/Geotrend/bert-base-15lang-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MintakaRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/MintakaRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MintakaRetrieval.json
diff --git a/results/Geotrend/bert-base-15lang-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/OpusparcusPC.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/OpusparcusPC.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/OpusparcusPC.json
diff --git a/results/Geotrend/bert-base-15lang-cased/PawsX.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/PawsX.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/PawsX.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/PawsX.json
diff --git a/results/Geotrend/bert-base-15lang-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SICKFr.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/SICKFr.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SICKFr.json
diff --git a/results/Geotrend/bert-base-15lang-cased/STS22.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/STS22.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/STS22.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/STS22.json
diff --git a/results/Geotrend/bert-base-15lang-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/STSBenchmarkMultilingualSTS.json
diff --git a/results/Geotrend/bert-base-15lang-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SummEvalFr.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/SummEvalFr.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SummEvalFr.json
diff --git a/results/Geotrend/bert-base-15lang-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SyntecReranking.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/SyntecReranking.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SyntecReranking.json
diff --git a/results/Geotrend/bert-base-15lang-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SyntecRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/SyntecRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SyntecRetrieval.json
diff --git a/results/Geotrend/bert-base-15lang-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/XPQARetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-15lang-cased/XPQARetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/XPQARetrieval.json
diff --git a/results/Geotrend/bert-base-25lang-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloProfClusteringP2P.json
diff --git a/results/Geotrend/bert-base-25lang-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloProfClusteringS2S.json
diff --git a/results/Geotrend/bert-base-25lang-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloprofReranking.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/AlloprofReranking.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloprofReranking.json
diff --git a/results/Geotrend/bert-base-25lang-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloprofRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/AlloprofRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloprofRetrieval.json
diff --git a/results/Geotrend/bert-base-25lang-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AmazonReviewsClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AmazonReviewsClassification.json
diff --git a/results/Geotrend/bert-base-25lang-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/BSARDRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/BSARDRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/BSARDRetrieval.json
diff --git a/results/Geotrend/bert-base-25lang-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/DiaBLaBitextMining.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/DiaBLaBitextMining.json
diff --git a/results/Geotrend/bert-base-25lang-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/FloresBitextMining.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/FloresBitextMining.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/FloresBitextMining.json
diff --git a/results/Geotrend/bert-base-25lang-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/HALClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/HALClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/HALClusteringS2S.json
diff --git a/results/Geotrend/bert-base-25lang-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MLSUMClusteringP2P.json
diff --git a/results/Geotrend/bert-base-25lang-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MLSUMClusteringS2S.json
diff --git a/results/Geotrend/bert-base-25lang-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MTOPDomainClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/MTOPDomainClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MTOPDomainClassification.json
diff --git a/results/Geotrend/bert-base-25lang-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MTOPIntentClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/MTOPIntentClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MTOPIntentClassification.json
diff --git a/results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClassification.json
diff --git a/results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringP2P.json
diff --git a/results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringS2S.json
diff --git a/results/Geotrend/bert-base-25lang-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MassiveIntentClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/MassiveIntentClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MassiveIntentClassification.json
diff --git a/results/Geotrend/bert-base-25lang-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MassiveScenarioClassification.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MassiveScenarioClassification.json
diff --git a/results/Geotrend/bert-base-25lang-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MintakaRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/MintakaRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MintakaRetrieval.json
diff --git a/results/Geotrend/bert-base-25lang-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/OpusparcusPC.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/OpusparcusPC.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/OpusparcusPC.json
diff --git a/results/Geotrend/bert-base-25lang-cased/PawsX.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/PawsX.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/PawsX.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/PawsX.json
diff --git a/results/Geotrend/bert-base-25lang-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SICKFr.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/SICKFr.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SICKFr.json
diff --git a/results/Geotrend/bert-base-25lang-cased/STS22.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/STS22.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/STS22.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/STS22.json
diff --git a/results/Geotrend/bert-base-25lang-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/STSBenchmarkMultilingualSTS.json
diff --git a/results/Geotrend/bert-base-25lang-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SummEvalFr.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/SummEvalFr.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SummEvalFr.json
diff --git a/results/Geotrend/bert-base-25lang-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SyntecReranking.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/SyntecReranking.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SyntecReranking.json
diff --git a/results/Geotrend/bert-base-25lang-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SyntecRetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/SyntecRetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SyntecRetrieval.json
diff --git a/results/Geotrend/bert-base-25lang-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/XPQARetrieval.json
similarity index 100%
rename from results/Geotrend/bert-base-25lang-cased/XPQARetrieval.json
rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/XPQARetrieval.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloprofReranking.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/AlloprofReranking.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloprofReranking.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloprofRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/AlloprofRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloprofRetrieval.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AmazonReviewsClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AmazonReviewsClassification.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/BSARDRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/BSARDRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/BSARDRetrieval.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/DiaBLaBitextMining.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/DiaBLaBitextMining.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/FloresBitextMining.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/FloresBitextMining.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/FloresBitextMining.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/HALClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/HALClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/HALClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MTOPDomainClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/MTOPDomainClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MTOPDomainClassification.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MTOPIntentClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/MTOPIntentClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MTOPIntentClassification.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClassification.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MassiveIntentClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/MassiveIntentClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MassiveIntentClassification.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MassiveScenarioClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MassiveScenarioClassification.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MintakaRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/MintakaRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MintakaRetrieval.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/OpusparcusPC.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/OpusparcusPC.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/OpusparcusPC.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/PawsX.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/PawsX.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/PawsX.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/PawsX.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SICKFr.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/SICKFr.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SICKFr.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/STS22.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/STS22.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/STS22.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/STS22.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/STSBenchmarkMultilingualSTS.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SummEvalFr.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/SummEvalFr.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SummEvalFr.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SyntecReranking.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/SyntecReranking.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SyntecReranking.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SyntecRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/SyntecRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SyntecRetrieval.json
diff --git a/results/Geotrend/distilbert-base-25lang-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/XPQARetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-25lang-cased/XPQARetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/XPQARetrieval.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloprofReranking.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/AlloprofReranking.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloprofReranking.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloprofRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/AlloprofRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloprofRetrieval.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AmazonReviewsClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AmazonReviewsClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/BSARDRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/BSARDRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/BSARDRetrieval.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/DiaBLaBitextMining.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/DiaBLaBitextMining.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/FloresBitextMining.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/FloresBitextMining.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/FloresBitextMining.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/HALClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/HALClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/HALClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MTOPDomainClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/MTOPDomainClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MTOPDomainClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MTOPIntentClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/MTOPIntentClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MTOPIntentClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MassiveIntentClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/MassiveIntentClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MassiveIntentClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MassiveScenarioClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MassiveScenarioClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MintakaRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/MintakaRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MintakaRetrieval.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/OpusparcusPC.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/OpusparcusPC.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/OpusparcusPC.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/PawsX.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/PawsX.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/PawsX.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/PawsX.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SICKFr.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/SICKFr.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SICKFr.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/STS22.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/STS22.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/STS22.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/STS22.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/STSBenchmarkMultilingualSTS.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SummEvalFr.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/SummEvalFr.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SummEvalFr.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SyntecReranking.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/SyntecReranking.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SyntecReranking.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SyntecRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/SyntecRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SyntecRetrieval.json
diff --git a/results/Geotrend/distilbert-base-en-fr-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/XPQARetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-cased/XPQARetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/XPQARetrieval.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofReranking.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofReranking.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofReranking.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofRetrieval.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AmazonReviewsClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AmazonReviewsClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/BSARDRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/BSARDRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/BSARDRetrieval.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/DiaBLaBitextMining.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/DiaBLaBitextMining.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/FloresBitextMining.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/FloresBitextMining.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/FloresBitextMining.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/HALClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/HALClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/HALClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPDomainClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPDomainClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPDomainClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPIntentClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPIntentClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPIntentClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveIntentClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveIntentClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveIntentClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveScenarioClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveScenarioClassification.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MintakaRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MintakaRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MintakaRetrieval.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/OpusparcusPC.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/OpusparcusPC.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/OpusparcusPC.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/PawsX.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/PawsX.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/PawsX.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/PawsX.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SICKFr.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SICKFr.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SICKFr.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STS22.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STS22.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STS22.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STS22.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STSBenchmarkMultilingualSTS.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SummEvalFr.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SummEvalFr.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SummEvalFr.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecReranking.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecReranking.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecReranking.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecRetrieval.json
diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/XPQARetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/XPQARetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/XPQARetrieval.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloprofReranking.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/AlloprofReranking.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloprofReranking.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloprofRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/AlloprofRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloprofRetrieval.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AmazonReviewsClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AmazonReviewsClassification.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/BSARDRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/BSARDRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/BSARDRetrieval.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/DiaBLaBitextMining.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/DiaBLaBitextMining.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/FloresBitextMining.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/FloresBitextMining.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/FloresBitextMining.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/HALClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/HALClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/HALClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MTOPDomainClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/MTOPDomainClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MTOPDomainClassification.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MTOPIntentClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/MTOPIntentClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MTOPIntentClassification.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClassification.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringP2P.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringS2S.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MassiveIntentClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/MassiveIntentClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MassiveIntentClassification.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MassiveScenarioClassification.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MassiveScenarioClassification.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MintakaRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/MintakaRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MintakaRetrieval.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/OpusparcusPC.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/OpusparcusPC.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/OpusparcusPC.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/PawsX.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/PawsX.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/PawsX.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/PawsX.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SICKFr.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/SICKFr.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SICKFr.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/STS22.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/STS22.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/STS22.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/STS22.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/STSBenchmarkMultilingualSTS.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SummEvalFr.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/SummEvalFr.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SummEvalFr.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SyntecReranking.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/SyntecReranking.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SyntecReranking.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SyntecRetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/SyntecRetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SyntecRetrieval.json
diff --git a/results/Geotrend/distilbert-base-fr-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/XPQARetrieval.json
similarity index 100%
rename from results/Geotrend/distilbert-base-fr-cased/XPQARetrieval.json
rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/XPQARetrieval.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringP2P.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringP2P.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringS2S.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringS2S.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/AlloprofReranking.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloprofReranking.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/AlloprofReranking.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloprofReranking.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/AlloprofRetrieval.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloprofRetrieval.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/AlloprofRetrieval.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloprofRetrieval.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/AmazonReviewsClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AmazonReviewsClassification.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/AmazonReviewsClassification.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AmazonReviewsClassification.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/BSARDRetrieval.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/BSARDRetrieval.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/BSARDRetrieval.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/BSARDRetrieval.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/DiaBLaBitextMining.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/DiaBLaBitextMining.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/DiaBLaBitextMining.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/DiaBLaBitextMining.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/FloresBitextMining.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/FloresBitextMining.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/FloresBitextMining.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/FloresBitextMining.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/HALClusteringS2S.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/HALClusteringS2S.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/HALClusteringS2S.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/HALClusteringS2S.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringP2P.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringP2P.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringS2S.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringS2S.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/MTOPDomainClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MTOPDomainClassification.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/MTOPDomainClassification.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MTOPDomainClassification.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/MTOPIntentClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MTOPIntentClassification.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/MTOPIntentClassification.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MTOPIntentClassification.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClassification.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClassification.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringP2P.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringS2S.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/MassiveIntentClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MassiveIntentClassification.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/MassiveIntentClassification.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MassiveIntentClassification.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/MassiveScenarioClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MassiveScenarioClassification.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/MassiveScenarioClassification.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MassiveScenarioClassification.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/MintakaRetrieval.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MintakaRetrieval.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/MintakaRetrieval.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MintakaRetrieval.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/OpusparcusPC.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/OpusparcusPC.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/OpusparcusPC.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/OpusparcusPC.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/PawsX.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/PawsX.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/PawsX.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/PawsX.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/SICKFr.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SICKFr.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/SICKFr.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SICKFr.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/STS22.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/STS22.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/STS22.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/STS22.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/STSBenchmarkMultilingualSTS.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/SummEvalFr.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SummEvalFr.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/SummEvalFr.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SummEvalFr.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/SyntecReranking.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SyntecReranking.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/SyntecReranking.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SyntecReranking.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/SyntecRetrieval.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SyntecRetrieval.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/SyntecRetrieval.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SyntecRetrieval.json
diff --git a/results/Wissam42/sentence-croissant-llm-base/XPQARetrieval.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/XPQARetrieval.json
similarity index 100%
rename from results/Wissam42/sentence-croissant-llm-base/XPQARetrieval.json
rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/XPQARetrieval.json
diff --git a/results/bert-base-multilingual-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-cased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/bert-base-multilingual-cased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/AlloProfClusteringP2P.json
diff --git a/results/bert-base-multilingual-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-cased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/bert-base-multilingual-cased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/AlloProfClusteringS2S.json
diff --git a/results/bert-base-multilingual-cased/AlloprofReranking.json b/outputs/benchmark_results/bert-base-multilingual-cased/AlloprofReranking.json
similarity index 100%
rename from results/bert-base-multilingual-cased/AlloprofReranking.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/AlloprofReranking.json
diff --git a/results/bert-base-multilingual-cased/AlloprofRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-cased/AlloprofRetrieval.json
similarity index 100%
rename from results/bert-base-multilingual-cased/AlloprofRetrieval.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/AlloprofRetrieval.json
diff --git a/results/bert-base-multilingual-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/AmazonReviewsClassification.json
similarity index 100%
rename from results/bert-base-multilingual-cased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/AmazonReviewsClassification.json
diff --git a/results/bert-base-multilingual-cased/BSARDRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-cased/BSARDRetrieval.json
similarity index 100%
rename from results/bert-base-multilingual-cased/BSARDRetrieval.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/BSARDRetrieval.json
diff --git a/results/bert-base-multilingual-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/bert-base-multilingual-cased/DiaBLaBitextMining.json
similarity index 100%
rename from results/bert-base-multilingual-cased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/DiaBLaBitextMining.json
diff --git a/results/bert-base-multilingual-cased/FloresBitextMining.json b/outputs/benchmark_results/bert-base-multilingual-cased/FloresBitextMining.json
similarity index 100%
rename from results/bert-base-multilingual-cased/FloresBitextMining.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/FloresBitextMining.json
diff --git a/results/bert-base-multilingual-cased/HALClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-cased/HALClusteringS2S.json
similarity index 100%
rename from results/bert-base-multilingual-cased/HALClusteringS2S.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/HALClusteringS2S.json
diff --git a/results/bert-base-multilingual-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-cased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/bert-base-multilingual-cased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/MLSUMClusteringP2P.json
diff --git a/results/bert-base-multilingual-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-cased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/bert-base-multilingual-cased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/MLSUMClusteringS2S.json
diff --git a/results/bert-base-multilingual-cased/MTOPDomainClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/MTOPDomainClassification.json
similarity index 100%
rename from results/bert-base-multilingual-cased/MTOPDomainClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/MTOPDomainClassification.json
diff --git a/results/bert-base-multilingual-cased/MTOPIntentClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/MTOPIntentClassification.json
similarity index 100%
rename from results/bert-base-multilingual-cased/MTOPIntentClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/MTOPIntentClassification.json
diff --git a/results/bert-base-multilingual-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/bert-base-multilingual-cased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClassification.json
diff --git a/results/bert-base-multilingual-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/bert-base-multilingual-cased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClusteringP2P.json
diff --git a/results/bert-base-multilingual-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/bert-base-multilingual-cased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClusteringS2S.json
diff --git a/results/bert-base-multilingual-cased/MassiveIntentClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/MassiveIntentClassification.json
similarity index 100%
rename from results/bert-base-multilingual-cased/MassiveIntentClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/MassiveIntentClassification.json
diff --git a/results/bert-base-multilingual-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/MassiveScenarioClassification.json
similarity index 100%
rename from results/bert-base-multilingual-cased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/MassiveScenarioClassification.json
diff --git a/results/bert-base-multilingual-cased/MintakaRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-cased/MintakaRetrieval.json
similarity index 100%
rename from results/bert-base-multilingual-cased/MintakaRetrieval.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/MintakaRetrieval.json
diff --git a/results/bert-base-multilingual-cased/OpusparcusPC.json b/outputs/benchmark_results/bert-base-multilingual-cased/OpusparcusPC.json
similarity index 100%
rename from results/bert-base-multilingual-cased/OpusparcusPC.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/OpusparcusPC.json
diff --git a/results/bert-base-multilingual-cased/PawsX.json b/outputs/benchmark_results/bert-base-multilingual-cased/PawsX.json
similarity index 100%
rename from results/bert-base-multilingual-cased/PawsX.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/PawsX.json
diff --git a/results/bert-base-multilingual-cased/SICKFr.json b/outputs/benchmark_results/bert-base-multilingual-cased/SICKFr.json
similarity index 100%
rename from results/bert-base-multilingual-cased/SICKFr.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/SICKFr.json
diff --git a/results/bert-base-multilingual-cased/STS22.json b/outputs/benchmark_results/bert-base-multilingual-cased/STS22.json
similarity index 100%
rename from results/bert-base-multilingual-cased/STS22.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/STS22.json
diff --git a/results/bert-base-multilingual-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/bert-base-multilingual-cased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/bert-base-multilingual-cased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/STSBenchmarkMultilingualSTS.json
diff --git a/results/bert-base-multilingual-cased/SummEvalFr.json b/outputs/benchmark_results/bert-base-multilingual-cased/SummEvalFr.json
similarity index 100%
rename from results/bert-base-multilingual-cased/SummEvalFr.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/SummEvalFr.json
diff --git a/results/bert-base-multilingual-cased/SyntecReranking.json b/outputs/benchmark_results/bert-base-multilingual-cased/SyntecReranking.json
similarity index 100%
rename from results/bert-base-multilingual-cased/SyntecReranking.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/SyntecReranking.json
diff --git a/results/bert-base-multilingual-cased/SyntecRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-cased/SyntecRetrieval.json
similarity index 100%
rename from results/bert-base-multilingual-cased/SyntecRetrieval.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/SyntecRetrieval.json
diff --git a/results/bert-base-multilingual-cased/XPQARetrieval.json b/outputs/benchmark_results/bert-base-multilingual-cased/XPQARetrieval.json
similarity index 100%
rename from results/bert-base-multilingual-cased/XPQARetrieval.json
rename to outputs/benchmark_results/bert-base-multilingual-cased/XPQARetrieval.json
diff --git a/results/bert-base-multilingual-uncased/AlloProfClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-uncased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/AlloProfClusteringP2P.json
diff --git a/results/bert-base-multilingual-uncased/AlloProfClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-uncased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/AlloProfClusteringS2S.json
diff --git a/results/bert-base-multilingual-uncased/AlloprofReranking.json b/outputs/benchmark_results/bert-base-multilingual-uncased/AlloprofReranking.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/AlloprofReranking.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/AlloprofReranking.json
diff --git a/results/bert-base-multilingual-uncased/AlloprofRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-uncased/AlloprofRetrieval.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/AlloprofRetrieval.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/AlloprofRetrieval.json
diff --git a/results/bert-base-multilingual-uncased/AmazonReviewsClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/AmazonReviewsClassification.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/AmazonReviewsClassification.json
diff --git a/results/bert-base-multilingual-uncased/BSARDRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-uncased/BSARDRetrieval.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/BSARDRetrieval.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/BSARDRetrieval.json
diff --git a/results/bert-base-multilingual-uncased/DiaBLaBitextMining.json b/outputs/benchmark_results/bert-base-multilingual-uncased/DiaBLaBitextMining.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/DiaBLaBitextMining.json
diff --git a/results/bert-base-multilingual-uncased/FloresBitextMining.json b/outputs/benchmark_results/bert-base-multilingual-uncased/FloresBitextMining.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/FloresBitextMining.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/FloresBitextMining.json
diff --git a/results/bert-base-multilingual-uncased/HALClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-uncased/HALClusteringS2S.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/HALClusteringS2S.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/HALClusteringS2S.json
diff --git a/results/bert-base-multilingual-uncased/MLSUMClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/MLSUMClusteringP2P.json
diff --git a/results/bert-base-multilingual-uncased/MLSUMClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/MLSUMClusteringS2S.json
diff --git a/results/bert-base-multilingual-uncased/MTOPDomainClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MTOPDomainClassification.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/MTOPDomainClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/MTOPDomainClassification.json
diff --git a/results/bert-base-multilingual-uncased/MTOPIntentClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MTOPIntentClassification.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/MTOPIntentClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/MTOPIntentClassification.json
diff --git a/results/bert-base-multilingual-uncased/MasakhaNEWSClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClassification.json
diff --git a/results/bert-base-multilingual-uncased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClusteringP2P.json
diff --git a/results/bert-base-multilingual-uncased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClusteringS2S.json
diff --git a/results/bert-base-multilingual-uncased/MassiveIntentClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MassiveIntentClassification.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/MassiveIntentClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/MassiveIntentClassification.json
diff --git a/results/bert-base-multilingual-uncased/MassiveScenarioClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MassiveScenarioClassification.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/MassiveScenarioClassification.json
diff --git a/results/bert-base-multilingual-uncased/MintakaRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MintakaRetrieval.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/MintakaRetrieval.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/MintakaRetrieval.json
diff --git a/results/bert-base-multilingual-uncased/OpusparcusPC.json b/outputs/benchmark_results/bert-base-multilingual-uncased/OpusparcusPC.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/OpusparcusPC.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/OpusparcusPC.json
diff --git a/results/bert-base-multilingual-uncased/PawsX.json b/outputs/benchmark_results/bert-base-multilingual-uncased/PawsX.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/PawsX.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/PawsX.json
diff --git a/results/bert-base-multilingual-uncased/SICKFr.json b/outputs/benchmark_results/bert-base-multilingual-uncased/SICKFr.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/SICKFr.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/SICKFr.json
diff --git a/results/bert-base-multilingual-uncased/STS22.json b/outputs/benchmark_results/bert-base-multilingual-uncased/STS22.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/STS22.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/STS22.json
diff --git a/results/bert-base-multilingual-uncased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/bert-base-multilingual-uncased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/STSBenchmarkMultilingualSTS.json
diff --git a/results/bert-base-multilingual-uncased/SummEvalFr.json b/outputs/benchmark_results/bert-base-multilingual-uncased/SummEvalFr.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/SummEvalFr.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/SummEvalFr.json
diff --git a/results/bert-base-multilingual-uncased/SyntecReranking.json b/outputs/benchmark_results/bert-base-multilingual-uncased/SyntecReranking.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/SyntecReranking.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/SyntecReranking.json
diff --git a/results/bert-base-multilingual-uncased/SyntecRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-uncased/SyntecRetrieval.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/SyntecRetrieval.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/SyntecRetrieval.json
diff --git a/results/bert-base-multilingual-uncased/XPQARetrieval.json b/outputs/benchmark_results/bert-base-multilingual-uncased/XPQARetrieval.json
similarity index 100%
rename from results/bert-base-multilingual-uncased/XPQARetrieval.json
rename to outputs/benchmark_results/bert-base-multilingual-uncased/XPQARetrieval.json
diff --git a/results/camembert/camembert-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-base/AlloProfClusteringP2P.json
similarity index 100%
rename from results/camembert/camembert-base/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/camembert/camembert-base/AlloProfClusteringP2P.json
diff --git a/results/camembert/camembert-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-base/AlloProfClusteringS2S.json
similarity index 100%
rename from results/camembert/camembert-base/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/camembert/camembert-base/AlloProfClusteringS2S.json
diff --git a/results/camembert/camembert-base/AlloprofReranking.json b/outputs/benchmark_results/camembert/camembert-base/AlloprofReranking.json
similarity index 100%
rename from results/camembert/camembert-base/AlloprofReranking.json
rename to outputs/benchmark_results/camembert/camembert-base/AlloprofReranking.json
diff --git a/results/camembert/camembert-base/AlloprofRetrieval.json b/outputs/benchmark_results/camembert/camembert-base/AlloprofRetrieval.json
similarity index 100%
rename from results/camembert/camembert-base/AlloprofRetrieval.json
rename to outputs/benchmark_results/camembert/camembert-base/AlloprofRetrieval.json
diff --git a/results/camembert/camembert-base/AmazonReviewsClassification.json b/outputs/benchmark_results/camembert/camembert-base/AmazonReviewsClassification.json
similarity index 100%
rename from results/camembert/camembert-base/AmazonReviewsClassification.json
rename to outputs/benchmark_results/camembert/camembert-base/AmazonReviewsClassification.json
diff --git a/results/camembert/camembert-base/BSARDRetrieval.json b/outputs/benchmark_results/camembert/camembert-base/BSARDRetrieval.json
similarity index 100%
rename from results/camembert/camembert-base/BSARDRetrieval.json
rename to outputs/benchmark_results/camembert/camembert-base/BSARDRetrieval.json
diff --git a/results/camembert/camembert-base/DiaBLaBitextMining.json b/outputs/benchmark_results/camembert/camembert-base/DiaBLaBitextMining.json
similarity index 100%
rename from results/camembert/camembert-base/DiaBLaBitextMining.json
rename to outputs/benchmark_results/camembert/camembert-base/DiaBLaBitextMining.json
diff --git a/results/camembert/camembert-base/FloresBitextMining.json b/outputs/benchmark_results/camembert/camembert-base/FloresBitextMining.json
similarity index 100%
rename from results/camembert/camembert-base/FloresBitextMining.json
rename to outputs/benchmark_results/camembert/camembert-base/FloresBitextMining.json
diff --git a/results/camembert/camembert-base/HALClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-base/HALClusteringS2S.json
similarity index 100%
rename from results/camembert/camembert-base/HALClusteringS2S.json
rename to outputs/benchmark_results/camembert/camembert-base/HALClusteringS2S.json
diff --git a/results/camembert/camembert-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-base/MLSUMClusteringP2P.json
similarity index 100%
rename from results/camembert/camembert-base/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/camembert/camembert-base/MLSUMClusteringP2P.json
diff --git a/results/camembert/camembert-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-base/MLSUMClusteringS2S.json
similarity index 100%
rename from results/camembert/camembert-base/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/camembert/camembert-base/MLSUMClusteringS2S.json
diff --git a/results/camembert/camembert-base/MTOPDomainClassification.json b/outputs/benchmark_results/camembert/camembert-base/MTOPDomainClassification.json
similarity index 100%
rename from results/camembert/camembert-base/MTOPDomainClassification.json
rename to outputs/benchmark_results/camembert/camembert-base/MTOPDomainClassification.json
diff --git a/results/camembert/camembert-base/MTOPIntentClassification.json b/outputs/benchmark_results/camembert/camembert-base/MTOPIntentClassification.json
similarity index 100%
rename from results/camembert/camembert-base/MTOPIntentClassification.json
rename to outputs/benchmark_results/camembert/camembert-base/MTOPIntentClassification.json
diff --git a/results/camembert/camembert-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClassification.json
similarity index 100%
rename from results/camembert/camembert-base/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClassification.json
diff --git a/results/camembert/camembert-base/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/camembert/camembert-base/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClusteringP2P.json
diff --git a/results/camembert/camembert-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/camembert/camembert-base/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClusteringS2S.json
diff --git a/results/camembert/camembert-base/MassiveIntentClassification.json b/outputs/benchmark_results/camembert/camembert-base/MassiveIntentClassification.json
similarity index 100%
rename from results/camembert/camembert-base/MassiveIntentClassification.json
rename to outputs/benchmark_results/camembert/camembert-base/MassiveIntentClassification.json
diff --git a/results/camembert/camembert-base/MassiveScenarioClassification.json b/outputs/benchmark_results/camembert/camembert-base/MassiveScenarioClassification.json
similarity index 100%
rename from results/camembert/camembert-base/MassiveScenarioClassification.json
rename to outputs/benchmark_results/camembert/camembert-base/MassiveScenarioClassification.json
diff --git a/results/camembert/camembert-base/MintakaRetrieval.json b/outputs/benchmark_results/camembert/camembert-base/MintakaRetrieval.json
similarity index 100%
rename from results/camembert/camembert-base/MintakaRetrieval.json
rename to outputs/benchmark_results/camembert/camembert-base/MintakaRetrieval.json
diff --git a/results/camembert/camembert-base/OpusparcusPC.json b/outputs/benchmark_results/camembert/camembert-base/OpusparcusPC.json
similarity index 100%
rename from results/camembert/camembert-base/OpusparcusPC.json
rename to outputs/benchmark_results/camembert/camembert-base/OpusparcusPC.json
diff --git a/results/camembert/camembert-base/PawsX.json b/outputs/benchmark_results/camembert/camembert-base/PawsX.json
similarity index 100%
rename from results/camembert/camembert-base/PawsX.json
rename to outputs/benchmark_results/camembert/camembert-base/PawsX.json
diff --git a/results/camembert/camembert-base/SICKFr.json b/outputs/benchmark_results/camembert/camembert-base/SICKFr.json
similarity index 100%
rename from results/camembert/camembert-base/SICKFr.json
rename to outputs/benchmark_results/camembert/camembert-base/SICKFr.json
diff --git a/results/camembert/camembert-base/STS22.json b/outputs/benchmark_results/camembert/camembert-base/STS22.json
similarity index 100%
rename from results/camembert/camembert-base/STS22.json
rename to outputs/benchmark_results/camembert/camembert-base/STS22.json
diff --git a/results/camembert/camembert-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/camembert/camembert-base/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/camembert/camembert-base/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/camembert/camembert-base/STSBenchmarkMultilingualSTS.json
diff --git a/results/camembert/camembert-base/SummEvalFr.json b/outputs/benchmark_results/camembert/camembert-base/SummEvalFr.json
similarity index 100%
rename from results/camembert/camembert-base/SummEvalFr.json
rename to outputs/benchmark_results/camembert/camembert-base/SummEvalFr.json
diff --git a/results/camembert/camembert-base/SyntecReranking.json b/outputs/benchmark_results/camembert/camembert-base/SyntecReranking.json
similarity index 100%
rename from results/camembert/camembert-base/SyntecReranking.json
rename to outputs/benchmark_results/camembert/camembert-base/SyntecReranking.json
diff --git a/results/camembert/camembert-base/SyntecRetrieval.json b/outputs/benchmark_results/camembert/camembert-base/SyntecRetrieval.json
similarity index 100%
rename from results/camembert/camembert-base/SyntecRetrieval.json
rename to outputs/benchmark_results/camembert/camembert-base/SyntecRetrieval.json
diff --git a/results/camembert/camembert-base/XPQARetrieval.json b/outputs/benchmark_results/camembert/camembert-base/XPQARetrieval.json
similarity index 100%
rename from results/camembert/camembert-base/XPQARetrieval.json
rename to outputs/benchmark_results/camembert/camembert-base/XPQARetrieval.json
diff --git a/results/camembert/camembert-large/AlloProfClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-large/AlloProfClusteringP2P.json
similarity index 100%
rename from results/camembert/camembert-large/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/camembert/camembert-large/AlloProfClusteringP2P.json
diff --git a/results/camembert/camembert-large/AlloProfClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-large/AlloProfClusteringS2S.json
similarity index 100%
rename from results/camembert/camembert-large/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/camembert/camembert-large/AlloProfClusteringS2S.json
diff --git a/results/camembert/camembert-large/AlloprofReranking.json b/outputs/benchmark_results/camembert/camembert-large/AlloprofReranking.json
similarity index 100%
rename from results/camembert/camembert-large/AlloprofReranking.json
rename to outputs/benchmark_results/camembert/camembert-large/AlloprofReranking.json
diff --git a/results/camembert/camembert-large/AlloprofRetrieval.json b/outputs/benchmark_results/camembert/camembert-large/AlloprofRetrieval.json
similarity index 100%
rename from results/camembert/camembert-large/AlloprofRetrieval.json
rename to outputs/benchmark_results/camembert/camembert-large/AlloprofRetrieval.json
diff --git a/results/camembert/camembert-large/AmazonReviewsClassification.json b/outputs/benchmark_results/camembert/camembert-large/AmazonReviewsClassification.json
similarity index 100%
rename from results/camembert/camembert-large/AmazonReviewsClassification.json
rename to outputs/benchmark_results/camembert/camembert-large/AmazonReviewsClassification.json
diff --git a/results/camembert/camembert-large/BSARDRetrieval.json b/outputs/benchmark_results/camembert/camembert-large/BSARDRetrieval.json
similarity index 100%
rename from results/camembert/camembert-large/BSARDRetrieval.json
rename to outputs/benchmark_results/camembert/camembert-large/BSARDRetrieval.json
diff --git a/results/camembert/camembert-large/DiaBLaBitextMining.json b/outputs/benchmark_results/camembert/camembert-large/DiaBLaBitextMining.json
similarity index 100%
rename from results/camembert/camembert-large/DiaBLaBitextMining.json
rename to outputs/benchmark_results/camembert/camembert-large/DiaBLaBitextMining.json
diff --git a/results/camembert/camembert-large/FloresBitextMining.json b/outputs/benchmark_results/camembert/camembert-large/FloresBitextMining.json
similarity index 100%
rename from results/camembert/camembert-large/FloresBitextMining.json
rename to outputs/benchmark_results/camembert/camembert-large/FloresBitextMining.json
diff --git a/results/camembert/camembert-large/HALClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-large/HALClusteringS2S.json
similarity index 100%
rename from results/camembert/camembert-large/HALClusteringS2S.json
rename to outputs/benchmark_results/camembert/camembert-large/HALClusteringS2S.json
diff --git a/results/camembert/camembert-large/MLSUMClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-large/MLSUMClusteringP2P.json
similarity index 100%
rename from results/camembert/camembert-large/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/camembert/camembert-large/MLSUMClusteringP2P.json
diff --git a/results/camembert/camembert-large/MLSUMClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-large/MLSUMClusteringS2S.json
similarity index 100%
rename from results/camembert/camembert-large/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/camembert/camembert-large/MLSUMClusteringS2S.json
diff --git a/results/camembert/camembert-large/MTOPDomainClassification.json b/outputs/benchmark_results/camembert/camembert-large/MTOPDomainClassification.json
similarity index 100%
rename from results/camembert/camembert-large/MTOPDomainClassification.json
rename to outputs/benchmark_results/camembert/camembert-large/MTOPDomainClassification.json
diff --git a/results/camembert/camembert-large/MTOPIntentClassification.json b/outputs/benchmark_results/camembert/camembert-large/MTOPIntentClassification.json
similarity index 100%
rename from results/camembert/camembert-large/MTOPIntentClassification.json
rename to outputs/benchmark_results/camembert/camembert-large/MTOPIntentClassification.json
diff --git a/results/camembert/camembert-large/MasakhaNEWSClassification.json b/outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClassification.json
similarity index 100%
rename from results/camembert/camembert-large/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClassification.json
diff --git a/results/camembert/camembert-large/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/camembert/camembert-large/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClusteringP2P.json
diff --git a/results/camembert/camembert-large/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/camembert/camembert-large/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClusteringS2S.json
diff --git a/results/camembert/camembert-large/MassiveIntentClassification.json b/outputs/benchmark_results/camembert/camembert-large/MassiveIntentClassification.json
similarity index 100%
rename from results/camembert/camembert-large/MassiveIntentClassification.json
rename to outputs/benchmark_results/camembert/camembert-large/MassiveIntentClassification.json
diff --git a/results/camembert/camembert-large/MassiveScenarioClassification.json b/outputs/benchmark_results/camembert/camembert-large/MassiveScenarioClassification.json
similarity index 100%
rename from results/camembert/camembert-large/MassiveScenarioClassification.json
rename to outputs/benchmark_results/camembert/camembert-large/MassiveScenarioClassification.json
diff --git a/results/camembert/camembert-large/MintakaRetrieval.json b/outputs/benchmark_results/camembert/camembert-large/MintakaRetrieval.json
similarity index 100%
rename from results/camembert/camembert-large/MintakaRetrieval.json
rename to outputs/benchmark_results/camembert/camembert-large/MintakaRetrieval.json
diff --git a/results/camembert/camembert-large/OpusparcusPC.json b/outputs/benchmark_results/camembert/camembert-large/OpusparcusPC.json
similarity index 100%
rename from results/camembert/camembert-large/OpusparcusPC.json
rename to outputs/benchmark_results/camembert/camembert-large/OpusparcusPC.json
diff --git a/results/camembert/camembert-large/PawsX.json b/outputs/benchmark_results/camembert/camembert-large/PawsX.json
similarity index 100%
rename from results/camembert/camembert-large/PawsX.json
rename to outputs/benchmark_results/camembert/camembert-large/PawsX.json
diff --git a/results/camembert/camembert-large/SICKFr.json b/outputs/benchmark_results/camembert/camembert-large/SICKFr.json
similarity index 100%
rename from results/camembert/camembert-large/SICKFr.json
rename to outputs/benchmark_results/camembert/camembert-large/SICKFr.json
diff --git a/results/camembert/camembert-large/STS22.json b/outputs/benchmark_results/camembert/camembert-large/STS22.json
similarity index 100%
rename from results/camembert/camembert-large/STS22.json
rename to outputs/benchmark_results/camembert/camembert-large/STS22.json
diff --git a/results/camembert/camembert-large/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/camembert/camembert-large/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/camembert/camembert-large/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/camembert/camembert-large/STSBenchmarkMultilingualSTS.json
diff --git a/results/camembert/camembert-large/SummEvalFr.json b/outputs/benchmark_results/camembert/camembert-large/SummEvalFr.json
similarity index 100%
rename from results/camembert/camembert-large/SummEvalFr.json
rename to outputs/benchmark_results/camembert/camembert-large/SummEvalFr.json
diff --git a/results/camembert/camembert-large/SyntecReranking.json b/outputs/benchmark_results/camembert/camembert-large/SyntecReranking.json
similarity index 100%
rename from results/camembert/camembert-large/SyntecReranking.json
rename to outputs/benchmark_results/camembert/camembert-large/SyntecReranking.json
diff --git a/results/camembert/camembert-large/SyntecRetrieval.json b/outputs/benchmark_results/camembert/camembert-large/SyntecRetrieval.json
similarity index 100%
rename from results/camembert/camembert-large/SyntecRetrieval.json
rename to outputs/benchmark_results/camembert/camembert-large/SyntecRetrieval.json
diff --git a/results/camembert/camembert-large/XPQARetrieval.json b/outputs/benchmark_results/camembert/camembert-large/XPQARetrieval.json
similarity index 100%
rename from results/camembert/camembert-large/XPQARetrieval.json
rename to outputs/benchmark_results/camembert/camembert-large/XPQARetrieval.json
diff --git a/results/dangvantuan/sentence-camembert-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloProfClusteringP2P.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloProfClusteringP2P.json
diff --git a/results/dangvantuan/sentence-camembert-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloProfClusteringS2S.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloProfClusteringS2S.json
diff --git a/results/dangvantuan/sentence-camembert-base/AlloprofReranking.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloprofReranking.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/AlloprofReranking.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloprofReranking.json
diff --git a/results/dangvantuan/sentence-camembert-base/AlloprofRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloprofRetrieval.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/AlloprofRetrieval.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloprofRetrieval.json
diff --git a/results/dangvantuan/sentence-camembert-base/AmazonReviewsClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/AmazonReviewsClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/AmazonReviewsClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/AmazonReviewsClassification.json
diff --git a/results/dangvantuan/sentence-camembert-base/BSARDRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/BSARDRetrieval.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/BSARDRetrieval.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/BSARDRetrieval.json
diff --git a/results/dangvantuan/sentence-camembert-base/DiaBLaBitextMining.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/DiaBLaBitextMining.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/DiaBLaBitextMining.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/DiaBLaBitextMining.json
diff --git a/results/dangvantuan/sentence-camembert-base/FloresBitextMining.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/FloresBitextMining.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/FloresBitextMining.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/FloresBitextMining.json
diff --git a/results/dangvantuan/sentence-camembert-base/HALClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/HALClusteringS2S.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/HALClusteringS2S.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/HALClusteringS2S.json
diff --git a/results/dangvantuan/sentence-camembert-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MLSUMClusteringP2P.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MLSUMClusteringP2P.json
diff --git a/results/dangvantuan/sentence-camembert-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MLSUMClusteringS2S.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MLSUMClusteringS2S.json
diff --git a/results/dangvantuan/sentence-camembert-base/MTOPDomainClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MTOPDomainClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/MTOPDomainClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MTOPDomainClassification.json
diff --git a/results/dangvantuan/sentence-camembert-base/MTOPIntentClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MTOPIntentClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/MTOPIntentClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MTOPIntentClassification.json
diff --git a/results/dangvantuan/sentence-camembert-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClassification.json
diff --git a/results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringP2P.json
diff --git a/results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringS2S.json
diff --git a/results/dangvantuan/sentence-camembert-base/MassiveIntentClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MassiveIntentClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/MassiveIntentClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MassiveIntentClassification.json
diff --git a/results/dangvantuan/sentence-camembert-base/MassiveScenarioClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MassiveScenarioClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/MassiveScenarioClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MassiveScenarioClassification.json
diff --git a/results/dangvantuan/sentence-camembert-base/MintakaRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MintakaRetrieval.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/MintakaRetrieval.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MintakaRetrieval.json
diff --git a/results/dangvantuan/sentence-camembert-base/OpusparcusPC.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/OpusparcusPC.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/OpusparcusPC.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/OpusparcusPC.json
diff --git a/results/dangvantuan/sentence-camembert-base/PawsX.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/PawsX.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/PawsX.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/PawsX.json
diff --git a/results/dangvantuan/sentence-camembert-base/SICKFr.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/SICKFr.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/SICKFr.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/SICKFr.json
diff --git a/results/dangvantuan/sentence-camembert-base/STS22.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/STS22.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/STS22.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/STS22.json
diff --git a/results/dangvantuan/sentence-camembert-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/STSBenchmarkMultilingualSTS.json
diff --git a/results/dangvantuan/sentence-camembert-base/SummEvalFr.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/SummEvalFr.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/SummEvalFr.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/SummEvalFr.json
diff --git a/results/dangvantuan/sentence-camembert-base/SyntecReranking.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/SyntecReranking.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/SyntecReranking.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/SyntecReranking.json
diff --git a/results/dangvantuan/sentence-camembert-base/SyntecRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/SyntecRetrieval.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/SyntecRetrieval.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/SyntecRetrieval.json
diff --git a/results/dangvantuan/sentence-camembert-base/XPQARetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/XPQARetrieval.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-base/XPQARetrieval.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/XPQARetrieval.json
diff --git a/results/dangvantuan/sentence-camembert-large/AlloProfClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloProfClusteringP2P.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloProfClusteringP2P.json
diff --git a/results/dangvantuan/sentence-camembert-large/AlloProfClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloProfClusteringS2S.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloProfClusteringS2S.json
diff --git a/results/dangvantuan/sentence-camembert-large/AlloprofReranking.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloprofReranking.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/AlloprofReranking.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloprofReranking.json
diff --git a/results/dangvantuan/sentence-camembert-large/AlloprofRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloprofRetrieval.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/AlloprofRetrieval.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloprofRetrieval.json
diff --git a/results/dangvantuan/sentence-camembert-large/AmazonReviewsClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/AmazonReviewsClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/AmazonReviewsClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/AmazonReviewsClassification.json
diff --git a/results/dangvantuan/sentence-camembert-large/BSARDRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/BSARDRetrieval.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/BSARDRetrieval.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/BSARDRetrieval.json
diff --git a/results/dangvantuan/sentence-camembert-large/DiaBLaBitextMining.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/DiaBLaBitextMining.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/DiaBLaBitextMining.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/DiaBLaBitextMining.json
diff --git a/results/dangvantuan/sentence-camembert-large/FloresBitextMining.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/FloresBitextMining.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/FloresBitextMining.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/FloresBitextMining.json
diff --git a/results/dangvantuan/sentence-camembert-large/HALClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/HALClusteringS2S.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/HALClusteringS2S.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/HALClusteringS2S.json
diff --git a/results/dangvantuan/sentence-camembert-large/MLSUMClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MLSUMClusteringP2P.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MLSUMClusteringP2P.json
diff --git a/results/dangvantuan/sentence-camembert-large/MLSUMClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MLSUMClusteringS2S.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MLSUMClusteringS2S.json
diff --git a/results/dangvantuan/sentence-camembert-large/MTOPDomainClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MTOPDomainClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/MTOPDomainClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MTOPDomainClassification.json
diff --git a/results/dangvantuan/sentence-camembert-large/MTOPIntentClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MTOPIntentClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/MTOPIntentClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MTOPIntentClassification.json
diff --git a/results/dangvantuan/sentence-camembert-large/MasakhaNEWSClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClassification.json
diff --git a/results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringP2P.json
diff --git a/results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringS2S.json
diff --git a/results/dangvantuan/sentence-camembert-large/MassiveIntentClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MassiveIntentClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/MassiveIntentClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MassiveIntentClassification.json
diff --git a/results/dangvantuan/sentence-camembert-large/MassiveScenarioClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MassiveScenarioClassification.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/MassiveScenarioClassification.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MassiveScenarioClassification.json
diff --git a/results/dangvantuan/sentence-camembert-large/MintakaRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MintakaRetrieval.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/MintakaRetrieval.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MintakaRetrieval.json
diff --git a/results/dangvantuan/sentence-camembert-large/OpusparcusPC.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/OpusparcusPC.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/OpusparcusPC.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/OpusparcusPC.json
diff --git a/results/dangvantuan/sentence-camembert-large/PawsX.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/PawsX.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/PawsX.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/PawsX.json
diff --git a/results/dangvantuan/sentence-camembert-large/SICKFr.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/SICKFr.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/SICKFr.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/SICKFr.json
diff --git a/results/dangvantuan/sentence-camembert-large/STS22.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/STS22.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/STS22.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/STS22.json
diff --git a/results/dangvantuan/sentence-camembert-large/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/STSBenchmarkMultilingualSTS.json
diff --git a/results/dangvantuan/sentence-camembert-large/SummEvalFr.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/SummEvalFr.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/SummEvalFr.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/SummEvalFr.json
diff --git a/results/dangvantuan/sentence-camembert-large/SyntecReranking.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/SyntecReranking.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/SyntecReranking.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/SyntecReranking.json
diff --git a/results/dangvantuan/sentence-camembert-large/SyntecRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/SyntecRetrieval.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/SyntecRetrieval.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/SyntecRetrieval.json
diff --git a/results/dangvantuan/sentence-camembert-large/XPQARetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/XPQARetrieval.json
similarity index 100%
rename from results/dangvantuan/sentence-camembert-large/XPQARetrieval.json
rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/XPQARetrieval.json
diff --git a/results/distilbert-base-uncased/AlloProfClusteringP2P.json b/outputs/benchmark_results/distilbert-base-uncased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/distilbert-base-uncased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/distilbert-base-uncased/AlloProfClusteringP2P.json
diff --git a/results/distilbert-base-uncased/AlloProfClusteringS2S.json b/outputs/benchmark_results/distilbert-base-uncased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/distilbert-base-uncased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/distilbert-base-uncased/AlloProfClusteringS2S.json
diff --git a/results/distilbert-base-uncased/AlloprofReranking.json b/outputs/benchmark_results/distilbert-base-uncased/AlloprofReranking.json
similarity index 100%
rename from results/distilbert-base-uncased/AlloprofReranking.json
rename to outputs/benchmark_results/distilbert-base-uncased/AlloprofReranking.json
diff --git a/results/distilbert-base-uncased/AlloprofRetrieval.json b/outputs/benchmark_results/distilbert-base-uncased/AlloprofRetrieval.json
similarity index 100%
rename from results/distilbert-base-uncased/AlloprofRetrieval.json
rename to outputs/benchmark_results/distilbert-base-uncased/AlloprofRetrieval.json
diff --git a/results/distilbert-base-uncased/AmazonReviewsClassification.json b/outputs/benchmark_results/distilbert-base-uncased/AmazonReviewsClassification.json
similarity index 100%
rename from results/distilbert-base-uncased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/distilbert-base-uncased/AmazonReviewsClassification.json
diff --git a/results/distilbert-base-uncased/BSARDRetrieval.json b/outputs/benchmark_results/distilbert-base-uncased/BSARDRetrieval.json
similarity index 100%
rename from results/distilbert-base-uncased/BSARDRetrieval.json
rename to outputs/benchmark_results/distilbert-base-uncased/BSARDRetrieval.json
diff --git a/results/distilbert-base-uncased/DiaBLaBitextMining.json b/outputs/benchmark_results/distilbert-base-uncased/DiaBLaBitextMining.json
similarity index 100%
rename from results/distilbert-base-uncased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/distilbert-base-uncased/DiaBLaBitextMining.json
diff --git a/results/distilbert-base-uncased/FloresBitextMining.json b/outputs/benchmark_results/distilbert-base-uncased/FloresBitextMining.json
similarity index 100%
rename from results/distilbert-base-uncased/FloresBitextMining.json
rename to outputs/benchmark_results/distilbert-base-uncased/FloresBitextMining.json
diff --git a/results/distilbert-base-uncased/HALClusteringS2S.json b/outputs/benchmark_results/distilbert-base-uncased/HALClusteringS2S.json
similarity index 100%
rename from results/distilbert-base-uncased/HALClusteringS2S.json
rename to outputs/benchmark_results/distilbert-base-uncased/HALClusteringS2S.json
diff --git a/results/distilbert-base-uncased/MLSUMClusteringP2P.json b/outputs/benchmark_results/distilbert-base-uncased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/distilbert-base-uncased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/distilbert-base-uncased/MLSUMClusteringP2P.json
diff --git a/results/distilbert-base-uncased/MLSUMClusteringS2S.json b/outputs/benchmark_results/distilbert-base-uncased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/distilbert-base-uncased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/distilbert-base-uncased/MLSUMClusteringS2S.json
diff --git a/results/distilbert-base-uncased/MTOPDomainClassification.json b/outputs/benchmark_results/distilbert-base-uncased/MTOPDomainClassification.json
similarity index 100%
rename from results/distilbert-base-uncased/MTOPDomainClassification.json
rename to outputs/benchmark_results/distilbert-base-uncased/MTOPDomainClassification.json
diff --git a/results/distilbert-base-uncased/MTOPIntentClassification.json b/outputs/benchmark_results/distilbert-base-uncased/MTOPIntentClassification.json
similarity index 100%
rename from results/distilbert-base-uncased/MTOPIntentClassification.json
rename to outputs/benchmark_results/distilbert-base-uncased/MTOPIntentClassification.json
diff --git a/results/distilbert-base-uncased/MasakhaNEWSClassification.json b/outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/distilbert-base-uncased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClassification.json
diff --git a/results/distilbert-base-uncased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/distilbert-base-uncased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClusteringP2P.json
diff --git a/results/distilbert-base-uncased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/distilbert-base-uncased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClusteringS2S.json
diff --git a/results/distilbert-base-uncased/MassiveIntentClassification.json b/outputs/benchmark_results/distilbert-base-uncased/MassiveIntentClassification.json
similarity index 100%
rename from results/distilbert-base-uncased/MassiveIntentClassification.json
rename to outputs/benchmark_results/distilbert-base-uncased/MassiveIntentClassification.json
diff --git a/results/distilbert-base-uncased/MassiveScenarioClassification.json b/outputs/benchmark_results/distilbert-base-uncased/MassiveScenarioClassification.json
similarity index 100%
rename from results/distilbert-base-uncased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/distilbert-base-uncased/MassiveScenarioClassification.json
diff --git a/results/distilbert-base-uncased/MintakaRetrieval.json b/outputs/benchmark_results/distilbert-base-uncased/MintakaRetrieval.json
similarity index 100%
rename from results/distilbert-base-uncased/MintakaRetrieval.json
rename to outputs/benchmark_results/distilbert-base-uncased/MintakaRetrieval.json
diff --git a/results/distilbert-base-uncased/OpusparcusPC.json b/outputs/benchmark_results/distilbert-base-uncased/OpusparcusPC.json
similarity index 100%
rename from results/distilbert-base-uncased/OpusparcusPC.json
rename to outputs/benchmark_results/distilbert-base-uncased/OpusparcusPC.json
diff --git a/results/distilbert-base-uncased/PawsX.json b/outputs/benchmark_results/distilbert-base-uncased/PawsX.json
similarity index 100%
rename from results/distilbert-base-uncased/PawsX.json
rename to outputs/benchmark_results/distilbert-base-uncased/PawsX.json
diff --git a/results/distilbert-base-uncased/SICKFr.json b/outputs/benchmark_results/distilbert-base-uncased/SICKFr.json
similarity index 100%
rename from results/distilbert-base-uncased/SICKFr.json
rename to outputs/benchmark_results/distilbert-base-uncased/SICKFr.json
diff --git a/results/distilbert-base-uncased/STS22.json b/outputs/benchmark_results/distilbert-base-uncased/STS22.json
similarity index 100%
rename from results/distilbert-base-uncased/STS22.json
rename to outputs/benchmark_results/distilbert-base-uncased/STS22.json
diff --git a/results/distilbert-base-uncased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/distilbert-base-uncased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/distilbert-base-uncased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/distilbert-base-uncased/STSBenchmarkMultilingualSTS.json
diff --git a/results/distilbert-base-uncased/SummEvalFr.json b/outputs/benchmark_results/distilbert-base-uncased/SummEvalFr.json
similarity index 100%
rename from results/distilbert-base-uncased/SummEvalFr.json
rename to outputs/benchmark_results/distilbert-base-uncased/SummEvalFr.json
diff --git a/results/distilbert-base-uncased/SyntecReranking.json b/outputs/benchmark_results/distilbert-base-uncased/SyntecReranking.json
similarity index 100%
rename from results/distilbert-base-uncased/SyntecReranking.json
rename to outputs/benchmark_results/distilbert-base-uncased/SyntecReranking.json
diff --git a/results/distilbert-base-uncased/SyntecRetrieval.json b/outputs/benchmark_results/distilbert-base-uncased/SyntecRetrieval.json
similarity index 100%
rename from results/distilbert-base-uncased/SyntecRetrieval.json
rename to outputs/benchmark_results/distilbert-base-uncased/SyntecRetrieval.json
diff --git a/results/distilbert-base-uncased/XPQARetrieval.json b/outputs/benchmark_results/distilbert-base-uncased/XPQARetrieval.json
similarity index 100%
rename from results/distilbert-base-uncased/XPQARetrieval.json
rename to outputs/benchmark_results/distilbert-base-uncased/XPQARetrieval.json
diff --git a/results/flaubert/flaubert_base_cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/AlloProfClusteringP2P.json
diff --git a/results/flaubert/flaubert_base_cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/AlloProfClusteringS2S.json
diff --git a/results/flaubert/flaubert_base_cased/AlloprofReranking.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/AlloprofReranking.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/AlloprofReranking.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/AlloprofReranking.json
diff --git a/results/flaubert/flaubert_base_cased/AlloprofRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/AlloprofRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/AlloprofRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/AlloprofRetrieval.json
diff --git a/results/flaubert/flaubert_base_cased/AmazonReviewsClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/AmazonReviewsClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/AmazonReviewsClassification.json
diff --git a/results/flaubert/flaubert_base_cased/BSARDRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/BSARDRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/BSARDRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/BSARDRetrieval.json
diff --git a/results/flaubert/flaubert_base_cased/DiaBLaBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/DiaBLaBitextMining.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/DiaBLaBitextMining.json
diff --git a/results/flaubert/flaubert_base_cased/FloresBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/FloresBitextMining.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/FloresBitextMining.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/FloresBitextMining.json
diff --git a/results/flaubert/flaubert_base_cased/HALClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/HALClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/HALClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/HALClusteringS2S.json
diff --git a/results/flaubert/flaubert_base_cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MLSUMClusteringP2P.json
diff --git a/results/flaubert/flaubert_base_cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MLSUMClusteringS2S.json
diff --git a/results/flaubert/flaubert_base_cased/MTOPDomainClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MTOPDomainClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/MTOPDomainClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MTOPDomainClassification.json
diff --git a/results/flaubert/flaubert_base_cased/MTOPIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MTOPIntentClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/MTOPIntentClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MTOPIntentClassification.json
diff --git a/results/flaubert/flaubert_base_cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClassification.json
diff --git a/results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringP2P.json
diff --git a/results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringS2S.json
diff --git a/results/flaubert/flaubert_base_cased/MassiveIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MassiveIntentClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/MassiveIntentClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MassiveIntentClassification.json
diff --git a/results/flaubert/flaubert_base_cased/MassiveScenarioClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MassiveScenarioClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MassiveScenarioClassification.json
diff --git a/results/flaubert/flaubert_base_cased/MintakaRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MintakaRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/MintakaRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MintakaRetrieval.json
diff --git a/results/flaubert/flaubert_base_cased/OpusparcusPC.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/OpusparcusPC.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/OpusparcusPC.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/OpusparcusPC.json
diff --git a/results/flaubert/flaubert_base_cased/PawsX.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/PawsX.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/PawsX.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/PawsX.json
diff --git a/results/flaubert/flaubert_base_cased/SICKFr.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/SICKFr.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/SICKFr.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/SICKFr.json
diff --git a/results/flaubert/flaubert_base_cased/STS22.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/STS22.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/STS22.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/STS22.json
diff --git a/results/flaubert/flaubert_base_cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/STSBenchmarkMultilingualSTS.json
diff --git a/results/flaubert/flaubert_base_cased/SummEvalFr.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/SummEvalFr.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/SummEvalFr.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/SummEvalFr.json
diff --git a/results/flaubert/flaubert_base_cased/SyntecReranking.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/SyntecReranking.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/SyntecReranking.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/SyntecReranking.json
diff --git a/results/flaubert/flaubert_base_cased/SyntecRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/SyntecRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/SyntecRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/SyntecRetrieval.json
diff --git a/results/flaubert/flaubert_base_cased/XPQARetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/XPQARetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_base_cased/XPQARetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_base_cased/XPQARetrieval.json
diff --git a/results/flaubert/flaubert_base_uncased/AlloProfClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloProfClusteringP2P.json
diff --git a/results/flaubert/flaubert_base_uncased/AlloProfClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloProfClusteringS2S.json
diff --git a/results/flaubert/flaubert_base_uncased/AlloprofReranking.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloprofReranking.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/AlloprofReranking.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloprofReranking.json
diff --git a/results/flaubert/flaubert_base_uncased/AlloprofRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloprofRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/AlloprofRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloprofRetrieval.json
diff --git a/results/flaubert/flaubert_base_uncased/AmazonReviewsClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/AmazonReviewsClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/AmazonReviewsClassification.json
diff --git a/results/flaubert/flaubert_base_uncased/BSARDRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/BSARDRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/BSARDRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/BSARDRetrieval.json
diff --git a/results/flaubert/flaubert_base_uncased/DiaBLaBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/DiaBLaBitextMining.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/DiaBLaBitextMining.json
diff --git a/results/flaubert/flaubert_base_uncased/FloresBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/FloresBitextMining.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/FloresBitextMining.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/FloresBitextMining.json
diff --git a/results/flaubert/flaubert_base_uncased/HALClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/HALClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/HALClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/HALClusteringS2S.json
diff --git a/results/flaubert/flaubert_base_uncased/MLSUMClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MLSUMClusteringP2P.json
diff --git a/results/flaubert/flaubert_base_uncased/MLSUMClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MLSUMClusteringS2S.json
diff --git a/results/flaubert/flaubert_base_uncased/MTOPDomainClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MTOPDomainClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/MTOPDomainClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MTOPDomainClassification.json
diff --git a/results/flaubert/flaubert_base_uncased/MTOPIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MTOPIntentClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/MTOPIntentClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MTOPIntentClassification.json
diff --git a/results/flaubert/flaubert_base_uncased/MasakhaNEWSClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClassification.json
diff --git a/results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringP2P.json
diff --git a/results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringS2S.json
diff --git a/results/flaubert/flaubert_base_uncased/MassiveIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MassiveIntentClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/MassiveIntentClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MassiveIntentClassification.json
diff --git a/results/flaubert/flaubert_base_uncased/MassiveScenarioClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MassiveScenarioClassification.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MassiveScenarioClassification.json
diff --git a/results/flaubert/flaubert_base_uncased/MintakaRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MintakaRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/MintakaRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MintakaRetrieval.json
diff --git a/results/flaubert/flaubert_base_uncased/OpusparcusPC.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/OpusparcusPC.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/OpusparcusPC.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/OpusparcusPC.json
diff --git a/results/flaubert/flaubert_base_uncased/PawsX.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/PawsX.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/PawsX.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/PawsX.json
diff --git a/results/flaubert/flaubert_base_uncased/SICKFr.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/SICKFr.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/SICKFr.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/SICKFr.json
diff --git a/results/flaubert/flaubert_base_uncased/STS22.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/STS22.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/STS22.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/STS22.json
diff --git a/results/flaubert/flaubert_base_uncased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/STSBenchmarkMultilingualSTS.json
diff --git a/results/flaubert/flaubert_base_uncased/SummEvalFr.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/SummEvalFr.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/SummEvalFr.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/SummEvalFr.json
diff --git a/results/flaubert/flaubert_base_uncased/SyntecReranking.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/SyntecReranking.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/SyntecReranking.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/SyntecReranking.json
diff --git a/results/flaubert/flaubert_base_uncased/SyntecRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/SyntecRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/SyntecRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/SyntecRetrieval.json
diff --git a/results/flaubert/flaubert_base_uncased/XPQARetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/XPQARetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_base_uncased/XPQARetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/XPQARetrieval.json
diff --git a/results/flaubert/flaubert_large_cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/AlloProfClusteringP2P.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/AlloProfClusteringP2P.json
diff --git a/results/flaubert/flaubert_large_cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/AlloProfClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/AlloProfClusteringS2S.json
diff --git a/results/flaubert/flaubert_large_cased/AlloprofReranking.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/AlloprofReranking.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/AlloprofReranking.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/AlloprofReranking.json
diff --git a/results/flaubert/flaubert_large_cased/AlloprofRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/AlloprofRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/AlloprofRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/AlloprofRetrieval.json
diff --git a/results/flaubert/flaubert_large_cased/AmazonReviewsClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/AmazonReviewsClassification.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/AmazonReviewsClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/AmazonReviewsClassification.json
diff --git a/results/flaubert/flaubert_large_cased/BSARDRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/BSARDRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/BSARDRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/BSARDRetrieval.json
diff --git a/results/flaubert/flaubert_large_cased/DiaBLaBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/DiaBLaBitextMining.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/DiaBLaBitextMining.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/DiaBLaBitextMining.json
diff --git a/results/flaubert/flaubert_large_cased/FloresBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/FloresBitextMining.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/FloresBitextMining.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/FloresBitextMining.json
diff --git a/results/flaubert/flaubert_large_cased/HALClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/HALClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/HALClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/HALClusteringS2S.json
diff --git a/results/flaubert/flaubert_large_cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MLSUMClusteringP2P.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MLSUMClusteringP2P.json
diff --git a/results/flaubert/flaubert_large_cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MLSUMClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MLSUMClusteringS2S.json
diff --git a/results/flaubert/flaubert_large_cased/MTOPDomainClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MTOPDomainClassification.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/MTOPDomainClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MTOPDomainClassification.json
diff --git a/results/flaubert/flaubert_large_cased/MTOPIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MTOPIntentClassification.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/MTOPIntentClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MTOPIntentClassification.json
diff --git a/results/flaubert/flaubert_large_cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClassification.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClassification.json
diff --git a/results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringP2P.json
diff --git a/results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringS2S.json
diff --git a/results/flaubert/flaubert_large_cased/MassiveIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MassiveIntentClassification.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/MassiveIntentClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MassiveIntentClassification.json
diff --git a/results/flaubert/flaubert_large_cased/MassiveScenarioClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MassiveScenarioClassification.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/MassiveScenarioClassification.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MassiveScenarioClassification.json
diff --git a/results/flaubert/flaubert_large_cased/MintakaRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MintakaRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/MintakaRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MintakaRetrieval.json
diff --git a/results/flaubert/flaubert_large_cased/OpusparcusPC.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/OpusparcusPC.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/OpusparcusPC.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/OpusparcusPC.json
diff --git a/results/flaubert/flaubert_large_cased/PawsX.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/PawsX.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/PawsX.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/PawsX.json
diff --git a/results/flaubert/flaubert_large_cased/SICKFr.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/SICKFr.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/SICKFr.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/SICKFr.json
diff --git a/results/flaubert/flaubert_large_cased/STS22.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/STS22.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/STS22.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/STS22.json
diff --git a/results/flaubert/flaubert_large_cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/STSBenchmarkMultilingualSTS.json
diff --git a/results/flaubert/flaubert_large_cased/SummEvalFr.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/SummEvalFr.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/SummEvalFr.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/SummEvalFr.json
diff --git a/results/flaubert/flaubert_large_cased/SyntecReranking.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/SyntecReranking.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/SyntecReranking.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/SyntecReranking.json
diff --git a/results/flaubert/flaubert_large_cased/SyntecRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/SyntecRetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/SyntecRetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/SyntecRetrieval.json
diff --git a/results/flaubert/flaubert_large_cased/XPQARetrieval.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/XPQARetrieval.json
similarity index 100%
rename from results/flaubert/flaubert_large_cased/XPQARetrieval.json
rename to outputs/benchmark_results/flaubert/flaubert_large_cased/XPQARetrieval.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringP2P.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringP2P.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringP2P.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringS2S.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringS2S.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringS2S.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/AlloprofReranking.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloprofReranking.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/AlloprofReranking.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloprofReranking.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/AlloprofRetrieval.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloprofRetrieval.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/AlloprofRetrieval.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloprofRetrieval.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/AmazonReviewsClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AmazonReviewsClassification.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/AmazonReviewsClassification.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AmazonReviewsClassification.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/BSARDRetrieval.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/BSARDRetrieval.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/BSARDRetrieval.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/BSARDRetrieval.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/DiaBLaBitextMining.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/DiaBLaBitextMining.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/DiaBLaBitextMining.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/DiaBLaBitextMining.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/FloresBitextMining.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/FloresBitextMining.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/FloresBitextMining.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/FloresBitextMining.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/HALClusteringS2S.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/HALClusteringS2S.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/HALClusteringS2S.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/HALClusteringS2S.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringP2P.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringP2P.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringP2P.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringS2S.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringS2S.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringS2S.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/MTOPDomainClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MTOPDomainClassification.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/MTOPDomainClassification.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MTOPDomainClassification.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/MTOPIntentClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MTOPIntentClassification.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/MTOPIntentClassification.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MTOPIntentClassification.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClassification.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClassification.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringP2P.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringS2S.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/MassiveIntentClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MassiveIntentClassification.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/MassiveIntentClassification.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MassiveIntentClassification.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/MassiveScenarioClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MassiveScenarioClassification.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/MassiveScenarioClassification.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MassiveScenarioClassification.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/MintakaRetrieval.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MintakaRetrieval.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/MintakaRetrieval.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MintakaRetrieval.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/OpusparcusPC.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/OpusparcusPC.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/OpusparcusPC.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/OpusparcusPC.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/PawsX.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/PawsX.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/PawsX.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/PawsX.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/SICKFr.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SICKFr.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/SICKFr.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SICKFr.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/STS22.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/STS22.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/STS22.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/STS22.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/STSBenchmarkMultilingualSTS.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/SummEvalFr.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SummEvalFr.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/SummEvalFr.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SummEvalFr.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/SyntecReranking.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SyntecReranking.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/SyntecReranking.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SyntecReranking.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/SyntecRetrieval.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SyntecRetrieval.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/SyntecRetrieval.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SyntecRetrieval.json
diff --git a/results/intfloat/e5-mistral-7b-instruct/XPQARetrieval.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/XPQARetrieval.json
similarity index 100%
rename from results/intfloat/e5-mistral-7b-instruct/XPQARetrieval.json
rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/XPQARetrieval.json
diff --git a/results/intfloat/multilingual-e5-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/AlloProfClusteringP2P.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/AlloProfClusteringP2P.json
diff --git a/results/intfloat/multilingual-e5-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/AlloProfClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/AlloProfClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-base/AlloprofReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/AlloprofReranking.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/AlloprofReranking.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/AlloprofReranking.json
diff --git a/results/intfloat/multilingual-e5-base/AlloprofRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/AlloprofRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/AlloprofRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/AlloprofRetrieval.json
diff --git a/results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json
diff --git a/results/intfloat/multilingual-e5-base/BSARDRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/BSARDRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/BSARDRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/BSARDRetrieval.json
diff --git a/results/intfloat/multilingual-e5-base/DiaBLaBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/DiaBLaBitextMining.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/DiaBLaBitextMining.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/DiaBLaBitextMining.json
diff --git a/results/intfloat/multilingual-e5-base/FloresBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/FloresBitextMining.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/FloresBitextMining.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/FloresBitextMining.json
diff --git a/results/intfloat/multilingual-e5-base/HALClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/HALClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/HALClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/HALClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MLSUMClusteringP2P.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MLSUMClusteringP2P.json
diff --git a/results/intfloat/multilingual-e5-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MLSUMClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MLSUMClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-base/MTOPDomainClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MTOPDomainClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/MTOPDomainClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MTOPDomainClassification.json
diff --git a/results/intfloat/multilingual-e5-base/MTOPIntentClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MTOPIntentClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/MTOPIntentClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MTOPIntentClassification.json
diff --git a/results/intfloat/multilingual-e5-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClassification.json
diff --git a/results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringP2P.json
diff --git a/results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-base/MassiveIntentClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MassiveIntentClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/MassiveIntentClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MassiveIntentClassification.json
diff --git a/results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json
diff --git a/results/intfloat/multilingual-e5-base/MintakaRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MintakaRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/MintakaRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MintakaRetrieval.json
diff --git a/results/intfloat/multilingual-e5-base/OpusparcusPC.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/OpusparcusPC.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/OpusparcusPC.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/OpusparcusPC.json
diff --git a/results/intfloat/multilingual-e5-base/PawsX.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/PawsX.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/PawsX.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/PawsX.json
diff --git a/results/intfloat/multilingual-e5-base/SICKFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/SICKFr.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/SICKFr.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/SICKFr.json
diff --git a/results/intfloat/multilingual-e5-base/STS22.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/STS22.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/STS22.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/STS22.json
diff --git a/results/intfloat/multilingual-e5-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/STSBenchmarkMultilingualSTS.json
diff --git a/results/intfloat/multilingual-e5-base/SummEvalFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/SummEvalFr.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/SummEvalFr.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/SummEvalFr.json
diff --git a/results/intfloat/multilingual-e5-base/SyntecReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/SyntecReranking.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/SyntecReranking.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/SyntecReranking.json
diff --git a/results/intfloat/multilingual-e5-base/SyntecRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/SyntecRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/SyntecRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/SyntecRetrieval.json
diff --git a/results/intfloat/multilingual-e5-base/XPQARetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/XPQARetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-base/XPQARetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-base/XPQARetrieval.json
diff --git a/results/intfloat/multilingual-e5-large/AlloProfClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/AlloProfClusteringP2P.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/AlloProfClusteringP2P.json
diff --git a/results/intfloat/multilingual-e5-large/AlloProfClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/AlloProfClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/AlloProfClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-large/AlloprofReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/AlloprofReranking.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/AlloprofReranking.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/AlloprofReranking.json
diff --git a/results/intfloat/multilingual-e5-large/AlloprofRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/AlloprofRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/AlloprofRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/AlloprofRetrieval.json
diff --git a/results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json
diff --git a/results/intfloat/multilingual-e5-large/BSARDRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/BSARDRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/BSARDRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/BSARDRetrieval.json
diff --git a/results/intfloat/multilingual-e5-large/DiaBLaBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/DiaBLaBitextMining.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/DiaBLaBitextMining.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/DiaBLaBitextMining.json
diff --git a/results/intfloat/multilingual-e5-large/FloresBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/FloresBitextMining.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/FloresBitextMining.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/FloresBitextMining.json
diff --git a/results/intfloat/multilingual-e5-large/HALClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/HALClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/HALClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/HALClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-large/MLSUMClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MLSUMClusteringP2P.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MLSUMClusteringP2P.json
diff --git a/results/intfloat/multilingual-e5-large/MLSUMClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MLSUMClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MLSUMClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-large/MTOPDomainClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MTOPDomainClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/MTOPDomainClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MTOPDomainClassification.json
diff --git a/results/intfloat/multilingual-e5-large/MTOPIntentClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MTOPIntentClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/MTOPIntentClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MTOPIntentClassification.json
diff --git a/results/intfloat/multilingual-e5-large/MasakhaNEWSClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClassification.json
diff --git a/results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringP2P.json
diff --git a/results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-large/MassiveIntentClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MassiveIntentClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/MassiveIntentClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MassiveIntentClassification.json
diff --git a/results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json
diff --git a/results/intfloat/multilingual-e5-large/MintakaRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MintakaRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/MintakaRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MintakaRetrieval.json
diff --git a/results/intfloat/multilingual-e5-large/OpusparcusPC.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/OpusparcusPC.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/OpusparcusPC.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/OpusparcusPC.json
diff --git a/results/intfloat/multilingual-e5-large/PawsX.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/PawsX.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/PawsX.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/PawsX.json
diff --git a/results/intfloat/multilingual-e5-large/SICKFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/SICKFr.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/SICKFr.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/SICKFr.json
diff --git a/results/intfloat/multilingual-e5-large/STS22.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/STS22.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/STS22.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/STS22.json
diff --git a/results/intfloat/multilingual-e5-large/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/STSBenchmarkMultilingualSTS.json
diff --git a/results/intfloat/multilingual-e5-large/SummEvalFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/SummEvalFr.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/SummEvalFr.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/SummEvalFr.json
diff --git a/results/intfloat/multilingual-e5-large/SyntecReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/SyntecReranking.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/SyntecReranking.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/SyntecReranking.json
diff --git a/results/intfloat/multilingual-e5-large/SyntecRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/SyntecRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/SyntecRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/SyntecRetrieval.json
diff --git a/results/intfloat/multilingual-e5-large/XPQARetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/XPQARetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-large/XPQARetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-large/XPQARetrieval.json
diff --git a/results/intfloat/multilingual-e5-small/AlloProfClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/AlloProfClusteringP2P.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/AlloProfClusteringP2P.json
diff --git a/results/intfloat/multilingual-e5-small/AlloProfClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/AlloProfClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/AlloProfClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-small/AlloprofReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/AlloprofReranking.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/AlloprofReranking.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/AlloprofReranking.json
diff --git a/results/intfloat/multilingual-e5-small/AlloprofRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/AlloprofRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/AlloprofRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/AlloprofRetrieval.json
diff --git a/results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json
diff --git a/results/intfloat/multilingual-e5-small/BSARDRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/BSARDRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/BSARDRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/BSARDRetrieval.json
diff --git a/results/intfloat/multilingual-e5-small/DiaBLaBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/DiaBLaBitextMining.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/DiaBLaBitextMining.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/DiaBLaBitextMining.json
diff --git a/results/intfloat/multilingual-e5-small/FloresBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/FloresBitextMining.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/FloresBitextMining.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/FloresBitextMining.json
diff --git a/results/intfloat/multilingual-e5-small/HALClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/HALClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/HALClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/HALClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-small/MLSUMClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MLSUMClusteringP2P.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MLSUMClusteringP2P.json
diff --git a/results/intfloat/multilingual-e5-small/MLSUMClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MLSUMClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MLSUMClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-small/MTOPDomainClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MTOPDomainClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/MTOPDomainClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MTOPDomainClassification.json
diff --git a/results/intfloat/multilingual-e5-small/MTOPIntentClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MTOPIntentClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/MTOPIntentClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MTOPIntentClassification.json
diff --git a/results/intfloat/multilingual-e5-small/MasakhaNEWSClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClassification.json
diff --git a/results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringP2P.json
diff --git a/results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringS2S.json
diff --git a/results/intfloat/multilingual-e5-small/MassiveIntentClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MassiveIntentClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/MassiveIntentClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MassiveIntentClassification.json
diff --git a/results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json
diff --git a/results/intfloat/multilingual-e5-small/MintakaRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MintakaRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/MintakaRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MintakaRetrieval.json
diff --git a/results/intfloat/multilingual-e5-small/OpusparcusPC.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/OpusparcusPC.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/OpusparcusPC.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/OpusparcusPC.json
diff --git a/results/intfloat/multilingual-e5-small/PawsX.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/PawsX.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/PawsX.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/PawsX.json
diff --git a/results/intfloat/multilingual-e5-small/SICKFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/SICKFr.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/SICKFr.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/SICKFr.json
diff --git a/results/intfloat/multilingual-e5-small/STS22.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/STS22.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/STS22.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/STS22.json
diff --git a/results/intfloat/multilingual-e5-small/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/STSBenchmarkMultilingualSTS.json
diff --git a/results/intfloat/multilingual-e5-small/SummEvalFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/SummEvalFr.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/SummEvalFr.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/SummEvalFr.json
diff --git a/results/intfloat/multilingual-e5-small/SyntecReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/SyntecReranking.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/SyntecReranking.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/SyntecReranking.json
diff --git a/results/intfloat/multilingual-e5-small/SyntecRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/SyntecRetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/SyntecRetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/SyntecRetrieval.json
diff --git a/results/intfloat/multilingual-e5-small/XPQARetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/XPQARetrieval.json
similarity index 100%
rename from results/intfloat/multilingual-e5-small/XPQARetrieval.json
rename to outputs/benchmark_results/intfloat/multilingual-e5-small/XPQARetrieval.json
diff --git a/results/izhx/udever-bloom-1b1/AlloProfClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/AlloProfClusteringP2P.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/AlloProfClusteringP2P.json
diff --git a/results/izhx/udever-bloom-1b1/AlloProfClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/AlloProfClusteringS2S.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/AlloProfClusteringS2S.json
diff --git a/results/izhx/udever-bloom-1b1/AlloprofReranking.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/AlloprofReranking.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/AlloprofReranking.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/AlloprofReranking.json
diff --git a/results/izhx/udever-bloom-1b1/AlloprofRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/AlloprofRetrieval.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/AlloprofRetrieval.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/AlloprofRetrieval.json
diff --git a/results/izhx/udever-bloom-1b1/AmazonReviewsClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/AmazonReviewsClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/AmazonReviewsClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/AmazonReviewsClassification.json
diff --git a/results/izhx/udever-bloom-1b1/BSARDRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/BSARDRetrieval.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/BSARDRetrieval.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/BSARDRetrieval.json
diff --git a/results/izhx/udever-bloom-1b1/DiaBLaBitextMining.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/DiaBLaBitextMining.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/DiaBLaBitextMining.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/DiaBLaBitextMining.json
diff --git a/results/izhx/udever-bloom-1b1/FloresBitextMining.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/FloresBitextMining.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/FloresBitextMining.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/FloresBitextMining.json
diff --git a/results/izhx/udever-bloom-1b1/HALClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/HALClusteringS2S.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/HALClusteringS2S.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/HALClusteringS2S.json
diff --git a/results/izhx/udever-bloom-1b1/MLSUMClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MLSUMClusteringP2P.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MLSUMClusteringP2P.json
diff --git a/results/izhx/udever-bloom-1b1/MLSUMClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MLSUMClusteringS2S.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MLSUMClusteringS2S.json
diff --git a/results/izhx/udever-bloom-1b1/MTOPDomainClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MTOPDomainClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/MTOPDomainClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MTOPDomainClassification.json
diff --git a/results/izhx/udever-bloom-1b1/MTOPIntentClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MTOPIntentClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/MTOPIntentClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MTOPIntentClassification.json
diff --git a/results/izhx/udever-bloom-1b1/MasakhaNEWSClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClassification.json
diff --git a/results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringP2P.json
diff --git a/results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringS2S.json
diff --git a/results/izhx/udever-bloom-1b1/MassiveIntentClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MassiveIntentClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/MassiveIntentClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MassiveIntentClassification.json
diff --git a/results/izhx/udever-bloom-1b1/MassiveScenarioClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MassiveScenarioClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/MassiveScenarioClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MassiveScenarioClassification.json
diff --git a/results/izhx/udever-bloom-1b1/MintakaRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MintakaRetrieval.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/MintakaRetrieval.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MintakaRetrieval.json
diff --git a/results/izhx/udever-bloom-1b1/OpusparcusPC.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/OpusparcusPC.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/OpusparcusPC.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/OpusparcusPC.json
diff --git a/results/izhx/udever-bloom-1b1/PawsX.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/PawsX.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/PawsX.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/PawsX.json
diff --git a/results/izhx/udever-bloom-1b1/SICKFr.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/SICKFr.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/SICKFr.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/SICKFr.json
diff --git a/results/izhx/udever-bloom-1b1/STS22.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/STS22.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/STS22.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/STS22.json
diff --git a/results/izhx/udever-bloom-1b1/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/STSBenchmarkMultilingualSTS.json
diff --git a/results/izhx/udever-bloom-1b1/SummEvalFr.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/SummEvalFr.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/SummEvalFr.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/SummEvalFr.json
diff --git a/results/izhx/udever-bloom-1b1/SyntecReranking.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/SyntecReranking.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/SyntecReranking.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/SyntecReranking.json
diff --git a/results/izhx/udever-bloom-1b1/SyntecRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/SyntecRetrieval.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/SyntecRetrieval.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/SyntecRetrieval.json
diff --git a/results/izhx/udever-bloom-1b1/XPQARetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/XPQARetrieval.json
similarity index 100%
rename from results/izhx/udever-bloom-1b1/XPQARetrieval.json
rename to outputs/benchmark_results/izhx/udever-bloom-1b1/XPQARetrieval.json
diff --git a/results/izhx/udever-bloom-560m/AlloProfClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-560m/AlloProfClusteringP2P.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/AlloProfClusteringP2P.json
diff --git a/results/izhx/udever-bloom-560m/AlloProfClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-560m/AlloProfClusteringS2S.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/AlloProfClusteringS2S.json
diff --git a/results/izhx/udever-bloom-560m/AlloprofReranking.json b/outputs/benchmark_results/izhx/udever-bloom-560m/AlloprofReranking.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/AlloprofReranking.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/AlloprofReranking.json
diff --git a/results/izhx/udever-bloom-560m/AlloprofRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-560m/AlloprofRetrieval.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/AlloprofRetrieval.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/AlloprofRetrieval.json
diff --git a/results/izhx/udever-bloom-560m/AmazonReviewsClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/AmazonReviewsClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/AmazonReviewsClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/AmazonReviewsClassification.json
diff --git a/results/izhx/udever-bloom-560m/BSARDRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-560m/BSARDRetrieval.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/BSARDRetrieval.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/BSARDRetrieval.json
diff --git a/results/izhx/udever-bloom-560m/DiaBLaBitextMining.json b/outputs/benchmark_results/izhx/udever-bloom-560m/DiaBLaBitextMining.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/DiaBLaBitextMining.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/DiaBLaBitextMining.json
diff --git a/results/izhx/udever-bloom-560m/FloresBitextMining.json b/outputs/benchmark_results/izhx/udever-bloom-560m/FloresBitextMining.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/FloresBitextMining.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/FloresBitextMining.json
diff --git a/results/izhx/udever-bloom-560m/HALClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-560m/HALClusteringS2S.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/HALClusteringS2S.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/HALClusteringS2S.json
diff --git a/results/izhx/udever-bloom-560m/MLSUMClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MLSUMClusteringP2P.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/MLSUMClusteringP2P.json
diff --git a/results/izhx/udever-bloom-560m/MLSUMClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MLSUMClusteringS2S.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/MLSUMClusteringS2S.json
diff --git a/results/izhx/udever-bloom-560m/MTOPDomainClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MTOPDomainClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/MTOPDomainClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/MTOPDomainClassification.json
diff --git a/results/izhx/udever-bloom-560m/MTOPIntentClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MTOPIntentClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/MTOPIntentClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/MTOPIntentClassification.json
diff --git a/results/izhx/udever-bloom-560m/MasakhaNEWSClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClassification.json
diff --git a/results/izhx/udever-bloom-560m/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClusteringP2P.json
diff --git a/results/izhx/udever-bloom-560m/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClusteringS2S.json
diff --git a/results/izhx/udever-bloom-560m/MassiveIntentClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MassiveIntentClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/MassiveIntentClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/MassiveIntentClassification.json
diff --git a/results/izhx/udever-bloom-560m/MassiveScenarioClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MassiveScenarioClassification.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/MassiveScenarioClassification.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/MassiveScenarioClassification.json
diff --git a/results/izhx/udever-bloom-560m/MintakaRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MintakaRetrieval.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/MintakaRetrieval.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/MintakaRetrieval.json
diff --git a/results/izhx/udever-bloom-560m/OpusparcusPC.json b/outputs/benchmark_results/izhx/udever-bloom-560m/OpusparcusPC.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/OpusparcusPC.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/OpusparcusPC.json
diff --git a/results/izhx/udever-bloom-560m/PawsX.json b/outputs/benchmark_results/izhx/udever-bloom-560m/PawsX.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/PawsX.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/PawsX.json
diff --git a/results/izhx/udever-bloom-560m/SICKFr.json b/outputs/benchmark_results/izhx/udever-bloom-560m/SICKFr.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/SICKFr.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/SICKFr.json
diff --git a/results/izhx/udever-bloom-560m/STS22.json b/outputs/benchmark_results/izhx/udever-bloom-560m/STS22.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/STS22.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/STS22.json
diff --git a/results/izhx/udever-bloom-560m/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/izhx/udever-bloom-560m/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/STSBenchmarkMultilingualSTS.json
diff --git a/results/izhx/udever-bloom-560m/SummEvalFr.json b/outputs/benchmark_results/izhx/udever-bloom-560m/SummEvalFr.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/SummEvalFr.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/SummEvalFr.json
diff --git a/results/izhx/udever-bloom-560m/SyntecReranking.json b/outputs/benchmark_results/izhx/udever-bloom-560m/SyntecReranking.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/SyntecReranking.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/SyntecReranking.json
diff --git a/results/izhx/udever-bloom-560m/SyntecRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-560m/SyntecRetrieval.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/SyntecRetrieval.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/SyntecRetrieval.json
diff --git a/results/izhx/udever-bloom-560m/XPQARetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-560m/XPQARetrieval.json
similarity index 100%
rename from results/izhx/udever-bloom-560m/XPQARetrieval.json
rename to outputs/benchmark_results/izhx/udever-bloom-560m/XPQARetrieval.json
diff --git a/results/laser2/AlloProfClusteringP2P.json b/outputs/benchmark_results/laser2/AlloProfClusteringP2P.json
similarity index 100%
rename from results/laser2/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/laser2/AlloProfClusteringP2P.json
diff --git a/results/laser2/AlloProfClusteringS2S.json b/outputs/benchmark_results/laser2/AlloProfClusteringS2S.json
similarity index 100%
rename from results/laser2/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/laser2/AlloProfClusteringS2S.json
diff --git a/results/laser2/AlloprofReranking.json b/outputs/benchmark_results/laser2/AlloprofReranking.json
similarity index 100%
rename from results/laser2/AlloprofReranking.json
rename to outputs/benchmark_results/laser2/AlloprofReranking.json
diff --git a/results/laser2/AlloprofRetrieval.json b/outputs/benchmark_results/laser2/AlloprofRetrieval.json
similarity index 100%
rename from results/laser2/AlloprofRetrieval.json
rename to outputs/benchmark_results/laser2/AlloprofRetrieval.json
diff --git a/results/laser2/AmazonReviewsClassification.json b/outputs/benchmark_results/laser2/AmazonReviewsClassification.json
similarity index 100%
rename from results/laser2/AmazonReviewsClassification.json
rename to outputs/benchmark_results/laser2/AmazonReviewsClassification.json
diff --git a/results/laser2/BSARDRetrieval.json b/outputs/benchmark_results/laser2/BSARDRetrieval.json
similarity index 100%
rename from results/laser2/BSARDRetrieval.json
rename to outputs/benchmark_results/laser2/BSARDRetrieval.json
diff --git a/results/laser2/DiaBLaBitextMining.json b/outputs/benchmark_results/laser2/DiaBLaBitextMining.json
similarity index 100%
rename from results/laser2/DiaBLaBitextMining.json
rename to outputs/benchmark_results/laser2/DiaBLaBitextMining.json
diff --git a/results/laser2/FloresBitextMining.json b/outputs/benchmark_results/laser2/FloresBitextMining.json
similarity index 100%
rename from results/laser2/FloresBitextMining.json
rename to outputs/benchmark_results/laser2/FloresBitextMining.json
diff --git a/results/laser2/HALClusteringS2S.json b/outputs/benchmark_results/laser2/HALClusteringS2S.json
similarity index 100%
rename from results/laser2/HALClusteringS2S.json
rename to outputs/benchmark_results/laser2/HALClusteringS2S.json
diff --git a/results/laser2/MLSUMClusteringP2P.json b/outputs/benchmark_results/laser2/MLSUMClusteringP2P.json
similarity index 100%
rename from results/laser2/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/laser2/MLSUMClusteringP2P.json
diff --git a/results/laser2/MLSUMClusteringS2S.json b/outputs/benchmark_results/laser2/MLSUMClusteringS2S.json
similarity index 100%
rename from results/laser2/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/laser2/MLSUMClusteringS2S.json
diff --git a/results/laser2/MTOPDomainClassification.json b/outputs/benchmark_results/laser2/MTOPDomainClassification.json
similarity index 100%
rename from results/laser2/MTOPDomainClassification.json
rename to outputs/benchmark_results/laser2/MTOPDomainClassification.json
diff --git a/results/laser2/MTOPIntentClassification.json b/outputs/benchmark_results/laser2/MTOPIntentClassification.json
similarity index 100%
rename from results/laser2/MTOPIntentClassification.json
rename to outputs/benchmark_results/laser2/MTOPIntentClassification.json
diff --git a/results/laser2/MasakhaNEWSClassification.json b/outputs/benchmark_results/laser2/MasakhaNEWSClassification.json
similarity index 100%
rename from results/laser2/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/laser2/MasakhaNEWSClassification.json
diff --git a/results/laser2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/laser2/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/laser2/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/laser2/MasakhaNEWSClusteringP2P.json
diff --git a/results/laser2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/laser2/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/laser2/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/laser2/MasakhaNEWSClusteringS2S.json
diff --git a/results/laser2/MassiveIntentClassification.json b/outputs/benchmark_results/laser2/MassiveIntentClassification.json
similarity index 100%
rename from results/laser2/MassiveIntentClassification.json
rename to outputs/benchmark_results/laser2/MassiveIntentClassification.json
diff --git a/results/laser2/MassiveScenarioClassification.json b/outputs/benchmark_results/laser2/MassiveScenarioClassification.json
similarity index 100%
rename from results/laser2/MassiveScenarioClassification.json
rename to outputs/benchmark_results/laser2/MassiveScenarioClassification.json
diff --git a/results/laser2/MintakaRetrieval.json b/outputs/benchmark_results/laser2/MintakaRetrieval.json
similarity index 100%
rename from results/laser2/MintakaRetrieval.json
rename to outputs/benchmark_results/laser2/MintakaRetrieval.json
diff --git a/results/laser2/OpusparcusPC.json b/outputs/benchmark_results/laser2/OpusparcusPC.json
similarity index 100%
rename from results/laser2/OpusparcusPC.json
rename to outputs/benchmark_results/laser2/OpusparcusPC.json
diff --git a/results/laser2/PawsX.json b/outputs/benchmark_results/laser2/PawsX.json
similarity index 100%
rename from results/laser2/PawsX.json
rename to outputs/benchmark_results/laser2/PawsX.json
diff --git a/results/laser2/SICKFr.json b/outputs/benchmark_results/laser2/SICKFr.json
similarity index 100%
rename from results/laser2/SICKFr.json
rename to outputs/benchmark_results/laser2/SICKFr.json
diff --git a/results/laser2/STS22.json b/outputs/benchmark_results/laser2/STS22.json
similarity index 100%
rename from results/laser2/STS22.json
rename to outputs/benchmark_results/laser2/STS22.json
diff --git a/results/laser2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/laser2/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/laser2/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/laser2/STSBenchmarkMultilingualSTS.json
diff --git a/results/laser2/SummEvalFr.json b/outputs/benchmark_results/laser2/SummEvalFr.json
similarity index 100%
rename from results/laser2/SummEvalFr.json
rename to outputs/benchmark_results/laser2/SummEvalFr.json
diff --git a/results/laser2/SyntecReranking.json b/outputs/benchmark_results/laser2/SyntecReranking.json
similarity index 100%
rename from results/laser2/SyntecReranking.json
rename to outputs/benchmark_results/laser2/SyntecReranking.json
diff --git a/results/laser2/SyntecRetrieval.json b/outputs/benchmark_results/laser2/SyntecRetrieval.json
similarity index 100%
rename from results/laser2/SyntecRetrieval.json
rename to outputs/benchmark_results/laser2/SyntecRetrieval.json
diff --git a/results/laser2/XPQARetrieval.json b/outputs/benchmark_results/laser2/XPQARetrieval.json
similarity index 100%
rename from results/laser2/XPQARetrieval.json
rename to outputs/benchmark_results/laser2/XPQARetrieval.json
diff --git a/results/mistral-embed/AlloProfClusteringP2P.json b/outputs/benchmark_results/mistral-embed/AlloProfClusteringP2P.json
similarity index 100%
rename from results/mistral-embed/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/mistral-embed/AlloProfClusteringP2P.json
diff --git a/results/mistral-embed/AlloProfClusteringS2S.json b/outputs/benchmark_results/mistral-embed/AlloProfClusteringS2S.json
similarity index 100%
rename from results/mistral-embed/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/mistral-embed/AlloProfClusteringS2S.json
diff --git a/results/mistral-embed/AlloprofReranking.json b/outputs/benchmark_results/mistral-embed/AlloprofReranking.json
similarity index 100%
rename from results/mistral-embed/AlloprofReranking.json
rename to outputs/benchmark_results/mistral-embed/AlloprofReranking.json
diff --git a/results/mistral-embed/AlloprofRetrieval.json b/outputs/benchmark_results/mistral-embed/AlloprofRetrieval.json
similarity index 100%
rename from results/mistral-embed/AlloprofRetrieval.json
rename to outputs/benchmark_results/mistral-embed/AlloprofRetrieval.json
diff --git a/results/mistral-embed/AmazonReviewsClassification.json b/outputs/benchmark_results/mistral-embed/AmazonReviewsClassification.json
similarity index 100%
rename from results/mistral-embed/AmazonReviewsClassification.json
rename to outputs/benchmark_results/mistral-embed/AmazonReviewsClassification.json
diff --git a/results/mistral-embed/BSARDRetrieval.json b/outputs/benchmark_results/mistral-embed/BSARDRetrieval.json
similarity index 100%
rename from results/mistral-embed/BSARDRetrieval.json
rename to outputs/benchmark_results/mistral-embed/BSARDRetrieval.json
diff --git a/results/mistral-embed/HALClusteringS2S.json b/outputs/benchmark_results/mistral-embed/HALClusteringS2S.json
similarity index 100%
rename from results/mistral-embed/HALClusteringS2S.json
rename to outputs/benchmark_results/mistral-embed/HALClusteringS2S.json
diff --git a/results/mistral-embed/MLSUMClusteringP2P.json b/outputs/benchmark_results/mistral-embed/MLSUMClusteringP2P.json
similarity index 100%
rename from results/mistral-embed/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/mistral-embed/MLSUMClusteringP2P.json
diff --git a/results/mistral-embed/MLSUMClusteringS2S.json b/outputs/benchmark_results/mistral-embed/MLSUMClusteringS2S.json
similarity index 100%
rename from results/mistral-embed/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/mistral-embed/MLSUMClusteringS2S.json
diff --git a/results/mistral-embed/MTOPDomainClassification.json b/outputs/benchmark_results/mistral-embed/MTOPDomainClassification.json
similarity index 100%
rename from results/mistral-embed/MTOPDomainClassification.json
rename to outputs/benchmark_results/mistral-embed/MTOPDomainClassification.json
diff --git a/results/mistral-embed/MTOPIntentClassification.json b/outputs/benchmark_results/mistral-embed/MTOPIntentClassification.json
similarity index 100%
rename from results/mistral-embed/MTOPIntentClassification.json
rename to outputs/benchmark_results/mistral-embed/MTOPIntentClassification.json
diff --git a/results/mistral-embed/MasakhaNEWSClassification.json b/outputs/benchmark_results/mistral-embed/MasakhaNEWSClassification.json
similarity index 100%
rename from results/mistral-embed/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/mistral-embed/MasakhaNEWSClassification.json
diff --git a/results/mistral-embed/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/mistral-embed/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/mistral-embed/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/mistral-embed/MasakhaNEWSClusteringP2P.json
diff --git a/results/mistral-embed/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/mistral-embed/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/mistral-embed/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/mistral-embed/MasakhaNEWSClusteringS2S.json
diff --git a/results/mistral-embed/MassiveIntentClassification.json b/outputs/benchmark_results/mistral-embed/MassiveIntentClassification.json
similarity index 100%
rename from results/mistral-embed/MassiveIntentClassification.json
rename to outputs/benchmark_results/mistral-embed/MassiveIntentClassification.json
diff --git a/results/mistral-embed/MassiveScenarioClassification.json b/outputs/benchmark_results/mistral-embed/MassiveScenarioClassification.json
similarity index 100%
rename from results/mistral-embed/MassiveScenarioClassification.json
rename to outputs/benchmark_results/mistral-embed/MassiveScenarioClassification.json
diff --git a/results/mistral-embed/MintakaRetrieval.json b/outputs/benchmark_results/mistral-embed/MintakaRetrieval.json
similarity index 100%
rename from results/mistral-embed/MintakaRetrieval.json
rename to outputs/benchmark_results/mistral-embed/MintakaRetrieval.json
diff --git a/results/mistral-embed/OpusparcusPC.json b/outputs/benchmark_results/mistral-embed/OpusparcusPC.json
similarity index 100%
rename from results/mistral-embed/OpusparcusPC.json
rename to outputs/benchmark_results/mistral-embed/OpusparcusPC.json
diff --git a/results/mistral-embed/PawsX.json b/outputs/benchmark_results/mistral-embed/PawsX.json
similarity index 100%
rename from results/mistral-embed/PawsX.json
rename to outputs/benchmark_results/mistral-embed/PawsX.json
diff --git a/results/mistral-embed/SICKFr.json b/outputs/benchmark_results/mistral-embed/SICKFr.json
similarity index 100%
rename from results/mistral-embed/SICKFr.json
rename to outputs/benchmark_results/mistral-embed/SICKFr.json
diff --git a/results/mistral-embed/STS22.json b/outputs/benchmark_results/mistral-embed/STS22.json
similarity index 100%
rename from results/mistral-embed/STS22.json
rename to outputs/benchmark_results/mistral-embed/STS22.json
diff --git a/results/mistral-embed/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/mistral-embed/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/mistral-embed/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/mistral-embed/STSBenchmarkMultilingualSTS.json
diff --git a/results/mistral-embed/SummEvalFr.json b/outputs/benchmark_results/mistral-embed/SummEvalFr.json
similarity index 100%
rename from results/mistral-embed/SummEvalFr.json
rename to outputs/benchmark_results/mistral-embed/SummEvalFr.json
diff --git a/results/mistral-embed/SyntecReranking.json b/outputs/benchmark_results/mistral-embed/SyntecReranking.json
similarity index 100%
rename from results/mistral-embed/SyntecReranking.json
rename to outputs/benchmark_results/mistral-embed/SyntecReranking.json
diff --git a/results/mistral-embed/SyntecRetrieval.json b/outputs/benchmark_results/mistral-embed/SyntecRetrieval.json
similarity index 100%
rename from results/mistral-embed/SyntecRetrieval.json
rename to outputs/benchmark_results/mistral-embed/SyntecRetrieval.json
diff --git a/results/mistral-embed/XPQARetrieval.json b/outputs/benchmark_results/mistral-embed/XPQARetrieval.json
similarity index 100%
rename from results/mistral-embed/XPQARetrieval.json
rename to outputs/benchmark_results/mistral-embed/XPQARetrieval.json
diff --git a/results/sentence-transformers/LaBSE/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/LaBSE/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/LaBSE/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/LaBSE/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/LaBSE/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/LaBSE/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/AlloprofReranking.json
diff --git a/results/sentence-transformers/LaBSE/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/LaBSE/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/LaBSE/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/LaBSE/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/LaBSE/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/BSARDRetrieval.json
diff --git a/results/sentence-transformers/LaBSE/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/LaBSE/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/LaBSE/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/LaBSE/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/FloresBitextMining.json
diff --git a/results/sentence-transformers/LaBSE/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/LaBSE/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/HALClusteringS2S.json
diff --git a/results/sentence-transformers/LaBSE/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/LaBSE/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/LaBSE/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/LaBSE/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/LaBSE/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/LaBSE/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/LaBSE/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/LaBSE/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/LaBSE/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/LaBSE/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/MintakaRetrieval.json
diff --git a/results/sentence-transformers/LaBSE/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/LaBSE/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/OpusparcusPC.json
diff --git a/results/sentence-transformers/LaBSE/PawsX.json b/outputs/benchmark_results/sentence-transformers/LaBSE/PawsX.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/PawsX.json
diff --git a/results/sentence-transformers/LaBSE/SICKFr.json b/outputs/benchmark_results/sentence-transformers/LaBSE/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/SICKFr.json
diff --git a/results/sentence-transformers/LaBSE/STS22.json b/outputs/benchmark_results/sentence-transformers/LaBSE/STS22.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/STS22.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/STS22.json
diff --git a/results/sentence-transformers/LaBSE/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/LaBSE/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/LaBSE/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/LaBSE/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/SummEvalFr.json
diff --git a/results/sentence-transformers/LaBSE/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/LaBSE/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/SyntecReranking.json
diff --git a/results/sentence-transformers/LaBSE/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/LaBSE/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/SyntecRetrieval.json
diff --git a/results/sentence-transformers/LaBSE/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/LaBSE/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/LaBSE/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/LaBSE/XPQARetrieval.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloprofReranking.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/BSARDRetrieval.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/FloresBitextMining.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/HALClusteringS2S.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MintakaRetrieval.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/OpusparcusPC.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/PawsX.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/PawsX.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/PawsX.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/SICKFr.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SICKFr.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/STS22.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/STS22.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/STS22.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/STS22.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SummEvalFr.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SyntecReranking.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SyntecRetrieval.json
diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L12-v2/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/XPQARetrieval.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloprofReranking.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/BSARDRetrieval.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/FloresBitextMining.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/HALClusteringS2S.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MintakaRetrieval.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/OpusparcusPC.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/PawsX.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/PawsX.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/PawsX.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/SICKFr.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SICKFr.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/STS22.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/STS22.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/STS22.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/STS22.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SummEvalFr.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SyntecReranking.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SyntecRetrieval.json
diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/all-MiniLM-L6-v2/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/XPQARetrieval.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofReranking.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/BSARDRetrieval.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/FloresBitextMining.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/HALClusteringS2S.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MintakaRetrieval.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/OpusparcusPC.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/PawsX.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/PawsX.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/PawsX.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/SICKFr.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SICKFr.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/STS22.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/STS22.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/STS22.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/STS22.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SummEvalFr.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecReranking.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecRetrieval.json
diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/XPQARetrieval.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofReranking.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/BSARDRetrieval.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/FloresBitextMining.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/HALClusteringS2S.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MintakaRetrieval.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/OpusparcusPC.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/PawsX.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/PawsX.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/PawsX.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SICKFr.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SICKFr.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STS22.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STS22.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STS22.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STS22.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SummEvalFr.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecReranking.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecRetrieval.json
diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/XPQARetrieval.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofReranking.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/BSARDRetrieval.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/FloresBitextMining.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/HALClusteringS2S.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MintakaRetrieval.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/OpusparcusPC.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/PawsX.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/PawsX.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/PawsX.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SICKFr.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SICKFr.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STS22.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STS22.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STS22.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STS22.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SummEvalFr.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecReranking.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecRetrieval.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/XPQARetrieval.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofReranking.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/BSARDRetrieval.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/FloresBitextMining.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/HALClusteringS2S.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MintakaRetrieval.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/OpusparcusPC.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/PawsX.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/PawsX.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/PawsX.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SICKFr.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SICKFr.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SummEvalFr.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecReranking.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecRetrieval.json
diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/XPQARetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-base/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloprofReranking.json
diff --git a/results/sentence-transformers/sentence-t5-base/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-base/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/sentence-t5-base/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/BSARDRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-base/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/sentence-t5-base/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/FloresBitextMining.json
diff --git a/results/sentence-transformers/sentence-t5-base/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/HALClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-base/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/sentence-t5-base/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/sentence-t5-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-base/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/sentence-t5-base/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/sentence-t5-base/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MintakaRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-base/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/OpusparcusPC.json
diff --git a/results/sentence-transformers/sentence-t5-base/PawsX.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/PawsX.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/PawsX.json
diff --git a/results/sentence-transformers/sentence-t5-base/SICKFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/SICKFr.json
diff --git a/results/sentence-transformers/sentence-t5-base/STS22.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/STS22.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/STS22.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/STS22.json
diff --git a/results/sentence-transformers/sentence-t5-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/sentence-t5-base/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/SummEvalFr.json
diff --git a/results/sentence-transformers/sentence-t5-base/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/SyntecReranking.json
diff --git a/results/sentence-transformers/sentence-t5-base/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/SyntecRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-base/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-base/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/XPQARetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-large/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-large/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-large/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloprofReranking.json
diff --git a/results/sentence-transformers/sentence-t5-large/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-large/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/sentence-t5-large/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/BSARDRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-large/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/sentence-t5-large/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/FloresBitextMining.json
diff --git a/results/sentence-transformers/sentence-t5-large/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/HALClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-large/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-large/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-large/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/sentence-t5-large/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/sentence-t5-large/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-large/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/sentence-t5-large/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/sentence-t5-large/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MintakaRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-large/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/OpusparcusPC.json
diff --git a/results/sentence-transformers/sentence-t5-large/PawsX.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/PawsX.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/PawsX.json
diff --git a/results/sentence-transformers/sentence-t5-large/SICKFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/SICKFr.json
diff --git a/results/sentence-transformers/sentence-t5-large/STS22.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/STS22.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/STS22.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/STS22.json
diff --git a/results/sentence-transformers/sentence-t5-large/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/sentence-t5-large/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/SummEvalFr.json
diff --git a/results/sentence-transformers/sentence-t5-large/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/SyntecReranking.json
diff --git a/results/sentence-transformers/sentence-t5-large/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/SyntecRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-large/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-large/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/XPQARetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-xl/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-xl/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-xl/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloprofReranking.json
diff --git a/results/sentence-transformers/sentence-t5-xl/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-xl/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xl/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/BSARDRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-xl/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/sentence-t5-xl/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/FloresBitextMining.json
diff --git a/results/sentence-transformers/sentence-t5-xl/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/HALClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-xl/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-xl/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-xl/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xl/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-xl/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xl/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xl/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MintakaRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-xl/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/OpusparcusPC.json
diff --git a/results/sentence-transformers/sentence-t5-xl/PawsX.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/PawsX.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/PawsX.json
diff --git a/results/sentence-transformers/sentence-t5-xl/SICKFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SICKFr.json
diff --git a/results/sentence-transformers/sentence-t5-xl/STS22.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/STS22.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/STS22.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/STS22.json
diff --git a/results/sentence-transformers/sentence-t5-xl/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/sentence-t5-xl/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SummEvalFr.json
diff --git a/results/sentence-transformers/sentence-t5-xl/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SyntecReranking.json
diff --git a/results/sentence-transformers/sentence-t5-xl/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SyntecRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-xl/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xl/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/XPQARetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloprofReranking.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/AlloprofReranking.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloprofReranking.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloprofRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/AlloprofRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloprofRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AmazonReviewsClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/AmazonReviewsClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AmazonReviewsClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/BSARDRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/BSARDRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/BSARDRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/DiaBLaBitextMining.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/DiaBLaBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/DiaBLaBitextMining.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/FloresBitextMining.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/FloresBitextMining.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/FloresBitextMining.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/HALClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/HALClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/HALClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MTOPDomainClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/MTOPDomainClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MTOPDomainClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MTOPIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/MTOPIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MTOPIntentClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringP2P.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringS2S.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MassiveIntentClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/MassiveIntentClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MassiveIntentClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MassiveScenarioClassification.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/MassiveScenarioClassification.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MassiveScenarioClassification.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MintakaRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/MintakaRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MintakaRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/OpusparcusPC.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/OpusparcusPC.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/OpusparcusPC.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/PawsX.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/PawsX.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/PawsX.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/PawsX.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/SICKFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SICKFr.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/SICKFr.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SICKFr.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/STS22.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/STS22.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/STS22.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/STS22.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/STSBenchmarkMultilingualSTS.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SummEvalFr.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/SummEvalFr.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SummEvalFr.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SyntecReranking.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/SyntecReranking.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SyntecReranking.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SyntecRetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/SyntecRetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SyntecRetrieval.json
diff --git a/results/sentence-transformers/sentence-t5-xxl/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/XPQARetrieval.json
similarity index 100%
rename from results/sentence-transformers/sentence-t5-xxl/XPQARetrieval.json
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/XPQARetrieval.json
diff --git a/results/shibing624/text2vec-base-multilingual/AlloProfClusteringP2P.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloProfClusteringP2P.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloProfClusteringP2P.json
diff --git a/results/shibing624/text2vec-base-multilingual/AlloProfClusteringS2S.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloProfClusteringS2S.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloProfClusteringS2S.json
diff --git a/results/shibing624/text2vec-base-multilingual/AlloprofReranking.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloprofReranking.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/AlloprofReranking.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloprofReranking.json
diff --git a/results/shibing624/text2vec-base-multilingual/AlloprofRetrieval.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloprofRetrieval.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/AlloprofRetrieval.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloprofRetrieval.json
diff --git a/results/shibing624/text2vec-base-multilingual/AmazonReviewsClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/AmazonReviewsClassification.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/AmazonReviewsClassification.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/AmazonReviewsClassification.json
diff --git a/results/shibing624/text2vec-base-multilingual/BSARDRetrieval.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/BSARDRetrieval.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/BSARDRetrieval.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/BSARDRetrieval.json
diff --git a/results/shibing624/text2vec-base-multilingual/DiaBLaBitextMining.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/DiaBLaBitextMining.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/DiaBLaBitextMining.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/DiaBLaBitextMining.json
diff --git a/results/shibing624/text2vec-base-multilingual/FloresBitextMining.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/FloresBitextMining.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/FloresBitextMining.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/FloresBitextMining.json
diff --git a/results/shibing624/text2vec-base-multilingual/HALClusteringS2S.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/HALClusteringS2S.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/HALClusteringS2S.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/HALClusteringS2S.json
diff --git a/results/shibing624/text2vec-base-multilingual/MLSUMClusteringP2P.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MLSUMClusteringP2P.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MLSUMClusteringP2P.json
diff --git a/results/shibing624/text2vec-base-multilingual/MLSUMClusteringS2S.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MLSUMClusteringS2S.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MLSUMClusteringS2S.json
diff --git a/results/shibing624/text2vec-base-multilingual/MTOPDomainClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MTOPDomainClassification.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/MTOPDomainClassification.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MTOPDomainClassification.json
diff --git a/results/shibing624/text2vec-base-multilingual/MTOPIntentClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MTOPIntentClassification.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/MTOPIntentClassification.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MTOPIntentClassification.json
diff --git a/results/shibing624/text2vec-base-multilingual/MasakhaNEWSClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClassification.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClassification.json
diff --git a/results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringP2P.json
diff --git a/results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringS2S.json
diff --git a/results/shibing624/text2vec-base-multilingual/MassiveIntentClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MassiveIntentClassification.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/MassiveIntentClassification.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MassiveIntentClassification.json
diff --git a/results/shibing624/text2vec-base-multilingual/MassiveScenarioClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MassiveScenarioClassification.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/MassiveScenarioClassification.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MassiveScenarioClassification.json
diff --git a/results/shibing624/text2vec-base-multilingual/MintakaRetrieval.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MintakaRetrieval.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/MintakaRetrieval.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MintakaRetrieval.json
diff --git a/results/shibing624/text2vec-base-multilingual/OpusparcusPC.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/OpusparcusPC.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/OpusparcusPC.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/OpusparcusPC.json
diff --git a/results/shibing624/text2vec-base-multilingual/PawsX.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/PawsX.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/PawsX.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/PawsX.json
diff --git a/results/shibing624/text2vec-base-multilingual/SICKFr.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/SICKFr.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/SICKFr.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/SICKFr.json
diff --git a/results/shibing624/text2vec-base-multilingual/STS22.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/STS22.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/STS22.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/STS22.json
diff --git a/results/shibing624/text2vec-base-multilingual/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/STSBenchmarkMultilingualSTS.json
diff --git a/results/shibing624/text2vec-base-multilingual/SummEvalFr.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/SummEvalFr.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/SummEvalFr.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/SummEvalFr.json
diff --git a/results/shibing624/text2vec-base-multilingual/SyntecReranking.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/SyntecReranking.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/SyntecReranking.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/SyntecReranking.json
diff --git a/results/shibing624/text2vec-base-multilingual/SyntecRetrieval.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/SyntecRetrieval.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/SyntecRetrieval.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/SyntecRetrieval.json
diff --git a/results/shibing624/text2vec-base-multilingual/XPQARetrieval.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/XPQARetrieval.json
similarity index 100%
rename from results/shibing624/text2vec-base-multilingual/XPQARetrieval.json
rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/XPQARetrieval.json
diff --git a/results/text-embedding-3-large/AlloprofRetrieval.json b/outputs/benchmark_results/text-embedding-3-large/AlloprofRetrieval.json
similarity index 100%
rename from results/text-embedding-3-large/AlloprofRetrieval.json
rename to outputs/benchmark_results/text-embedding-3-large/AlloprofRetrieval.json
diff --git a/results/text-embedding-3-large/MintakaRetrieval.json b/outputs/benchmark_results/text-embedding-3-large/MintakaRetrieval.json
similarity index 100%
rename from results/text-embedding-3-large/MintakaRetrieval.json
rename to outputs/benchmark_results/text-embedding-3-large/MintakaRetrieval.json
diff --git a/results/text-embedding-3-large/SyntecRetrieval.json b/outputs/benchmark_results/text-embedding-3-large/SyntecRetrieval.json
similarity index 100%
rename from results/text-embedding-3-large/SyntecRetrieval.json
rename to outputs/benchmark_results/text-embedding-3-large/SyntecRetrieval.json
diff --git a/results/text-embedding-3-large/XPQARetrieval.json b/outputs/benchmark_results/text-embedding-3-large/XPQARetrieval.json
similarity index 100%
rename from results/text-embedding-3-large/XPQARetrieval.json
rename to outputs/benchmark_results/text-embedding-3-large/XPQARetrieval.json
diff --git a/results/text-embedding-3-small/AlloprofRetrieval.json b/outputs/benchmark_results/text-embedding-3-small/AlloprofRetrieval.json
similarity index 100%
rename from results/text-embedding-3-small/AlloprofRetrieval.json
rename to outputs/benchmark_results/text-embedding-3-small/AlloprofRetrieval.json
diff --git a/results/text-embedding-3-small/MintakaRetrieval.json b/outputs/benchmark_results/text-embedding-3-small/MintakaRetrieval.json
similarity index 100%
rename from results/text-embedding-3-small/MintakaRetrieval.json
rename to outputs/benchmark_results/text-embedding-3-small/MintakaRetrieval.json
diff --git a/results/text-embedding-3-small/SyntecRetrieval.json b/outputs/benchmark_results/text-embedding-3-small/SyntecRetrieval.json
similarity index 100%
rename from results/text-embedding-3-small/SyntecRetrieval.json
rename to outputs/benchmark_results/text-embedding-3-small/SyntecRetrieval.json
diff --git a/results/text-embedding-3-small/XPQARetrieval.json b/outputs/benchmark_results/text-embedding-3-small/XPQARetrieval.json
similarity index 100%
rename from results/text-embedding-3-small/XPQARetrieval.json
rename to outputs/benchmark_results/text-embedding-3-small/XPQARetrieval.json
diff --git a/results/text-embedding-ada-002/AlloProfClusteringP2P.json b/outputs/benchmark_results/text-embedding-ada-002/AlloProfClusteringP2P.json
similarity index 100%
rename from results/text-embedding-ada-002/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/text-embedding-ada-002/AlloProfClusteringP2P.json
diff --git a/results/text-embedding-ada-002/AlloProfClusteringS2S.json b/outputs/benchmark_results/text-embedding-ada-002/AlloProfClusteringS2S.json
similarity index 100%
rename from results/text-embedding-ada-002/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/text-embedding-ada-002/AlloProfClusteringS2S.json
diff --git a/results/text-embedding-ada-002/AlloprofRetrieval.json b/outputs/benchmark_results/text-embedding-ada-002/AlloprofRetrieval.json
similarity index 100%
rename from results/text-embedding-ada-002/AlloprofRetrieval.json
rename to outputs/benchmark_results/text-embedding-ada-002/AlloprofRetrieval.json
diff --git a/results/text-embedding-ada-002/AmazonReviewsClassification.json b/outputs/benchmark_results/text-embedding-ada-002/AmazonReviewsClassification.json
similarity index 100%
rename from results/text-embedding-ada-002/AmazonReviewsClassification.json
rename to outputs/benchmark_results/text-embedding-ada-002/AmazonReviewsClassification.json
diff --git a/results/text-embedding-ada-002/BSARDRetrieval.json b/outputs/benchmark_results/text-embedding-ada-002/BSARDRetrieval.json
similarity index 100%
rename from results/text-embedding-ada-002/BSARDRetrieval.json
rename to outputs/benchmark_results/text-embedding-ada-002/BSARDRetrieval.json
diff --git a/results/text-embedding-ada-002/DiaBLaBitextMining.json b/outputs/benchmark_results/text-embedding-ada-002/DiaBLaBitextMining.json
similarity index 100%
rename from results/text-embedding-ada-002/DiaBLaBitextMining.json
rename to outputs/benchmark_results/text-embedding-ada-002/DiaBLaBitextMining.json
diff --git a/results/text-embedding-ada-002/FloresBitextMining.json b/outputs/benchmark_results/text-embedding-ada-002/FloresBitextMining.json
similarity index 100%
rename from results/text-embedding-ada-002/FloresBitextMining.json
rename to outputs/benchmark_results/text-embedding-ada-002/FloresBitextMining.json
diff --git a/results/text-embedding-ada-002/HALClusteringS2S.json b/outputs/benchmark_results/text-embedding-ada-002/HALClusteringS2S.json
similarity index 100%
rename from results/text-embedding-ada-002/HALClusteringS2S.json
rename to outputs/benchmark_results/text-embedding-ada-002/HALClusteringS2S.json
diff --git a/results/text-embedding-ada-002/MLSUMClusteringP2P.json b/outputs/benchmark_results/text-embedding-ada-002/MLSUMClusteringP2P.json
similarity index 100%
rename from results/text-embedding-ada-002/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/text-embedding-ada-002/MLSUMClusteringP2P.json
diff --git a/results/text-embedding-ada-002/MLSUMClusteringS2S.json b/outputs/benchmark_results/text-embedding-ada-002/MLSUMClusteringS2S.json
similarity index 100%
rename from results/text-embedding-ada-002/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/text-embedding-ada-002/MLSUMClusteringS2S.json
diff --git a/results/text-embedding-ada-002/MTOPDomainClassification.json b/outputs/benchmark_results/text-embedding-ada-002/MTOPDomainClassification.json
similarity index 100%
rename from results/text-embedding-ada-002/MTOPDomainClassification.json
rename to outputs/benchmark_results/text-embedding-ada-002/MTOPDomainClassification.json
diff --git a/results/text-embedding-ada-002/MTOPIntentClassification.json b/outputs/benchmark_results/text-embedding-ada-002/MTOPIntentClassification.json
similarity index 100%
rename from results/text-embedding-ada-002/MTOPIntentClassification.json
rename to outputs/benchmark_results/text-embedding-ada-002/MTOPIntentClassification.json
diff --git a/results/text-embedding-ada-002/MasakhaNEWSClassification.json b/outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClassification.json
similarity index 100%
rename from results/text-embedding-ada-002/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClassification.json
diff --git a/results/text-embedding-ada-002/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/text-embedding-ada-002/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClusteringP2P.json
diff --git a/results/text-embedding-ada-002/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/text-embedding-ada-002/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClusteringS2S.json
diff --git a/results/text-embedding-ada-002/MassiveIntentClassification.json b/outputs/benchmark_results/text-embedding-ada-002/MassiveIntentClassification.json
similarity index 100%
rename from results/text-embedding-ada-002/MassiveIntentClassification.json
rename to outputs/benchmark_results/text-embedding-ada-002/MassiveIntentClassification.json
diff --git a/results/text-embedding-ada-002/MassiveScenarioClassification.json b/outputs/benchmark_results/text-embedding-ada-002/MassiveScenarioClassification.json
similarity index 100%
rename from results/text-embedding-ada-002/MassiveScenarioClassification.json
rename to outputs/benchmark_results/text-embedding-ada-002/MassiveScenarioClassification.json
diff --git a/results/text-embedding-ada-002/MintakaRetrieval.json b/outputs/benchmark_results/text-embedding-ada-002/MintakaRetrieval.json
similarity index 100%
rename from results/text-embedding-ada-002/MintakaRetrieval.json
rename to outputs/benchmark_results/text-embedding-ada-002/MintakaRetrieval.json
diff --git a/results/text-embedding-ada-002/OpusparcusPC.json b/outputs/benchmark_results/text-embedding-ada-002/OpusparcusPC.json
similarity index 100%
rename from results/text-embedding-ada-002/OpusparcusPC.json
rename to outputs/benchmark_results/text-embedding-ada-002/OpusparcusPC.json
diff --git a/results/text-embedding-ada-002/PawsX.json b/outputs/benchmark_results/text-embedding-ada-002/PawsX.json
similarity index 100%
rename from results/text-embedding-ada-002/PawsX.json
rename to outputs/benchmark_results/text-embedding-ada-002/PawsX.json
diff --git a/results/text-embedding-ada-002/SICKFr.json b/outputs/benchmark_results/text-embedding-ada-002/SICKFr.json
similarity index 100%
rename from results/text-embedding-ada-002/SICKFr.json
rename to outputs/benchmark_results/text-embedding-ada-002/SICKFr.json
diff --git a/results/text-embedding-ada-002/STS22.json b/outputs/benchmark_results/text-embedding-ada-002/STS22.json
similarity index 100%
rename from results/text-embedding-ada-002/STS22.json
rename to outputs/benchmark_results/text-embedding-ada-002/STS22.json
diff --git a/results/text-embedding-ada-002/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/text-embedding-ada-002/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/text-embedding-ada-002/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/text-embedding-ada-002/STSBenchmarkMultilingualSTS.json
diff --git a/results/text-embedding-ada-002/SummEvalFr.json b/outputs/benchmark_results/text-embedding-ada-002/SummEvalFr.json
similarity index 100%
rename from results/text-embedding-ada-002/SummEvalFr.json
rename to outputs/benchmark_results/text-embedding-ada-002/SummEvalFr.json
diff --git a/results/text-embedding-ada-002/SyntecReranking.json b/outputs/benchmark_results/text-embedding-ada-002/SyntecReranking.json
similarity index 100%
rename from results/text-embedding-ada-002/SyntecReranking.json
rename to outputs/benchmark_results/text-embedding-ada-002/SyntecReranking.json
diff --git a/results/text-embedding-ada-002/SyntecRetrieval.json b/outputs/benchmark_results/text-embedding-ada-002/SyntecRetrieval.json
similarity index 100%
rename from results/text-embedding-ada-002/SyntecRetrieval.json
rename to outputs/benchmark_results/text-embedding-ada-002/SyntecRetrieval.json
diff --git a/results/text-embedding-ada-002/XPQARetrieval.json b/outputs/benchmark_results/text-embedding-ada-002/XPQARetrieval.json
similarity index 100%
rename from results/text-embedding-ada-002/XPQARetrieval.json
rename to outputs/benchmark_results/text-embedding-ada-002/XPQARetrieval.json
diff --git a/results/voyage-2/AlloProfClusteringP2P.json b/outputs/benchmark_results/voyage-2/AlloProfClusteringP2P.json
similarity index 100%
rename from results/voyage-2/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/voyage-2/AlloProfClusteringP2P.json
diff --git a/results/voyage-2/AlloProfClusteringS2S.json b/outputs/benchmark_results/voyage-2/AlloProfClusteringS2S.json
similarity index 100%
rename from results/voyage-2/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/voyage-2/AlloProfClusteringS2S.json
diff --git a/results/voyage-2/AlloprofReranking.json b/outputs/benchmark_results/voyage-2/AlloprofReranking.json
similarity index 100%
rename from results/voyage-2/AlloprofReranking.json
rename to outputs/benchmark_results/voyage-2/AlloprofReranking.json
diff --git a/results/voyage-2/AlloprofRetrieval.json b/outputs/benchmark_results/voyage-2/AlloprofRetrieval.json
similarity index 100%
rename from results/voyage-2/AlloprofRetrieval.json
rename to outputs/benchmark_results/voyage-2/AlloprofRetrieval.json
diff --git a/results/voyage-2/AmazonReviewsClassification.json b/outputs/benchmark_results/voyage-2/AmazonReviewsClassification.json
similarity index 100%
rename from results/voyage-2/AmazonReviewsClassification.json
rename to outputs/benchmark_results/voyage-2/AmazonReviewsClassification.json
diff --git a/results/voyage-2/BSARDRetrieval.json b/outputs/benchmark_results/voyage-2/BSARDRetrieval.json
similarity index 100%
rename from results/voyage-2/BSARDRetrieval.json
rename to outputs/benchmark_results/voyage-2/BSARDRetrieval.json
diff --git a/results/voyage-2/DiaBLaBitextMining.json b/outputs/benchmark_results/voyage-2/DiaBLaBitextMining.json
similarity index 100%
rename from results/voyage-2/DiaBLaBitextMining.json
rename to outputs/benchmark_results/voyage-2/DiaBLaBitextMining.json
diff --git a/results/voyage-2/FloresBitextMining.json b/outputs/benchmark_results/voyage-2/FloresBitextMining.json
similarity index 100%
rename from results/voyage-2/FloresBitextMining.json
rename to outputs/benchmark_results/voyage-2/FloresBitextMining.json
diff --git a/results/voyage-2/HALClusteringS2S.json b/outputs/benchmark_results/voyage-2/HALClusteringS2S.json
similarity index 100%
rename from results/voyage-2/HALClusteringS2S.json
rename to outputs/benchmark_results/voyage-2/HALClusteringS2S.json
diff --git a/results/voyage-2/MLSUMClusteringP2P.json b/outputs/benchmark_results/voyage-2/MLSUMClusteringP2P.json
similarity index 100%
rename from results/voyage-2/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/voyage-2/MLSUMClusteringP2P.json
diff --git a/results/voyage-2/MLSUMClusteringS2S.json b/outputs/benchmark_results/voyage-2/MLSUMClusteringS2S.json
similarity index 100%
rename from results/voyage-2/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/voyage-2/MLSUMClusteringS2S.json
diff --git a/results/voyage-2/MTOPDomainClassification.json b/outputs/benchmark_results/voyage-2/MTOPDomainClassification.json
similarity index 100%
rename from results/voyage-2/MTOPDomainClassification.json
rename to outputs/benchmark_results/voyage-2/MTOPDomainClassification.json
diff --git a/results/voyage-2/MTOPIntentClassification.json b/outputs/benchmark_results/voyage-2/MTOPIntentClassification.json
similarity index 100%
rename from results/voyage-2/MTOPIntentClassification.json
rename to outputs/benchmark_results/voyage-2/MTOPIntentClassification.json
diff --git a/results/voyage-2/MasakhaNEWSClassification.json b/outputs/benchmark_results/voyage-2/MasakhaNEWSClassification.json
similarity index 100%
rename from results/voyage-2/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/voyage-2/MasakhaNEWSClassification.json
diff --git a/results/voyage-2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/voyage-2/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/voyage-2/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/voyage-2/MasakhaNEWSClusteringP2P.json
diff --git a/results/voyage-2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/voyage-2/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/voyage-2/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/voyage-2/MasakhaNEWSClusteringS2S.json
diff --git a/results/voyage-2/MassiveIntentClassification.json b/outputs/benchmark_results/voyage-2/MassiveIntentClassification.json
similarity index 100%
rename from results/voyage-2/MassiveIntentClassification.json
rename to outputs/benchmark_results/voyage-2/MassiveIntentClassification.json
diff --git a/results/voyage-2/MassiveScenarioClassification.json b/outputs/benchmark_results/voyage-2/MassiveScenarioClassification.json
similarity index 100%
rename from results/voyage-2/MassiveScenarioClassification.json
rename to outputs/benchmark_results/voyage-2/MassiveScenarioClassification.json
diff --git a/results/voyage-2/MintakaRetrieval.json b/outputs/benchmark_results/voyage-2/MintakaRetrieval.json
similarity index 100%
rename from results/voyage-2/MintakaRetrieval.json
rename to outputs/benchmark_results/voyage-2/MintakaRetrieval.json
diff --git a/results/voyage-2/OpusparcusPC.json b/outputs/benchmark_results/voyage-2/OpusparcusPC.json
similarity index 100%
rename from results/voyage-2/OpusparcusPC.json
rename to outputs/benchmark_results/voyage-2/OpusparcusPC.json
diff --git a/results/voyage-2/PawsX.json b/outputs/benchmark_results/voyage-2/PawsX.json
similarity index 100%
rename from results/voyage-2/PawsX.json
rename to outputs/benchmark_results/voyage-2/PawsX.json
diff --git a/results/voyage-2/SICKFr.json b/outputs/benchmark_results/voyage-2/SICKFr.json
similarity index 100%
rename from results/voyage-2/SICKFr.json
rename to outputs/benchmark_results/voyage-2/SICKFr.json
diff --git a/results/voyage-2/STS22.json b/outputs/benchmark_results/voyage-2/STS22.json
similarity index 100%
rename from results/voyage-2/STS22.json
rename to outputs/benchmark_results/voyage-2/STS22.json
diff --git a/results/voyage-2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/voyage-2/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/voyage-2/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/voyage-2/STSBenchmarkMultilingualSTS.json
diff --git a/results/voyage-2/SummEvalFr.json b/outputs/benchmark_results/voyage-2/SummEvalFr.json
similarity index 100%
rename from results/voyage-2/SummEvalFr.json
rename to outputs/benchmark_results/voyage-2/SummEvalFr.json
diff --git a/results/voyage-2/SyntecReranking.json b/outputs/benchmark_results/voyage-2/SyntecReranking.json
similarity index 100%
rename from results/voyage-2/SyntecReranking.json
rename to outputs/benchmark_results/voyage-2/SyntecReranking.json
diff --git a/results/voyage-2/SyntecRetrieval.json b/outputs/benchmark_results/voyage-2/SyntecRetrieval.json
similarity index 100%
rename from results/voyage-2/SyntecRetrieval.json
rename to outputs/benchmark_results/voyage-2/SyntecRetrieval.json
diff --git a/results/voyage-2/XPQARetrieval.json b/outputs/benchmark_results/voyage-2/XPQARetrieval.json
similarity index 100%
rename from results/voyage-2/XPQARetrieval.json
rename to outputs/benchmark_results/voyage-2/XPQARetrieval.json
diff --git a/results/voyage-code-2/AlloProfClusteringP2P.json b/outputs/benchmark_results/voyage-code-2/AlloProfClusteringP2P.json
similarity index 100%
rename from results/voyage-code-2/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/voyage-code-2/AlloProfClusteringP2P.json
diff --git a/results/voyage-code-2/AlloProfClusteringS2S.json b/outputs/benchmark_results/voyage-code-2/AlloProfClusteringS2S.json
similarity index 100%
rename from results/voyage-code-2/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/voyage-code-2/AlloProfClusteringS2S.json
diff --git a/results/voyage-code-2/AlloprofReranking.json b/outputs/benchmark_results/voyage-code-2/AlloprofReranking.json
similarity index 100%
rename from results/voyage-code-2/AlloprofReranking.json
rename to outputs/benchmark_results/voyage-code-2/AlloprofReranking.json
diff --git a/results/voyage-code-2/AlloprofRetrieval.json b/outputs/benchmark_results/voyage-code-2/AlloprofRetrieval.json
similarity index 100%
rename from results/voyage-code-2/AlloprofRetrieval.json
rename to outputs/benchmark_results/voyage-code-2/AlloprofRetrieval.json
diff --git a/results/voyage-code-2/AmazonReviewsClassification.json b/outputs/benchmark_results/voyage-code-2/AmazonReviewsClassification.json
similarity index 100%
rename from results/voyage-code-2/AmazonReviewsClassification.json
rename to outputs/benchmark_results/voyage-code-2/AmazonReviewsClassification.json
diff --git a/results/voyage-code-2/BSARDRetrieval.json b/outputs/benchmark_results/voyage-code-2/BSARDRetrieval.json
similarity index 100%
rename from results/voyage-code-2/BSARDRetrieval.json
rename to outputs/benchmark_results/voyage-code-2/BSARDRetrieval.json
diff --git a/results/voyage-code-2/DiaBLaBitextMining.json b/outputs/benchmark_results/voyage-code-2/DiaBLaBitextMining.json
similarity index 100%
rename from results/voyage-code-2/DiaBLaBitextMining.json
rename to outputs/benchmark_results/voyage-code-2/DiaBLaBitextMining.json
diff --git a/results/voyage-code-2/FloresBitextMining.json b/outputs/benchmark_results/voyage-code-2/FloresBitextMining.json
similarity index 100%
rename from results/voyage-code-2/FloresBitextMining.json
rename to outputs/benchmark_results/voyage-code-2/FloresBitextMining.json
diff --git a/results/voyage-code-2/HALClusteringS2S.json b/outputs/benchmark_results/voyage-code-2/HALClusteringS2S.json
similarity index 100%
rename from results/voyage-code-2/HALClusteringS2S.json
rename to outputs/benchmark_results/voyage-code-2/HALClusteringS2S.json
diff --git a/results/voyage-code-2/MLSUMClusteringP2P.json b/outputs/benchmark_results/voyage-code-2/MLSUMClusteringP2P.json
similarity index 100%
rename from results/voyage-code-2/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/voyage-code-2/MLSUMClusteringP2P.json
diff --git a/results/voyage-code-2/MLSUMClusteringS2S.json b/outputs/benchmark_results/voyage-code-2/MLSUMClusteringS2S.json
similarity index 100%
rename from results/voyage-code-2/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/voyage-code-2/MLSUMClusteringS2S.json
diff --git a/results/voyage-code-2/MTOPDomainClassification.json b/outputs/benchmark_results/voyage-code-2/MTOPDomainClassification.json
similarity index 100%
rename from results/voyage-code-2/MTOPDomainClassification.json
rename to outputs/benchmark_results/voyage-code-2/MTOPDomainClassification.json
diff --git a/results/voyage-code-2/MTOPIntentClassification.json b/outputs/benchmark_results/voyage-code-2/MTOPIntentClassification.json
similarity index 100%
rename from results/voyage-code-2/MTOPIntentClassification.json
rename to outputs/benchmark_results/voyage-code-2/MTOPIntentClassification.json
diff --git a/results/voyage-code-2/MasakhaNEWSClassification.json b/outputs/benchmark_results/voyage-code-2/MasakhaNEWSClassification.json
similarity index 100%
rename from results/voyage-code-2/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/voyage-code-2/MasakhaNEWSClassification.json
diff --git a/results/voyage-code-2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/voyage-code-2/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/voyage-code-2/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/voyage-code-2/MasakhaNEWSClusteringP2P.json
diff --git a/results/voyage-code-2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/voyage-code-2/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/voyage-code-2/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/voyage-code-2/MasakhaNEWSClusteringS2S.json
diff --git a/results/voyage-code-2/MassiveIntentClassification.json b/outputs/benchmark_results/voyage-code-2/MassiveIntentClassification.json
similarity index 100%
rename from results/voyage-code-2/MassiveIntentClassification.json
rename to outputs/benchmark_results/voyage-code-2/MassiveIntentClassification.json
diff --git a/results/voyage-code-2/MassiveScenarioClassification.json b/outputs/benchmark_results/voyage-code-2/MassiveScenarioClassification.json
similarity index 100%
rename from results/voyage-code-2/MassiveScenarioClassification.json
rename to outputs/benchmark_results/voyage-code-2/MassiveScenarioClassification.json
diff --git a/results/voyage-code-2/MintakaRetrieval.json b/outputs/benchmark_results/voyage-code-2/MintakaRetrieval.json
similarity index 100%
rename from results/voyage-code-2/MintakaRetrieval.json
rename to outputs/benchmark_results/voyage-code-2/MintakaRetrieval.json
diff --git a/results/voyage-code-2/OpusparcusPC.json b/outputs/benchmark_results/voyage-code-2/OpusparcusPC.json
similarity index 100%
rename from results/voyage-code-2/OpusparcusPC.json
rename to outputs/benchmark_results/voyage-code-2/OpusparcusPC.json
diff --git a/results/voyage-code-2/PawsX.json b/outputs/benchmark_results/voyage-code-2/PawsX.json
similarity index 100%
rename from results/voyage-code-2/PawsX.json
rename to outputs/benchmark_results/voyage-code-2/PawsX.json
diff --git a/results/voyage-code-2/SICKFr.json b/outputs/benchmark_results/voyage-code-2/SICKFr.json
similarity index 100%
rename from results/voyage-code-2/SICKFr.json
rename to outputs/benchmark_results/voyage-code-2/SICKFr.json
diff --git a/results/voyage-code-2/STS22.json b/outputs/benchmark_results/voyage-code-2/STS22.json
similarity index 100%
rename from results/voyage-code-2/STS22.json
rename to outputs/benchmark_results/voyage-code-2/STS22.json
diff --git a/results/voyage-code-2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/voyage-code-2/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/voyage-code-2/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/voyage-code-2/STSBenchmarkMultilingualSTS.json
diff --git a/results/voyage-code-2/SummEvalFr.json b/outputs/benchmark_results/voyage-code-2/SummEvalFr.json
similarity index 100%
rename from results/voyage-code-2/SummEvalFr.json
rename to outputs/benchmark_results/voyage-code-2/SummEvalFr.json
diff --git a/results/voyage-code-2/SyntecReranking.json b/outputs/benchmark_results/voyage-code-2/SyntecReranking.json
similarity index 100%
rename from results/voyage-code-2/SyntecReranking.json
rename to outputs/benchmark_results/voyage-code-2/SyntecReranking.json
diff --git a/results/voyage-code-2/SyntecRetrieval.json b/outputs/benchmark_results/voyage-code-2/SyntecRetrieval.json
similarity index 100%
rename from results/voyage-code-2/SyntecRetrieval.json
rename to outputs/benchmark_results/voyage-code-2/SyntecRetrieval.json
diff --git a/results/voyage-code-2/XPQARetrieval.json b/outputs/benchmark_results/voyage-code-2/XPQARetrieval.json
similarity index 100%
rename from results/voyage-code-2/XPQARetrieval.json
rename to outputs/benchmark_results/voyage-code-2/XPQARetrieval.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringP2P.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringP2P.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringS2S.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringS2S.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofReranking.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofReranking.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofReranking.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofReranking.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofRetrieval.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofRetrieval.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofRetrieval.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/AmazonReviewsClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AmazonReviewsClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/AmazonReviewsClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AmazonReviewsClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/BSARDRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/BSARDRetrieval.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/BSARDRetrieval.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/BSARDRetrieval.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/DiaBLaBitextMining.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/DiaBLaBitextMining.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/DiaBLaBitextMining.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/DiaBLaBitextMining.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/FloresBitextMining.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/FloresBitextMining.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/FloresBitextMining.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/FloresBitextMining.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/HALClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/HALClusteringS2S.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/HALClusteringS2S.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/HALClusteringS2S.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringP2P.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringP2P.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringS2S.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringS2S.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPDomainClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPDomainClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPDomainClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPDomainClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPIntentClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPIntentClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPIntentClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPIntentClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringP2P.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringS2S.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveIntentClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveIntentClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveIntentClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveIntentClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveScenarioClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveScenarioClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveScenarioClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveScenarioClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MintakaRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MintakaRetrieval.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MintakaRetrieval.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MintakaRetrieval.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/OpusparcusPC.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/OpusparcusPC.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/OpusparcusPC.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/OpusparcusPC.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/PawsX.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/PawsX.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/PawsX.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/PawsX.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/SICKFr.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SICKFr.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/SICKFr.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SICKFr.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/STS22.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/STS22.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/STS22.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/STS22.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/STSBenchmarkMultilingualSTS.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/SummEvalFr.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SummEvalFr.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/SummEvalFr.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SummEvalFr.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecReranking.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecReranking.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecReranking.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecReranking.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecRetrieval.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecRetrieval.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecRetrieval.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/XPQARetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/XPQARetrieval.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-3/XPQARetrieval.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/XPQARetrieval.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringP2P.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringP2P.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringS2S.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringS2S.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofReranking.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofReranking.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofReranking.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofReranking.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofRetrieval.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofRetrieval.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofRetrieval.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/AmazonReviewsClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AmazonReviewsClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/AmazonReviewsClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AmazonReviewsClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/BSARDRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/BSARDRetrieval.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/BSARDRetrieval.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/BSARDRetrieval.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/DiaBLaBitextMining.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/DiaBLaBitextMining.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/DiaBLaBitextMining.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/DiaBLaBitextMining.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/FloresBitextMining.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/FloresBitextMining.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/FloresBitextMining.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/FloresBitextMining.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/HALClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/HALClusteringS2S.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/HALClusteringS2S.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/HALClusteringS2S.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringP2P.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringP2P.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringS2S.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringS2S.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPDomainClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPDomainClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPDomainClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPDomainClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPIntentClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPIntentClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPIntentClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPIntentClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringP2P.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringS2S.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveIntentClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveIntentClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveIntentClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveIntentClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveScenarioClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveScenarioClassification.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveScenarioClassification.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveScenarioClassification.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MintakaRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MintakaRetrieval.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MintakaRetrieval.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MintakaRetrieval.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/OpusparcusPC.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/OpusparcusPC.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/OpusparcusPC.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/OpusparcusPC.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/PawsX.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/PawsX.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/PawsX.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/PawsX.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/SICKFr.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SICKFr.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/SICKFr.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SICKFr.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/STS22.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/STS22.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/STS22.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/STS22.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/STSBenchmarkMultilingualSTS.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/SummEvalFr.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SummEvalFr.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/SummEvalFr.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SummEvalFr.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecReranking.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecReranking.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecReranking.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecReranking.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecRetrieval.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecRetrieval.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecRetrieval.json
diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/XPQARetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/XPQARetrieval.json
similarity index 100%
rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/XPQARetrieval.json
rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/XPQARetrieval.json
diff --git a/results/xlm-roberta-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-base/AlloProfClusteringP2P.json
similarity index 100%
rename from results/xlm-roberta-base/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/xlm-roberta-base/AlloProfClusteringP2P.json
diff --git a/results/xlm-roberta-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-base/AlloProfClusteringS2S.json
similarity index 100%
rename from results/xlm-roberta-base/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/xlm-roberta-base/AlloProfClusteringS2S.json
diff --git a/results/xlm-roberta-base/AlloprofReranking.json b/outputs/benchmark_results/xlm-roberta-base/AlloprofReranking.json
similarity index 100%
rename from results/xlm-roberta-base/AlloprofReranking.json
rename to outputs/benchmark_results/xlm-roberta-base/AlloprofReranking.json
diff --git a/results/xlm-roberta-base/AlloprofRetrieval.json b/outputs/benchmark_results/xlm-roberta-base/AlloprofRetrieval.json
similarity index 100%
rename from results/xlm-roberta-base/AlloprofRetrieval.json
rename to outputs/benchmark_results/xlm-roberta-base/AlloprofRetrieval.json
diff --git a/results/xlm-roberta-base/AmazonReviewsClassification.json b/outputs/benchmark_results/xlm-roberta-base/AmazonReviewsClassification.json
similarity index 100%
rename from results/xlm-roberta-base/AmazonReviewsClassification.json
rename to outputs/benchmark_results/xlm-roberta-base/AmazonReviewsClassification.json
diff --git a/results/xlm-roberta-base/BSARDRetrieval.json b/outputs/benchmark_results/xlm-roberta-base/BSARDRetrieval.json
similarity index 100%
rename from results/xlm-roberta-base/BSARDRetrieval.json
rename to outputs/benchmark_results/xlm-roberta-base/BSARDRetrieval.json
diff --git a/results/xlm-roberta-base/DiaBLaBitextMining.json b/outputs/benchmark_results/xlm-roberta-base/DiaBLaBitextMining.json
similarity index 100%
rename from results/xlm-roberta-base/DiaBLaBitextMining.json
rename to outputs/benchmark_results/xlm-roberta-base/DiaBLaBitextMining.json
diff --git a/results/xlm-roberta-base/FloresBitextMining.json b/outputs/benchmark_results/xlm-roberta-base/FloresBitextMining.json
similarity index 100%
rename from results/xlm-roberta-base/FloresBitextMining.json
rename to outputs/benchmark_results/xlm-roberta-base/FloresBitextMining.json
diff --git a/results/xlm-roberta-base/HALClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-base/HALClusteringS2S.json
similarity index 100%
rename from results/xlm-roberta-base/HALClusteringS2S.json
rename to outputs/benchmark_results/xlm-roberta-base/HALClusteringS2S.json
diff --git a/results/xlm-roberta-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-base/MLSUMClusteringP2P.json
similarity index 100%
rename from results/xlm-roberta-base/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/xlm-roberta-base/MLSUMClusteringP2P.json
diff --git a/results/xlm-roberta-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-base/MLSUMClusteringS2S.json
similarity index 100%
rename from results/xlm-roberta-base/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/xlm-roberta-base/MLSUMClusteringS2S.json
diff --git a/results/xlm-roberta-base/MTOPDomainClassification.json b/outputs/benchmark_results/xlm-roberta-base/MTOPDomainClassification.json
similarity index 100%
rename from results/xlm-roberta-base/MTOPDomainClassification.json
rename to outputs/benchmark_results/xlm-roberta-base/MTOPDomainClassification.json
diff --git a/results/xlm-roberta-base/MTOPIntentClassification.json b/outputs/benchmark_results/xlm-roberta-base/MTOPIntentClassification.json
similarity index 100%
rename from results/xlm-roberta-base/MTOPIntentClassification.json
rename to outputs/benchmark_results/xlm-roberta-base/MTOPIntentClassification.json
diff --git a/results/xlm-roberta-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClassification.json
similarity index 100%
rename from results/xlm-roberta-base/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClassification.json
diff --git a/results/xlm-roberta-base/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/xlm-roberta-base/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClusteringP2P.json
diff --git a/results/xlm-roberta-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/xlm-roberta-base/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClusteringS2S.json
diff --git a/results/xlm-roberta-base/MassiveIntentClassification.json b/outputs/benchmark_results/xlm-roberta-base/MassiveIntentClassification.json
similarity index 100%
rename from results/xlm-roberta-base/MassiveIntentClassification.json
rename to outputs/benchmark_results/xlm-roberta-base/MassiveIntentClassification.json
diff --git a/results/xlm-roberta-base/MassiveScenarioClassification.json b/outputs/benchmark_results/xlm-roberta-base/MassiveScenarioClassification.json
similarity index 100%
rename from results/xlm-roberta-base/MassiveScenarioClassification.json
rename to outputs/benchmark_results/xlm-roberta-base/MassiveScenarioClassification.json
diff --git a/results/xlm-roberta-base/MintakaRetrieval.json b/outputs/benchmark_results/xlm-roberta-base/MintakaRetrieval.json
similarity index 100%
rename from results/xlm-roberta-base/MintakaRetrieval.json
rename to outputs/benchmark_results/xlm-roberta-base/MintakaRetrieval.json
diff --git a/results/xlm-roberta-base/OpusparcusPC.json b/outputs/benchmark_results/xlm-roberta-base/OpusparcusPC.json
similarity index 100%
rename from results/xlm-roberta-base/OpusparcusPC.json
rename to outputs/benchmark_results/xlm-roberta-base/OpusparcusPC.json
diff --git a/results/xlm-roberta-base/PawsX.json b/outputs/benchmark_results/xlm-roberta-base/PawsX.json
similarity index 100%
rename from results/xlm-roberta-base/PawsX.json
rename to outputs/benchmark_results/xlm-roberta-base/PawsX.json
diff --git a/results/xlm-roberta-base/SICKFr.json b/outputs/benchmark_results/xlm-roberta-base/SICKFr.json
similarity index 100%
rename from results/xlm-roberta-base/SICKFr.json
rename to outputs/benchmark_results/xlm-roberta-base/SICKFr.json
diff --git a/results/xlm-roberta-base/STS22.json b/outputs/benchmark_results/xlm-roberta-base/STS22.json
similarity index 100%
rename from results/xlm-roberta-base/STS22.json
rename to outputs/benchmark_results/xlm-roberta-base/STS22.json
diff --git a/results/xlm-roberta-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/xlm-roberta-base/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/xlm-roberta-base/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/xlm-roberta-base/STSBenchmarkMultilingualSTS.json
diff --git a/results/xlm-roberta-base/SummEvalFr.json b/outputs/benchmark_results/xlm-roberta-base/SummEvalFr.json
similarity index 100%
rename from results/xlm-roberta-base/SummEvalFr.json
rename to outputs/benchmark_results/xlm-roberta-base/SummEvalFr.json
diff --git a/results/xlm-roberta-base/SyntecReranking.json b/outputs/benchmark_results/xlm-roberta-base/SyntecReranking.json
similarity index 100%
rename from results/xlm-roberta-base/SyntecReranking.json
rename to outputs/benchmark_results/xlm-roberta-base/SyntecReranking.json
diff --git a/results/xlm-roberta-base/SyntecRetrieval.json b/outputs/benchmark_results/xlm-roberta-base/SyntecRetrieval.json
similarity index 100%
rename from results/xlm-roberta-base/SyntecRetrieval.json
rename to outputs/benchmark_results/xlm-roberta-base/SyntecRetrieval.json
diff --git a/results/xlm-roberta-base/XPQARetrieval.json b/outputs/benchmark_results/xlm-roberta-base/XPQARetrieval.json
similarity index 100%
rename from results/xlm-roberta-base/XPQARetrieval.json
rename to outputs/benchmark_results/xlm-roberta-base/XPQARetrieval.json
diff --git a/results/xlm-roberta-large/AlloProfClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-large/AlloProfClusteringP2P.json
similarity index 100%
rename from results/xlm-roberta-large/AlloProfClusteringP2P.json
rename to outputs/benchmark_results/xlm-roberta-large/AlloProfClusteringP2P.json
diff --git a/results/xlm-roberta-large/AlloProfClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-large/AlloProfClusteringS2S.json
similarity index 100%
rename from results/xlm-roberta-large/AlloProfClusteringS2S.json
rename to outputs/benchmark_results/xlm-roberta-large/AlloProfClusteringS2S.json
diff --git a/results/xlm-roberta-large/AlloprofReranking.json b/outputs/benchmark_results/xlm-roberta-large/AlloprofReranking.json
similarity index 100%
rename from results/xlm-roberta-large/AlloprofReranking.json
rename to outputs/benchmark_results/xlm-roberta-large/AlloprofReranking.json
diff --git a/results/xlm-roberta-large/AlloprofRetrieval.json b/outputs/benchmark_results/xlm-roberta-large/AlloprofRetrieval.json
similarity index 100%
rename from results/xlm-roberta-large/AlloprofRetrieval.json
rename to outputs/benchmark_results/xlm-roberta-large/AlloprofRetrieval.json
diff --git a/results/xlm-roberta-large/AmazonReviewsClassification.json b/outputs/benchmark_results/xlm-roberta-large/AmazonReviewsClassification.json
similarity index 100%
rename from results/xlm-roberta-large/AmazonReviewsClassification.json
rename to outputs/benchmark_results/xlm-roberta-large/AmazonReviewsClassification.json
diff --git a/results/xlm-roberta-large/BSARDRetrieval.json b/outputs/benchmark_results/xlm-roberta-large/BSARDRetrieval.json
similarity index 100%
rename from results/xlm-roberta-large/BSARDRetrieval.json
rename to outputs/benchmark_results/xlm-roberta-large/BSARDRetrieval.json
diff --git a/results/xlm-roberta-large/DiaBLaBitextMining.json b/outputs/benchmark_results/xlm-roberta-large/DiaBLaBitextMining.json
similarity index 100%
rename from results/xlm-roberta-large/DiaBLaBitextMining.json
rename to outputs/benchmark_results/xlm-roberta-large/DiaBLaBitextMining.json
diff --git a/results/xlm-roberta-large/FloresBitextMining.json b/outputs/benchmark_results/xlm-roberta-large/FloresBitextMining.json
similarity index 100%
rename from results/xlm-roberta-large/FloresBitextMining.json
rename to outputs/benchmark_results/xlm-roberta-large/FloresBitextMining.json
diff --git a/results/xlm-roberta-large/HALClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-large/HALClusteringS2S.json
similarity index 100%
rename from results/xlm-roberta-large/HALClusteringS2S.json
rename to outputs/benchmark_results/xlm-roberta-large/HALClusteringS2S.json
diff --git a/results/xlm-roberta-large/MLSUMClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-large/MLSUMClusteringP2P.json
similarity index 100%
rename from results/xlm-roberta-large/MLSUMClusteringP2P.json
rename to outputs/benchmark_results/xlm-roberta-large/MLSUMClusteringP2P.json
diff --git a/results/xlm-roberta-large/MLSUMClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-large/MLSUMClusteringS2S.json
similarity index 100%
rename from results/xlm-roberta-large/MLSUMClusteringS2S.json
rename to outputs/benchmark_results/xlm-roberta-large/MLSUMClusteringS2S.json
diff --git a/results/xlm-roberta-large/MTOPDomainClassification.json b/outputs/benchmark_results/xlm-roberta-large/MTOPDomainClassification.json
similarity index 100%
rename from results/xlm-roberta-large/MTOPDomainClassification.json
rename to outputs/benchmark_results/xlm-roberta-large/MTOPDomainClassification.json
diff --git a/results/xlm-roberta-large/MTOPIntentClassification.json b/outputs/benchmark_results/xlm-roberta-large/MTOPIntentClassification.json
similarity index 100%
rename from results/xlm-roberta-large/MTOPIntentClassification.json
rename to outputs/benchmark_results/xlm-roberta-large/MTOPIntentClassification.json
diff --git a/results/xlm-roberta-large/MasakhaNEWSClassification.json b/outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClassification.json
similarity index 100%
rename from results/xlm-roberta-large/MasakhaNEWSClassification.json
rename to outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClassification.json
diff --git a/results/xlm-roberta-large/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClusteringP2P.json
similarity index 100%
rename from results/xlm-roberta-large/MasakhaNEWSClusteringP2P.json
rename to outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClusteringP2P.json
diff --git a/results/xlm-roberta-large/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClusteringS2S.json
similarity index 100%
rename from results/xlm-roberta-large/MasakhaNEWSClusteringS2S.json
rename to outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClusteringS2S.json
diff --git a/results/xlm-roberta-large/MassiveIntentClassification.json b/outputs/benchmark_results/xlm-roberta-large/MassiveIntentClassification.json
similarity index 100%
rename from results/xlm-roberta-large/MassiveIntentClassification.json
rename to outputs/benchmark_results/xlm-roberta-large/MassiveIntentClassification.json
diff --git a/results/xlm-roberta-large/MassiveScenarioClassification.json b/outputs/benchmark_results/xlm-roberta-large/MassiveScenarioClassification.json
similarity index 100%
rename from results/xlm-roberta-large/MassiveScenarioClassification.json
rename to outputs/benchmark_results/xlm-roberta-large/MassiveScenarioClassification.json
diff --git a/results/xlm-roberta-large/MintakaRetrieval.json b/outputs/benchmark_results/xlm-roberta-large/MintakaRetrieval.json
similarity index 100%
rename from results/xlm-roberta-large/MintakaRetrieval.json
rename to outputs/benchmark_results/xlm-roberta-large/MintakaRetrieval.json
diff --git a/results/xlm-roberta-large/OpusparcusPC.json b/outputs/benchmark_results/xlm-roberta-large/OpusparcusPC.json
similarity index 100%
rename from results/xlm-roberta-large/OpusparcusPC.json
rename to outputs/benchmark_results/xlm-roberta-large/OpusparcusPC.json
diff --git a/results/xlm-roberta-large/PawsX.json b/outputs/benchmark_results/xlm-roberta-large/PawsX.json
similarity index 100%
rename from results/xlm-roberta-large/PawsX.json
rename to outputs/benchmark_results/xlm-roberta-large/PawsX.json
diff --git a/results/xlm-roberta-large/SICKFr.json b/outputs/benchmark_results/xlm-roberta-large/SICKFr.json
similarity index 100%
rename from results/xlm-roberta-large/SICKFr.json
rename to outputs/benchmark_results/xlm-roberta-large/SICKFr.json
diff --git a/results/xlm-roberta-large/STS22.json b/outputs/benchmark_results/xlm-roberta-large/STS22.json
similarity index 100%
rename from results/xlm-roberta-large/STS22.json
rename to outputs/benchmark_results/xlm-roberta-large/STS22.json
diff --git a/results/xlm-roberta-large/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/xlm-roberta-large/STSBenchmarkMultilingualSTS.json
similarity index 100%
rename from results/xlm-roberta-large/STSBenchmarkMultilingualSTS.json
rename to outputs/benchmark_results/xlm-roberta-large/STSBenchmarkMultilingualSTS.json
diff --git a/results/xlm-roberta-large/SummEvalFr.json b/outputs/benchmark_results/xlm-roberta-large/SummEvalFr.json
similarity index 100%
rename from results/xlm-roberta-large/SummEvalFr.json
rename to outputs/benchmark_results/xlm-roberta-large/SummEvalFr.json
diff --git a/results/xlm-roberta-large/SyntecReranking.json b/outputs/benchmark_results/xlm-roberta-large/SyntecReranking.json
similarity index 100%
rename from results/xlm-roberta-large/SyntecReranking.json
rename to outputs/benchmark_results/xlm-roberta-large/SyntecReranking.json
diff --git a/results/xlm-roberta-large/SyntecRetrieval.json b/outputs/benchmark_results/xlm-roberta-large/SyntecRetrieval.json
similarity index 100%
rename from results/xlm-roberta-large/SyntecRetrieval.json
rename to outputs/benchmark_results/xlm-roberta-large/SyntecRetrieval.json
diff --git a/results/xlm-roberta-large/XPQARetrieval.json b/outputs/benchmark_results/xlm-roberta-large/XPQARetrieval.json
similarity index 100%
rename from results/xlm-roberta-large/XPQARetrieval.json
rename to outputs/benchmark_results/xlm-roberta-large/XPQARetrieval.json
diff --git a/cost_estimation.json b/outputs/models_analysis/cost_estimation.json
similarity index 100%
rename from cost_estimation.json
rename to outputs/models_analysis/cost_estimation.json
diff --git a/script_mteb_french/results_analysis/model_specs.json b/outputs/models_analysis/model_specs.json
similarity index 100%
rename from script_mteb_french/results_analysis/model_specs.json
rename to outputs/models_analysis/model_specs.json
diff --git a/analyses_outputs/datasets_similarity/PCA_components_all.pdf b/outputs/results_analysis/datasets_similarity/PCA_components_all.pdf
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_all.pdf
rename to outputs/results_analysis/datasets_similarity/PCA_components_all.pdf
diff --git a/analyses_outputs/datasets_similarity/PCA_components_all.png b/outputs/results_analysis/datasets_similarity/PCA_components_all.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_all.png
rename to outputs/results_analysis/datasets_similarity/PCA_components_all.png
diff --git a/analyses_outputs/datasets_similarity/PCA_components_all.svg b/outputs/results_analysis/datasets_similarity/PCA_components_all.svg
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_all.svg
rename to outputs/results_analysis/datasets_similarity/PCA_components_all.svg
diff --git a/analyses_outputs/datasets_similarity/PCA_components_all_with names.png b/outputs/results_analysis/datasets_similarity/PCA_components_all_with names.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_all_with names.png
rename to outputs/results_analysis/datasets_similarity/PCA_components_all_with names.png
diff --git a/analyses_outputs/datasets_similarity/PCA_components_classification.png b/outputs/results_analysis/datasets_similarity/PCA_components_classification.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_classification.png
rename to outputs/results_analysis/datasets_similarity/PCA_components_classification.png
diff --git a/analyses_outputs/datasets_similarity/PCA_components_clustering.png b/outputs/results_analysis/datasets_similarity/PCA_components_clustering.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_clustering.png
rename to outputs/results_analysis/datasets_similarity/PCA_components_clustering.png
diff --git a/analyses_outputs/datasets_similarity/PCA_components_pairclassification.png b/outputs/results_analysis/datasets_similarity/PCA_components_pairclassification.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_pairclassification.png
rename to outputs/results_analysis/datasets_similarity/PCA_components_pairclassification.png
diff --git a/analyses_outputs/datasets_similarity/PCA_components_reranking.png b/outputs/results_analysis/datasets_similarity/PCA_components_reranking.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_reranking.png
rename to outputs/results_analysis/datasets_similarity/PCA_components_reranking.png
diff --git a/analyses_outputs/datasets_similarity/PCA_components_retrieval.pdf b/outputs/results_analysis/datasets_similarity/PCA_components_retrieval.pdf
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_retrieval.pdf
rename to outputs/results_analysis/datasets_similarity/PCA_components_retrieval.pdf
diff --git a/analyses_outputs/datasets_similarity/PCA_components_retrieval.png b/outputs/results_analysis/datasets_similarity/PCA_components_retrieval.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_retrieval.png
rename to outputs/results_analysis/datasets_similarity/PCA_components_retrieval.png
diff --git a/analyses_outputs/datasets_similarity/PCA_components_sts.png b/outputs/results_analysis/datasets_similarity/PCA_components_sts.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_components_sts.png
rename to outputs/results_analysis/datasets_similarity/PCA_components_sts.png
diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_all.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_all.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_all.png
rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_all.png
diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_classification.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_classification.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_classification.png
rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_classification.png
diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_clustering.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_clustering.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_clustering.png
rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_clustering.png
diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_pairclassification.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_pairclassification.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_pairclassification.png
rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_pairclassification.png
diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_reranking.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_reranking.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_reranking.png
rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_reranking.png
diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_retrieval.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_retrieval.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_retrieval.png
rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_retrieval.png
diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_sts.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_sts.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_sts.png
rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_sts.png
diff --git a/analyses_outputs/datasets_similarity/cosim_all.pdf b/outputs/results_analysis/datasets_similarity/cosim_all.pdf
similarity index 100%
rename from analyses_outputs/datasets_similarity/cosim_all.pdf
rename to outputs/results_analysis/datasets_similarity/cosim_all.pdf
diff --git a/analyses_outputs/datasets_similarity/cosim_all.png b/outputs/results_analysis/datasets_similarity/cosim_all.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/cosim_all.png
rename to outputs/results_analysis/datasets_similarity/cosim_all.png
diff --git a/analyses_outputs/datasets_similarity/cosim_classification.png b/outputs/results_analysis/datasets_similarity/cosim_classification.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/cosim_classification.png
rename to outputs/results_analysis/datasets_similarity/cosim_classification.png
diff --git a/analyses_outputs/datasets_similarity/cosim_clustering.png b/outputs/results_analysis/datasets_similarity/cosim_clustering.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/cosim_clustering.png
rename to outputs/results_analysis/datasets_similarity/cosim_clustering.png
diff --git a/analyses_outputs/datasets_similarity/cosim_pairclassification.png b/outputs/results_analysis/datasets_similarity/cosim_pairclassification.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/cosim_pairclassification.png
rename to outputs/results_analysis/datasets_similarity/cosim_pairclassification.png
diff --git a/analyses_outputs/datasets_similarity/cosim_reranking.png b/outputs/results_analysis/datasets_similarity/cosim_reranking.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/cosim_reranking.png
rename to outputs/results_analysis/datasets_similarity/cosim_reranking.png
diff --git a/analyses_outputs/datasets_similarity/cosim_retrieval.pdf b/outputs/results_analysis/datasets_similarity/cosim_retrieval.pdf
similarity index 100%
rename from analyses_outputs/datasets_similarity/cosim_retrieval.pdf
rename to outputs/results_analysis/datasets_similarity/cosim_retrieval.pdf
diff --git a/analyses_outputs/datasets_similarity/cosim_retrieval.png b/outputs/results_analysis/datasets_similarity/cosim_retrieval.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/cosim_retrieval.png
rename to outputs/results_analysis/datasets_similarity/cosim_retrieval.png
diff --git a/analyses_outputs/datasets_similarity/cosim_sts.png b/outputs/results_analysis/datasets_similarity/cosim_sts.png
similarity index 100%
rename from analyses_outputs/datasets_similarity/cosim_sts.png
rename to outputs/results_analysis/datasets_similarity/cosim_sts.png
diff --git a/analyses_outputs/performance_vs_characteristics/correlation_heatmap.pdf b/outputs/results_analysis/performance_vs_characteristics/correlation_heatmap.pdf
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/correlation_heatmap.pdf
rename to outputs/results_analysis/performance_vs_characteristics/correlation_heatmap.pdf
diff --git a/analyses_outputs/performance_vs_characteristics/correlation_heatmap.png b/outputs/results_analysis/performance_vs_characteristics/correlation_heatmap.png
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/correlation_heatmap.png
rename to outputs/results_analysis/performance_vs_characteristics/correlation_heatmap.png
diff --git a/analyses_outputs/performance_vs_characteristics/correlation_matrix.csv b/outputs/results_analysis/performance_vs_characteristics/correlation_matrix.csv
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/correlation_matrix.csv
rename to outputs/results_analysis/performance_vs_characteristics/correlation_matrix.csv
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_embedding_dim_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_embedding_dim_avg.pdf
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_embedding_dim_avg.pdf
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_embedding_dim_avg.pdf
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_embedding_dim_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_embedding_dim_avg.png
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_embedding_dim_avg.png
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_embedding_dim_avg.png
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_finetuned_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_finetuned_avg.pdf
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_finetuned_avg.pdf
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_finetuned_avg.pdf
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_finetuned_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_finetuned_avg.png
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_finetuned_avg.png
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_finetuned_avg.png
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_license_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_license_avg.pdf
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_license_avg.pdf
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_license_avg.pdf
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_license_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_license_avg.png
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_license_avg.png
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_license_avg.png
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_model_type_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_model_type_avg.pdf
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_model_type_avg.pdf
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_model_type_avg.pdf
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_model_type_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_model_type_avg.png
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_model_type_avg.png
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_model_type_avg.png
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.pdf
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.pdf
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.pdf
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.png
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.png
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.png
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_number_params_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_number_params_avg.pdf
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_number_params_avg.pdf
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_number_params_avg.pdf
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_number_params_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_number_params_avg.png
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_number_params_avg.png
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_number_params_avg.png
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_seq_len_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_seq_len_avg.pdf
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_seq_len_avg.pdf
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_seq_len_avg.pdf
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_seq_len_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_seq_len_avg.png
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_seq_len_avg.png
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_seq_len_avg.png
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_size_gb_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_size_gb_avg.pdf
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_size_gb_avg.pdf
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_size_gb_avg.pdf
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_size_gb_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_size_gb_avg.png
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_size_gb_avg.png
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_size_gb_avg.png
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.pdf
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.pdf
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.pdf
diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.png
similarity index 100%
rename from analyses_outputs/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.png
rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.png
diff --git a/analyses_outputs/results.xlsx b/outputs/results_analysis/results.xlsx
similarity index 100%
rename from analyses_outputs/results.xlsx
rename to outputs/results_analysis/results.xlsx
diff --git a/analyses_outputs/results_correlations/spearman_corr_heatmap_datasets.pdf b/outputs/results_analysis/results_correlations/spearman_corr_heatmap_datasets.pdf
similarity index 100%
rename from analyses_outputs/results_correlations/spearman_corr_heatmap_datasets.pdf
rename to outputs/results_analysis/results_correlations/spearman_corr_heatmap_datasets.pdf
diff --git a/analyses_outputs/results_correlations/spearman_corr_heatmap_datasets.png b/outputs/results_analysis/results_correlations/spearman_corr_heatmap_datasets.png
similarity index 100%
rename from analyses_outputs/results_correlations/spearman_corr_heatmap_datasets.png
rename to outputs/results_analysis/results_correlations/spearman_corr_heatmap_datasets.png
diff --git a/analyses_outputs/results_correlations/spearman_corr_heatmap_models.pdf b/outputs/results_analysis/results_correlations/spearman_corr_heatmap_models.pdf
similarity index 100%
rename from analyses_outputs/results_correlations/spearman_corr_heatmap_models.pdf
rename to outputs/results_analysis/results_correlations/spearman_corr_heatmap_models.pdf
diff --git a/analyses_outputs/results_correlations/spearman_corr_heatmap_models.png b/outputs/results_analysis/results_correlations/spearman_corr_heatmap_models.png
similarity index 100%
rename from analyses_outputs/results_correlations/spearman_corr_heatmap_models.png
rename to outputs/results_analysis/results_correlations/spearman_corr_heatmap_models.png
diff --git a/analyses_outputs/results_correlations/spearman_corr_matrix_datasets.csv b/outputs/results_analysis/results_correlations/spearman_corr_matrix_datasets.csv
similarity index 100%
rename from analyses_outputs/results_correlations/spearman_corr_matrix_datasets.csv
rename to outputs/results_analysis/results_correlations/spearman_corr_matrix_datasets.csv
diff --git a/analyses_outputs/results_correlations/spearman_corr_matrix_models.csv b/outputs/results_analysis/results_correlations/spearman_corr_matrix_models.csv
similarity index 100%
rename from analyses_outputs/results_correlations/spearman_corr_matrix_models.csv
rename to outputs/results_analysis/results_correlations/spearman_corr_matrix_models.csv
diff --git a/analyses_outputs/statistical_tests/conover_friedman.pdf b/outputs/results_analysis/statistical_tests/conover_friedman.pdf
similarity index 100%
rename from analyses_outputs/statistical_tests/conover_friedman.pdf
rename to outputs/results_analysis/statistical_tests/conover_friedman.pdf
diff --git a/analyses_outputs/statistical_tests/conover_friedman.png b/outputs/results_analysis/statistical_tests/conover_friedman.png
similarity index 100%
rename from analyses_outputs/statistical_tests/conover_friedman.png
rename to outputs/results_analysis/statistical_tests/conover_friedman.png
diff --git a/analyses_outputs/statistical_tests/critical_difference_diagram.pdf b/outputs/results_analysis/statistical_tests/critical_difference_diagram.pdf
similarity index 100%
rename from analyses_outputs/statistical_tests/critical_difference_diagram.pdf
rename to outputs/results_analysis/statistical_tests/critical_difference_diagram.pdf
diff --git a/analyses_outputs/statistical_tests/critical_difference_diagram.png b/outputs/results_analysis/statistical_tests/critical_difference_diagram.png
similarity index 100%
rename from analyses_outputs/statistical_tests/critical_difference_diagram.png
rename to outputs/results_analysis/statistical_tests/critical_difference_diagram.png
diff --git a/paper/mteb.pdf b/paper/mteb.pdf
deleted file mode 100644
index 8f8b1e80..00000000
Binary files a/paper/mteb.pdf and /dev/null differ
diff --git a/paper/mteb.zip b/paper/mteb.zip
deleted file mode 100644
index 64303a08..00000000
Binary files a/paper/mteb.zip and /dev/null differ
diff --git a/plotstables/MTEB_EACL2023_POSTER.pdf b/plotstables/MTEB_EACL2023_POSTER.pdf
deleted file mode 100644
index 1cc2ea1d..00000000
Binary files a/plotstables/MTEB_EACL2023_POSTER.pdf and /dev/null differ
diff --git a/plotstables/MTEB_EACL2023_PRES.pdf b/plotstables/MTEB_EACL2023_PRES.pdf
deleted file mode 100644
index f608d7cb..00000000
Binary files a/plotstables/MTEB_EACL2023_PRES.pdf and /dev/null differ
diff --git a/plotstables/all_en.txt b/plotstables/all_en.txt
deleted file mode 100644
index d32c23d3..00000000
--- a/plotstables/all_en.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-Dataset & Language & Glove & Komninos & BERT & SimCSE-BERT-unsup & SimCSE-BERT-sup & coCondenser-msmarco & Contriever & SPECTER & LaBSE & LASER2 & MiniLM-L6 & MiniLM-L12 & MiniLM-L12-multilingual & MPNet & MPNet-multilingual & Ada Similarity & SGPT-125M-nli & SGPT-5.8B-nli & SGPT-125M-msmarco & SGPT-1.3B-msmarco & SGPT-2.7B-msmarco & SGPT-5.8B-msmarco & SGPT-BLOOM-7.1B-msmarco & GTR-Base & GTR-Large & GTR-XL & GTR-XXL & ST5-Base & ST5-Large & ST5-XL & ST5-XXL \\
-AmazonCounterfactualClassification & en & 56.91 & 60.54 & 74.25 & 67.09 & 75.75 & 64.06 & 72.19 & 58.7 & 75.93 & 76.84 & 64.15 & 65.28 & 71.57 & 65.27 & 75.81 & 76.4 & 65.88 & 74.07 & 61.24 & 65.21 & 67.57 & 69.22 & 68.06 & 69.33 & 70.03 & 68.6 & 67.3 & 75.82 & 75.51 & 76.01 & 77.07 \\
-AmazonPolarityClassification & en & 60.32 & 59.59 & 71.33 & 74.48 & 82.47 & 66.88 & 68.63 & 57.77 & 68.95 & 61.01 & 62.58 & 62.98 & 69.21 & 67.13 & 76.41 & 92.83 & 74.94 & 82.31 & 65.4 & 73.21 & 71.44 & 71.26 & 68.97 & 67.82 & 73.92 & 74.58 & 75.05 & 85.12 & 92.87 & 93.17 & 92.79 \\
-AmazonReviewsClassification & en & 29.67 & 31.01 & 33.56 & 33.85 & 39.6 & 34.85 & 37.42 & 26.26 & 35.8 & 28.71 & 31.79 & 30.79 & 35.11 & 31.92 & 38.51 & 47.45 & 35.1 & 41.58 & 31.17 & 34.96 & 35.75 & 39.19 & 33.86 & 38.48 & 37.21 & 38.2 & 37.3 & 44.94 & 47.12 & 48.18 & 48.93 \\
-Banking77Classification & en & 67.69 & 67.05 & 63.41 & 73.55 & 75.76 & 82.35 & 80.02 & 66.66 & 69.85 & 57.76 & 79.75 & 80.4 & 79.77 & 81.86 & 81.07 & 68.04 & 74.68 & 81.74 & 77.7 & 82.06 & 83.22 & 84.49 & 84.33 & 79.26 & 81.21 & 82.22 & 82.32 & 76.48 & 78.46 & 80.88 & 82.31 \\
-EmotionClassification & en & 36.93 & 33.18 & 35.28 & 42.22 & 44.81 & 41.91 & 44.77 & 24.82 & 37.22 & 24.83 & 38.43 & 41.17 & 42.37 & 39.73 & 45.84 & 50.32 & 42.23 & 49.92 & 39.08 & 46.39 & 49.21 & 49.66 & 44.87 & 42.2 & 46.32 & 45.55 & 43.19 & 51.36 & 51.73 & 51.95 & 48.57 \\
-ImdbClassification & en & 62.57 & 63.98 & 65.35 & 69.63 & 73.53 & 60.17 & 67.04 & 56.35 & 62.04 & 57.58 & 60.66 & 59.76 & 60.46 & 70.72 & 64.57 & 89.38 & 62.9 & 74.33 & 58.67 & 64.05 & 63.53 & 66.64 & 61.77 & 65.99 & 70.86 & 68.15 & 70.8 & 77.34 & 87.01 & 87.54 & 90.23 \\
-MassiveIntentClassification & en & 56.19 & 57.21 & 59.88 & 59.84 & 65.95 & 70.4 & 67.78 & 51.73 & 61.46 & 47.91 & 67.4 & 67.15 & 66.84 & 69.57 & 69.32 & 65.17 & 58.08 & 70.0 & 61.41 & 68.65 & 69.01 & 70.39 & 69.67 & 67.05 & 70.06 & 70.23 & 70.61 & 69.74 & 71.78 & 72.09 & 73.44 \\
-MassiveScenarioClassification & en & 66.03 & 66.11 & 64.28 & 66.25 & 70.78 & 73.73 & 76.0 & 58.58 & 66.41 & 55.92 & 75.76 & 74.58 & 71.51 & 76.01 & 75.35 & 67.67 & 66.34 & 75.03 & 69.74 & 76.04 & 75.9 & 76.28 & 75.34 & 75.4 & 75.49 & 75.94 & 77.77 & 72.32 & 73.16 & 73.26 & 74.82 \\
-MTOPDomainClassification & en & 79.11 & 78.57 & 82.63 & 81.71 & 84.29 & 91.34 & 93.18 & 74.53 & 86.06 & 75.36 & 91.56 & 91.9 & 87.06 & 92.08 & 89.24 & 89.89 & 81.52 & 89.64 & 86.96 & 92.08 & 92.56 & 93.47 & 93.68 & 92.42 & 94.01 & 93.6 & 93.84 & 90.34 & 90.99 & 90.73 & 92.49 \\
-MTOPIntentClassification & en & 55.85 & 57.07 & 68.14 & 59.23 & 63.14 & 71.07 & 69.31 & 50.05 & 63.03 & 49.47 & 62.18 & 62.84 & 65.52 & 70.21 & 68.69 & 64.8 & 58.24 & 70.68 & 62.25 & 71.19 & 71.85 & 72.42 & 71.34 & 62.44 & 63.86 & 65.93 & 67.71 & 63.32 & 64.98 & 68.15 & 68.33 \\
-ToxicConversationsClassification & en & 65.4 & 67.76 & 70.0 & 68.82 & 72.04 & 64.01 & 67.77 & 57.44 & 66.9 & 54.05 & 66.99 & 67.47 & 66.07 & 60.86 & 71.02 & 70.0 & 62.79 & 69.93 & 62.66 & 68.73 & 68.84 & 67.71 & 66.55 & 66.6 & 68.65 & 67.56 & 68.48 & 68.2 & 71.73 & 70.95 & 70.04 \\
-TweetSentimentExtractionClassification & en & 50.8 & 49.68 & 51.81 & 53.36 & 59.73 & 55.74 & 56.1 & 45.52 & 58.82 & 48.73 & 55.41 & 54.25 & 56.12 & 55.46 & 59.03 & 63.35 & 54.82 & 62.44 & 52.41 & 55.67 & 56.69 & 56.85 & 55.85 & 56.02 & 54.09 & 54.77 & 54.54 & 62.71 & 62.33 & 61.21 & 62.01 \\
-ArxivClusteringP2P & en & 32.56 & 34.73 & 35.19 & 32.61 & 35.18 & 36.94 & 42.61 & 44.75 & 32.13 & 17.77 & 46.55 & 46.07 & 38.33 & 48.38 & 37.78 & 41.49 & 34.74 & 40.55 & 39.71 & 43.38 & 44.72 & 45.59 & 44.59 & 35.49 & 37.5 & 37.9 & 37.9 & 39.28 & 41.62 & 41.62 & 42.89 \\
-ArxivClusteringS2S & en & 23.14 & 26.01 & 27.51 & 24.68 & 27.54 & 29.03 & 32.32 & 35.27 & 22.05 & 12.39 & 37.86 & 37.5 & 31.55 & 39.72 & 31.68 & 28.47 & 24.68 & 32.49 & 28.24 & 33.71 & 35.08 & 38.86 & 38.03 & 27.18 & 30.55 & 30.45 & 32.39 & 27.26 & 29.44 & 31.17 & 33.47 \\
-BiorxivClusteringP2P & en & 29.27 & 29.76 & 30.12 & 24.9 & 30.15 & 32.35 & 34.97 & 39.52 & 29.84 & 12.4 & 38.48 & 36.99 & 33.49 & 39.62 & 33.09 & 36.86 & 28.93 & 33.59 & 33.63 & 35.06 & 34.41 & 36.55 & 36.03 & 27.66 & 29.59 & 30.52 & 30.48 & 33.99 & 35.99 & 36.43 & 36.53 \\
-BiorxivClusteringS2S & en & 19.18 & 20.71 & 24.77 & 19.55 & 24.67 & 28.16 & 29.08 & 34.53 & 20.57 & 8.83 & 33.17 & 33.21 & 29.44 & 35.02 & 29.6 & 27.55 & 23.08 & 29.13 & 27.04 & 30.71 & 30.53 & 33.7 & 32.48 & 23.25 & 25.72 & 26.06 & 27.5 & 22.92 & 24.02 & 26.47 & 28.66 \\
-MedrxivClusteringP2P & en & 26.12 & 26.65 & 26.09 & 23.6 & 26.25 & 30.23 & 31.19 & 35.04 & 30.13 & 17.91 & 34.41 & 34.25 & 31.52 & 35.58 & 31.96 & 31.09 & 28.3 & 30.33 & 31.37 & 32.08 & 31.35 & 31.51 & 31.05 & 27.57 & 28.72 & 28.69 & 29.12 & 33.2 & 32.4 & 32.3 & 32.09 \\
-MedrxivClusteringS2S & en & 20.38 & 21.5 & 23.6 & 21.97 & 24.12 & 27.01 & 27.27 & 31.66 & 24.82 & 16.63 & 32.29 & 32.24 & 30.87 & 32.87 & 31.7 & 26.5 & 24.93 & 28.02 & 26.87 & 29.45 & 28.77 & 28.76 & 29.26 & 25.13 & 27.39 & 26.69 & 27.56 & 26.13 & 26.33 & 26.93 & 26.82 \\
-RedditClustering & en & 28.46 & 28.84 & 27.24 & 32.18 & 40.23 & 48.04 & 54.89 & 24.13 & 28.79 & 9.96 & 50.67 & 51.18 & 42.02 & 54.82 & 45.24 & 42.47 & 33.76 & 42.17 & 40.23 & 48.23 & 46.47 & 40.45 & 35.53 & 56.13 & 61.69 & 61.34 & 64.13 & 52.93 & 54.53 & 57.03 & 58.99 \\
-RedditClusteringP2P & en & 35.82 & 7.37 & 43.32 & 45.14 & 47.74 & 53.53 & 57.58 & 35.06 & 49.14 & 26.42 & 54.15 & 54.8 & 50.73 & 56.77 & 51.31 & 58.1 & 41.01 & 48.02 & 49.09 & 53.18 & 54.17 & 55.75 & 54.52 & 58.53 & 61.67 & 61.11 & 62.84 & 59.67 & 62.5 & 62.34 & 64.46 \\
-StackExchangeClustering & en & 35.8 & 39.04 & 43.58 & 43.07 & 47.55 & 59.54 & 63.15 & 39.01 & 35.43 & 15.79 & 53.36 & 53.05 & 49.6 & 53.8 & 52.98 & 53.52 & 44.59 & 54.13 & 52.74 & 60.86 & 59.19 & 59.21 & 55.13 & 64.21 & 69.93 & 69.95 & 71.43 & 63.13 & 65.11 & 67.13 & 70.78 \\
-StackExchangeClusteringP2P & en & 28.51 & 30.23 & 26.55 & 28.5 & 29.45 & 30.48 & 32.25 & 31.46 & 28.83 & 18.63 & 38.0 & 33.13 & 31.69 & 34.28 & 32.94 & 30.43 & 28.23 & 31.12 & 32.66 & 32.36 & 32.57 & 33.95 & 34.31 & 33.01 & 33.21 & 32.73 & 32.85 & 35.68 & 36.86 & 34.79 & 35.25 \\
-TwentyNewsgroupsClustering & en & 25.83 & 27.42 & 23.35 & 23.21 & 34.86 & 38.68 & 46.82 & 24.22 & 23.28 & 11.38 & 46.86 & 47.47 & 39.28 & 49.74 & 44.1 & 36.26 & 28.24 & 37.2 & 32.13 & 40.06 & 40.89 & 39.46 & 37.28 & 46.72 & 51.64 & 51.15 & 50.44 & 48.1 & 49.33 & 49.53 & 50.93 \\
-SprintDuplicateQuestions & en & 86.96 & 85.55 & 36.81 & 69.41 & 69.39 & 96.09 & 95.55 & 71.63 & 89.26 & 65.54 & 94.55 & 92.45 & 89.46 & 90.15 & 90.55 & 77.85 & 77.73 & 80.54 & 89.89 & 92.58 & 93.47 & 93.84 & 94.93 & 94.55 & 95.05 & 95.45 & 95.68 & 91.23 & 89.01 & 91.44 & 88.89 \\
-TwitterSemEval2015 & en & 48.45 & 53.85 & 55.9 & 60.21 & 67.75 & 65.95 & 66.85 & 43.25 & 62.78 & 59.57 & 67.86 & 70.02 & 62.06 & 73.85 & 66.75 & 69.04 & 57.09 & 66.0 & 54.75 & 62.37 & 63.68 & 66.87 & 65.31 & 72.23 & 76.03 & 77.81 & 77.54 & 78.25 & 79.75 & 80.89 & 80.28 \\
-TwitterURLCorpus & en & 77.35 & 79.41 & 76.29 & 81.37 & 83.89 & 83.17 & 85.21 & 69.22 & 84.58 & 81.47 & 84.7 & 84.77 & 83.83 & 85.11 & 85.14 & 83.69 & 80.51 & 84.54 & 81.06 & 83.79 & 84.8 & 85.29 & 85.46 & 84.77 & 84.89 & 85.14 & 85.13 & 86.05 & 86.14 & 85.86 & 86.01 \\
-AskUbuntuDupQuestions & en & 49.57 & 50.88 & 45.84 & 51.57 & 51.8 & 58.99 & 56.69 & 50.07 & 52.75 & 48.99 & 63.48 & 64.06 & 60.49 & 65.85 & 60.16 & 53.49 & 52.63 & 55.9 & 55.84 & 58.13 & 59.63 & 61.63 & 59.97 & 60.86 & 61.64 & 63.08 & 63.23 & 59.73 & 61.51 & 62.86 & 66.16 \\
-MindSmallReranking & en & 27.01 & 28.92 & 28.37 & 28.62 & 29.3 & 27.13 & 31.58 & 24.8 & 29.81 & 24.79 & 30.8 & 31.02 & 30.37 & 30.97 & 30.15 & 30.71 & 29.27 & 31.11 & 30.4 & 31.34 & 31.72 & 32.29 & 31.79 & 31.33 & 31.84 & 31.5 & 31.93 & 30.2 & 30.27 & 29.77 & 30.6 \\
-SciDocsRR & en & 62.56 & 63.55 & 64.94 & 66.33 & 70.14 & 72.78 & 76.51 & 81.31 & 68.72 & 54.99 & 87.12 & 87.2 & 77.78 & 88.65 & 78.09 & 71.04 & 68.36 & 77.54 & 71.34 & 77.21 & 77.72 & 80.79 & 79.77 & 73.71 & 76.39 & 76.49 & 77.96 & 73.96 & 74.88 & 75.16 & 76.09 \\
-StackOverflowDupQuestions & en & 34.03 & 35.65 & 34.62 & 39.35 & 38.9 & 48.48 & 47.78 & 36.22 & 42.42 & 36.98 & 50.76 & 51.47 & 45.85 & 51.98 & 46.79 & 40.85 & 39.97 & 44.77 & 44.74 & 49.32 & 49.61 & 51.53 & 51.07 & 51.01 & 51.58 & 52.79 & 53.5 & 48.46 & 49.34 & 51.05 & 52.85 \\
-ArguAna & en & 36.3 & 30.96 & 28.29 & 38.34 & 38.33 & 45.15 & 48.32 & 32.67 & 34.18 & 12.86 & 50.17 & 47.13 & 44.88 & 46.52 & 48.91 & & 31.04 & 35.07 & 45.42 & 49.68 & 50.49 & 51.38 & 47.28 & 50.83 & 52.09 & 52.81 & 53.77 & 44.85 & 39.27 & 39.4 & 39.85 \\
-ClimateFEVER & en & 14.44 & 14.87 & 5.41 & 11.8 & 11.98 & 16.96 & 24.79 & 6.86 & 3.83 & 0.36 & 20.27 & 21.57 & 18.49 & 21.97 & 15.27 & & 11.01 & 17.57 & 21.86 & 26.6 & 27.11 & 30.46 & 29.39 & 24.88 & 26.9 & 27.01 & 27.21 & 10.37 & 11.36 & 10.61 & 14.63 \\
-CQADupstackRetrieval & en & 15.47 & 16.79 & 5.51 & 13.22 & 14.5 & 27.72 & 33.67 & 14.6 & 18.75 & 4.12 & 41.32 & 42.53 & 30.71 & 44.96 & 31.32 & & 20.29 & 29.98 & 27.25 & 33.33 & 36.53 & 39.4 & 39.62 & 34.55 & 36.62 & 37.35 & 38.56 & 35.23 & 38.96 & 40.78 & 44.65 \\
-DBPedia & en & 18.29 & 15.88 & 4.13 & 15.04 & 19.73 & 27.86 & 38.1 & 4.14 & 15.57 & 1.53 & 32.33 & 33.36 & 22.63 & 32.09 & 26.22 & & 10.87 & 26.1 & 22.72 & 31.51 & 34.7 & 39.87 & 39.03 & 35.24 & 39.55 & 39.74 & 41.28 & 27.77 & 31.55 & 33.65 & 39.19 \\
-FEVER & en & 14.99 & 15.56 & 3.3 & 21.05 & 20.41 & 45.68 & 59.29 & 5.45 & 12.17 & 0.77 & 51.93 & 55.91 & 52.66 & 50.86 & 56.76 & & 18.4 & 38.64 & 60.45 & 68.12 & 72.73 & 78.24 & 73.97 & 68.93 & 72.66 & 72.18 & 74.08 & 26.16 & 36.21 & 36.12 & 51.2 \\
-FiQA2018 & en & 10.09 & 10.49 & 2.19 & 9.84 & 10.41 & 15.62 & 27.42 & 5.64 & 7.0 & 1.73 & 36.87 & 37.27 & 20.33 & 49.96 & 22.96 & & 8.94 & 18.59 & 21.12 & 29.99 & 33.29 & 37.2 & 35.84 & 35.15 & 42.79 & 44.19 & 46.78 & 34.83 & 43.55 & 44.71 & 46.68 \\
-HotpotQA & en & 19.18 & 20.77 & 8.26 & 19.75 & 22.89 & 35.61 & 56.81 & 5.46 & 18.75 & 5.5 & 46.51 & 44.59 & 30.01 & 39.29 & 37.03 & & 17.73 & 33.99 & 40.88 & 49.93 & 52.84 & 59.26 & 57.26 & 54.93 & 57.85 & 58.91 & 59.67 & 33.2 & 33.95 & 37.17 & 42.14 \\
-MSMARCO & en & 9.6 & 9.75 & 1.91 & 9.35 & 11.0 & 29.57 & 36.77 & 5.58 & 7.6 & 1.09 & 36.54 & 39.03 & 23.72 & 39.75 & 26.6 & & 6.27 & 15.83 & 27.98 & 36.05 & 38.83 & 39.91 & 41.12 & 41.16 & 42.73 & 43.52 & 44.05 & 20.71 & 23.96 & 25.17 & 27.68 \\
-NFCorpus & en & 13.87 & 11.79 & 4.3 & 9.88 & 12.42 & 22.29 & 31.31 & 0.84 & 16.54 & 2.44 & 31.59 & 32.25 & 23.45 & 33.29 & 25.49 & & 11.8 & 28.26 & 22.79 & 32.08 & 33.89 & 36.21 & 35.78 & 30.22 & 32.63 & 33.34 & 34.18 & 28.64 & 31.1 & 33.18 & 35.08 \\
-NQ & en & 12.87 & 12.75 & 2.61 & 11.69 & 16.08 & 29.85 & 41.83 & 5.99 & 8.42 & 0.64 & 43.87 & 46.47 & 29.8 & 50.45 & 33.6 & & 7.63 & 24.63 & 29.73 & 42.94 & 46.7 & 52.41 & 53.15 & 50.47 & 55.09 & 56.16 & 57.24 & 36.32 & 42.02 & 46.29 & 52.87 \\
-QuoraRetrieval & en & 71.32 & 71.58 & 61.03 & 78.03 & 79.62 & 86.51 & 86.72 & 64.65 & 77.03 & 71.14 & 87.56 & 87.75 & 86.55 & 87.46 & 86.41 & & 78.96 & 84.68 & 72.98 & 85.28 & 85.6 & 84.58 & 74.71 & 87.98 & 88.47 & 88.91 & 89.09 & 85.49 & 85.73 & 85.85 & 85.96 \\
-SCIDOCS & en & 8.04 & 8.47 & 2.81 & 5.5 & 7.53 & 10.13 & 17.12 & 0.0 & 5.63 & 0.78 & 21.64 & 21.82 & 0.03 & 23.77 & 13.96 & & 7.13 & 13.55 & 12.21 & 16.18 & 16.57 & 19.87 & 18.62 & 14.0 & 15.51 & 15.71 & 15.88 & 14.16 & 15.38 & 15.97 & 17.17 \\
-SciFact & en & 29.58 & 29.53 & 13.34 & 25.72 & 29.59 & 52.31 & 65.51 & 47.88 & 38.2 & 4.04 & 64.51 & 62.64 & 48.37 & 65.57 & 50.3 & & 31.79 & 46.66 & 56.9 & 68.29 & 70.17 & 74.7 & 72.11 & 59.74 & 63.42 & 64.2 & 66.77 & 45.76 & 49.91 & 50.91 & 55.38 \\
-Touche2020 & en & 13.99 & 13.17 & 0.97 & 8.9 & 9.89 & 8.57 & 15.79 & 8.46 & 4.88 & 1.06 & 16.9 & 17.22 & 16.06 & 19.93 & 17.4 & & 12.27 & 16.18 & 22.97 & 24.45 & 23.44 & 25.43 & 23.98 & 25.89 & 28.29 & 25.26 & 26.76 & 20.3 & 21.63 & 22.51 & 21.65 \\
-TRECCOVID & en & 36.22 & 35.92 & 14.74 & 26.2 & 22.93 & 40.54 & 44.77 & 29.91 & 16.34 & 10.97 & 47.25 & 50.82 & 39.12 & 51.33 & 37.87 & & 39.31 & 55.35 & 70.3 & 72.98 & 75.17 & 84.88 & 81.37 & 56.05 & 56.68 & 60.09 & 51.9 & 40.7 & 46.11 & 54.77 & 59.48 \\
-BIOSSES & en & 44.93 & 50.25 & 54.7 & 72.31 & 68.38 & 77.32 & 83.32 & 64.95 & 78.7 & 62.01 & 81.64 & 83.57 & 74.18 & 80.43 & 76.27 & 78.04 & 70.93 & 79.5 & 75.21 & 83.02 & 84.84 & 86.25 & 85.31 & 79.0 & 84.86 & 78.94 & 81.91 & 75.89 & 78.93 & 73.12 & 80.43 \\
-SICK-R & en & 55.43 & 55.49 & 58.65 & 72.24 & 80.77 & 72.0 & 70.2 & 56.39 & 69.99 & 62.86 & 77.58 & 79.32 & 79.61 & 80.59 & 79.62 & 77.48 & 74.57 & 79.59 & 65.93 & 67.23 & 68.2 & 69.63 & 69.82 & 71.45 & 73.39 & 73.63 & 74.29 & 80.18 & 80.34 & 79.98 & 80.47 \\
-STS12 & en & 54.64 & 53.51 & 30.87 & 66.05 & 75.3 & 68.19 & 64.34 & 62.49 & 65.08 & 62.6 & 72.37 & 73.08 & 76.02 & 72.63 & 77.9 & 72.3 & 69.17 & 74.29 & 66.53 & 66.59 & 66.99 & 67.5 & 69.66 & 68.59 & 70.33 & 69.11 & 70.12 & 78.05 & 79.11 & 79.02 & 78.85 \\
-STS13 & en & 69.16 & 70.8 & 59.89 & 81.49 & 84.67 & 80.4 & 80.03 & 58.7 & 67.98 & 59.62 & 80.6 & 82.13 & 80.7 & 83.48 & 85.11 & 81.49 & 77.23 & 85.35 & 76.17 & 77.33 & 77.58 & 79.16 & 79.67 & 79.09 & 82.19 & 81.82 & 82.72 & 85.85 & 87.33 & 88.8 & 88.94 \\
-STS14 & en & 60.81 & 63.56 & 47.73 & 73.61 & 80.19 & 74.02 & 74.51 & 54.87 & 64.03 & 57.03 & 75.59 & 76.73 & 78.85 & 78.0 & 80.81 & 74.74 & 70.99 & 79.21 & 69.05 & 71.83 & 72.78 & 74.46 & 74.61 & 74.64 & 77.16 & 77.07 & 78.24 & 82.19 & 83.17 & 84.33 & 84.86 \\
-STS15 & en & 72.31 & 74.08 & 60.29 & 79.72 & 85.4 & 82.57 & 83.3 & 62.54 & 76.59 & 71.57 & 85.39 & 85.58 & 85.84 & 85.66 & 87.48 & 84.28 & 79.74 & 85.52 & 79.24 & 80.66 & 82.62 & 84.47 & 83.81 & 84.85 & 86.31 & 86.01 & 86.26 & 87.46 & 88.28 & 88.89 & 89.32 \\
-STS16 & en & 65.34 & 64.6 & 63.73 & 78.12 & 80.82 & 79.78 & 79.67 & 64.27 & 72.98 & 70.75 & 78.99 & 80.23 & 81.05 & 80.03 & 83.2 & 82.06 & 77.93 & 82.54 & 76.07 & 78.91 & 80.1 & 80.96 & 80.4 & 81.57 & 81.85 & 82.23 & 81.61 & 84.03 & 84.36 & 85.31 & 84.67 \\
-STS17 & en-en & 77.95 & 76.91 & 64.1 & 83.58 & 89.44 & 85.94 & 86.32 & 69.63 & 79.45 & 76.73 & 87.59 & 88.63 & 86.87 & 90.6 & 86.99 & 87.08 & 87.33 & 90.44 & 84.95 & 86.99 & 87.25 & 87.78 & 87.07 & 85.8 & 83.93 & 84.9 & 85.18 & 89.57 & 88.99 & 88.91 & 89.46 \\
-STS22 & en & 56.35 & 53.89 & 56.37 & 59.65 & 61.96 & 67.54 & 64.64 & 55.06 & 60.97 & 39.75 & 67.21 & 65.67 & 61.72 & 67.95 & 63.06 & 64.71 & 59.64 & 63.2 & 65.66 & 67.3 & 68.75 & 69.35 & 66.13 & 66.17 & 64.3 & 66.61 & 65.76 & 62.66 & 62.39 & 64.32 & 65.33 \\
-STSBenchmark & en & 61.54 & 61.55 & 47.29 & 76.52 & 84.25 & 76.97 & 78.81 & 61.26 & 72.25 & 69.77 & 82.03 & 83.09 & 84.42 & 83.42 & 86.82 & 83.78 & 79.54 & 85.67 & 75.34 & 77.59 & 79.21 & 81.39 & 80.9 & 79.58 & 77.6 & 77.65 & 77.73 & 85.52 & 85.36 & 83.93 & 84.01 \\
-SummEval & en & 28.87 & 30.49 & 29.82 & 31.15 & 23.31 & 29.5 & 30.36 & 27.66 & 31.05 & 26.8 & 30.81 & 27.9 & 30.67 & 27.49 & 31.57 & 26.94 & 30.26 & 30.38 & 28.9 & 25.44 & 27.87 & 24.75 & 24.99 & 29.67 & 29.5 & 30.21 & 30.64 & 31.39 & 29.64 & 29.91 & 30.08 \\
-Average & en & 41.97 & 42.06 & 38.33 & 45.45 & 48.72 & 52.35 & 56.0 & 40.28 & 45.21 & 34.95 & 56.26 & 56.53 & 52.44 & 57.78 & 54.71 & & 45.97 & 53.74 & 51.23 & 56.11 & 57.12 & 58.81 & 57.44 & 56.19 & 58.28 & 58.42 & 58.97 & 55.27 & 57.06 & 57.87 & 59.51 \\
diff --git a/plotstables/avg_table.txt b/plotstables/avg_table.txt
deleted file mode 100644
index 1885217e..00000000
--- a/plotstables/avg_table.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-Task ($\ rightarrow$) & Class. & Clust. & PairClass. & Rerank. & Retr. & STS & Summ. & Avg. \\
-Num. Datasets ($\ rightarrow$) & 12 & 11 & 3 & 4 & 15 & 10 & 1 & 56 \\
-Model ($\downarrow$) & Class. & Clust. & PairClass. & Rerank. & Retr. & STS & Summ. & Avg. \\
-Glove & 57.29 & 27.73 & 70.92 & 43.29 & 21.62 & 61.85 & 28.87 & 41.97 \\
-Komninos & 57.65 & 26.57 & 72.94 & 44.75 & 21.22 & 62.47 & 30.49 & 42.06 \\
-LASER2 & 53.18 & 15.28 & 68.86 & 41.44 & 7.93 & 63.27 & 26.8 & 34.95 \\
-LaBSE & 62.71 & 29.55 & 78.87 & 48.42 & 18.99 & 70.8 & 31.05 & 45.21 \\
-BERT & 61.66 & 30.12 & 56.33 & 43.44 & 10.59 & 54.36 & 29.82 & 38.33 \\
-coCondenser-msmarco & 64.71 & 37.64 & 81.74 & 51.84 & 32.96 & 76.47 & 29.5 & 52.35 \\
-SPECTER & 52.37 & 34.06 & 61.37 & 48.1 & 15.88 & 61.02 & 27.66 & 40.28 \\
-SimCSE-BERT-unsup & 62.5 & 29.04 & 70.33 & 46.47 & 20.29 & 74.33 & 31.15 & 45.45 \\
-SimCSE-BERT-sup & 67.32 & 33.43 & 73.68 & 47.54 & 21.82 & 79.12 & 23.31 & 48.72 \\
-MiniLM-L6 & 63.06 & 42.35 & 82.37 & 58.04 & 41.95 & 78.9 & 30.81 & 56.26 \\
-MiniLM-L12 & 63.21 & 41.81 & 82.41 & 58.44 & 42.69 & 79.8 & 27.9 & 56.53 \\
-MiniLM-L12-multilingual & 64.3 & 37.14 & 78.45 & 53.62 & 32.45 & 78.92 & 30.67 & 52.44 \\
-MPNet & 65.07 & 43.69 & 83.04 & 59.36 & 43.81 & 80.28 & 27.49 & 57.78 \\
-MPNet-multilingual & 67.91 & 38.4 & 80.81 & 53.8 & 35.34 & 80.73 & 31.57 & 54.71 \\
-Contriever & 66.68 & 41.1 & 82.53 & 53.14 & 41.88 & 76.51 & 30.36 & 56.0 \\
-Ada Similarity & 70.44 & 37.52 & 76.86 & 49.02 & & 78.6 & 26.94 & \\
-SGPT-125M-nli & 61.46 & 30.95 & 71.78 & 47.56 & 20.9 & 74.71 & 30.26 & 45.97 \\
-SGPT-5.8B-nli & 70.14 & 36.98 & 77.03 & 52.33 & 32.34 & 80.53 & 30.38 & 53.74 \\
-SGPT-125M-msmarco & 60.72 & 35.79 & 75.23 & 50.58 & 37.04 & 73.41 & 28.9 & 51.23 \\
-SGPT-1.3B-msmarco & 66.52 & 39.92 & 79.58 & 54.0 & 44.49 & 75.74 & 25.44 & 56.11 \\
-SGPT-2.7B-msmarco & 67.13 & 39.83 & 80.65 & 54.67 & 46.54 & 76.83 & 27.87 & 57.12 \\
-SGPT-5.8B-msmarco & 68.13 & 40.35 & 82.0 & 56.56 & 50.25 & 78.1 & 24.75 & 58.81 \\
-SGPT-BLOOM-7.1B-msmarco & 66.19 & 38.93 & 81.9 & 55.65 & 48.21 & 77.74 & 24.99 & 57.44 \\
-GTR-Base & 65.25 & 38.63 & 83.85 & 54.23 & 44.67 & 77.07 & 29.67 & 56.19 \\
-GTR-Large & 67.14 & 41.6 & 85.33 & 55.36 & 47.42 & 78.19 & 29.5 & 58.28 \\
-GTR-XL & 67.11 & 41.51 & 86.13 & 55.96 & 47.96 & 77.8 & 30.21 & 58.42 \\
-GTR-XXL & 67.41 & 42.42 & 86.12 & 56.65 & 48.48 & 78.38 & 30.64 & 58.97 \\
-ST5-Base & 69.81 & 40.21 & 85.17 & 53.09 & 33.63 & 81.14 & 31.39 & 55.27 \\
-ST5-Large & 72.31 & 41.65 & 84.97 & 54.0 & 36.71 & 81.83 & 29.64 & 57.06 \\
-ST5-XL & 72.84 & 42.34 & 86.06 & 54.71 & 38.47 & 81.66 & 29.91 & 57.87 \\
-ST5-XXL & 73.42 & 43.71 & 85.06 & 56.43 & 42.24 & 82.63 & 30.08 & 59.51 \\
diff --git a/plotstables/benchmark.pdf b/plotstables/benchmark.pdf
deleted file mode 100644
index 4879780c..00000000
Binary files a/plotstables/benchmark.pdf and /dev/null differ
diff --git a/plotstables/benchmark.png b/plotstables/benchmark.png
deleted file mode 100644
index babd9a2b..00000000
Binary files a/plotstables/benchmark.png and /dev/null differ
diff --git a/plotstables/benchmark_gpu.json b/plotstables/benchmark_gpu.json
deleted file mode 100644
index e8f840fa..00000000
--- a/plotstables/benchmark_gpu.json
+++ /dev/null
@@ -1,134 +0,0 @@
-{
- "LASER2": {
- "STS15": {
- "speed_ms": 1.095552682876587,
- "embedding_size_kb": 4.096
- }
- },
- "komninos": {
- "STS15": {
- "speed_ms": 0.014420787493387857,
- "embedding_size_kb": 1.2
- }
- },
- "glove.6B.300d": {
- "STS15": {
- "speed_ms": 0.015402833620707195,
- "embedding_size_kb": 1.2
- }
- },
- "SGPT-125M-weightedmean-nli-bitfit": {
- "STS15": {
- "speed_ms": 0.3835549751917521,
- "embedding_size_kb": 3.072
- }
- },
- "SGPT-125M-weightedmean-msmarco-specb-bitfit": {
- "STS15": {
- "speed_ms": 0.40986963113149005,
- "embedding_size_kb": 3.072
- }
- },
- "SGPT-5.8B-weightedmean-nli-bitfit": {
- "STS15": {
- "speed_ms": 13.105161627133688,
- "embedding_size_kb": 16.384
- }
- },
- "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": {
- "STS15": {
- "speed_ms": 14.537005225817362,
- "embedding_size_kb": 16.384
- }
- },
- "all-MiniLM-L6-v2": {
- "STS15": {
- "speed_ms": 0.1488305727640788,
- "embedding_size_kb": 1.536
- }
- },
- "all-mpnet-base-v2": {
- "STS15": {
- "speed_ms": 0.35439515113830566,
- "embedding_size_kb": 3.072
- }
- },
- "paraphrase-multilingual-mpnet-base-v2": {
- "STS15": {
- "speed_ms": 0.39108145236968994,
- "embedding_size_kb": 3.072
- }
- },
- "sentence-t5-base": {
- "STS15": {
- "speed_ms": 0.41539565722147626,
- "embedding_size_kb": 3.072
- }
- },
- "sentence-t5-xxl": {
- "STS15": {
- "speed_ms": 15.400389790534973,
- "embedding_size_kb": 3.072
- }
- },
- "gtr-t5-base": {
- "STS15": {
- "speed_ms": 0.41492275396982825,
- "embedding_size_kb": 3.072
- }
- },
- "gtr-t5-xxl": {
- "STS15": {
- "speed_ms": 15.39513130982717,
- "embedding_size_kb": 3.072
- }
- },
- "contriever-base-msmarco": {
- "STS15": {
- "speed_ms": 0.34681657950083417,
- "embedding_size_kb": 3.072
- }
- },
- "msmarco-bert-co-condensor": {
- "STS15": {
- "speed_ms": 0.3462672630945841,
- "embedding_size_kb": 3.072
- }
- },
- "bert-base-uncased": {
- "STS15": {
- "speed_ms": 0.34756950537363684,
- "embedding_size_kb": 3.072
- }
- },
- "sup-simcse-bert-base-uncased": {
- "STS15": {
- "speed_ms": 0.3426841100056966,
- "embedding_size_kb": 3.072
- }
- },
- "unsup-simcse-bert-base-uncased": {
- "STS15": {
- "speed_ms": 0.3423287868499756,
- "embedding_size_kb": 3.072
- }
- },
- "LaBSE": {
- "STS15": {
- "speed_ms": 0.3441281318664551,
- "embedding_size_kb": 3.072
- }
- },
- "all-MiniLM-L12-v2": {
- "STS15": {
- "speed_ms": 0.2361156940460205,
- "embedding_size_kb": 1.536
- }
- },
- "allenai-specter": {
- "STS15": {
- "speed_ms": 0.3867043654123942,
- "embedding_size_kb": 3.072
- }
- }
-}
diff --git a/plotstables/benchmark_to_plot.py b/plotstables/benchmark_to_plot.py
deleted file mode 100644
index ea7b30d8..00000000
--- a/plotstables/benchmark_to_plot.py
+++ /dev/null
@@ -1,348 +0,0 @@
-"""
-Usage:
-Inspired by Fig 3 from https://arxiv.org/pdf/2011.04006.pdf
-"""
-import json
-import os
-import sys
-
-import matplotlib.pyplot as plt
-from mteb import MTEB
-
-
-### GLOBAL VARIABLES ###
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
-]
-
-
-TASK_LIST_SUMMARIZATION = [
- "SummEval",
-]
-
-TASK_LIST_EN = (
- TASK_LIST_CLASSIFICATION
- + TASK_LIST_CLUSTERING
- + TASK_LIST_PAIR_CLASSIFICATION
- + TASK_LIST_RERANKING
- + TASK_LIST_RETRIEVAL
- + TASK_LIST_STS
- + TASK_LIST_SUMMARIZATION
-)
-
-MODEL_TO_NAME = {
- "bert-base-uncased": "BERT",
- "gtr-t5-base": "GTR-Base",
- "gtr-t5-large": "GTR-Large",
- "gtr-t5-xl": "GTR-XL",
- "gtr-t5-xxl": "GTR-XXL",
- "sentence-t5-base": "ST5-Base",
- "sentence-t5-large": "ST5-Large",
- "sentence-t5-xl": "ST5-XL",
- "sentence-t5-xxl": "ST5-XXL",
- "SGPT-125M-weightedmean-msmarco-specb-bitfit": "SGPT-125M-msmarco",
- "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": "SGPT-1.3B-msmarco",
- "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": "SGPT-2.7B-msmarco",
- "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": "SGPT-5.8B-msmarco",
- "sgpt-bloom-7b1-msmarco": "SGPT-BLOOM-7.1B-msmarco",
- "SGPT-125M-weightedmean-nli-bitfit": "SGPT-125M-nli",
- "SGPT-5.8B-weightedmean-nli-bitfit": "SGPT-5.8B-nli",
- "sup-simcse-bert-base-uncased": "SimCSE-BERT-sup",
- "contriever-base-msmarco": "Contriever",
- "msmarco-bert-co-condensor": "coCondenser-msmarco", # They write it as coCondenser in the paper
- "unsup-simcse-bert-base-uncased": "SimCSE-BERT-unsup",
- "glove.6B.300d": "Glove",
- "komninos": "Komninos",
- "all-MiniLM-L6-v2": "MiniLM-L6",
- "all-MiniLM-L12-v2": "MiniLM-L12",
- "paraphrase-multilingual-MiniLM-L12-v2": "MiniLM-L12-multilingual",
- "all-mpnet-base-v2": "MPNet",
- "paraphrase-multilingual-mpnet-base-v2": "MPNet-multilingual",
- "allenai-specter": "SPECTER",
- "text-similarity-ada-001": "Ada Similarity",
-}
-
-NAME_TO_ARCH = {
- "gtr": "T5",
- "st5": "T5",
- "sgpt": "GPT",
- "simcse": "BERT",
- "contriever": "BERT",
- "bert": "BERT",
- "cocondenser": "BERT",
- "specter": "SciBERT",
- "mpnet": "MPNet",
- "minilm": "MiniLM",
- "laser2": "LASER",
- "labse": "BERT",
- "glove": "WordEmbeddings",
- "komninos": "WordEmbeddings",
-}
-
-# Base from:
-# https://coolors.co/palette/ff5400-ff6d00-ff8500-ff9100-ff9e00-00b4d8-0096c7-0077b6-023e8a-03045e
-# Yellow tones from:
-# https://coolors.co/palette/6ab6dc-49a6d4-2f94c6-277ba5-1f6284-e0b700-ffd20a-ffda33-ffe15c-ffe570
-# Green from:
-# https://coolors.co/palette/f94144-f3722c-f8961e-f9844a-f9c74f-90be6d-43aa8b-4d908e-577590-277da1
-MODEL_TO_COLOR = {
- "MiniLM": "#BAF19C",#"#017600", # Green
- "MPNet": "#F94144",#"#007A7A", # Light Green
- "GTR": "#FF5400",#"#221D91", # Blue 1
- "ST5": "#FF9E00",#"#86D4F1", # Blue 2
- "SGPT": "#00B4D8",#"#7B3FB9", # Purple
- "SimCSE": "#F9C74F",#"#2070B4", # Blue 3
- "LaBSE": "#F9C74F",#"#2070B4", # Blue 3
- "SPECTER": "#E0B700", # Shade of #2070B4
- "Glove": "#023E8A",#"#9BC7DD", # Light Blue
- "LASER2": "#03045E", # Grey
-}
-
-ARCH_TO_COLOR = {
- "T5": MODEL_TO_COLOR["GTR"],
- "GPT": MODEL_TO_COLOR["SGPT"],
- "BERT": MODEL_TO_COLOR["SimCSE"],
- "SciBERT": MODEL_TO_COLOR["SPECTER"],
- "MiniLM": MODEL_TO_COLOR["MiniLM"],
- "MPNet": MODEL_TO_COLOR["MPNet"],
- "WordEmbeddings": MODEL_TO_COLOR["Glove"],
- "LASER": MODEL_TO_COLOR["LASER2"],
-}
-
-
-### LOGIC ###
-
-# Get average MTEB performance
-
-results_folder = sys.argv[1].strip("/")
-benchmark_json = sys.argv[2]
-
-all_results = {}
-
-for model_name in os.listdir(results_folder):
- model_res_folder = os.path.join(results_folder, model_name)
- if os.path.isdir(model_res_folder):
- all_results.setdefault(model_name, {})
- for file_name in os.listdir(model_res_folder):
- if not file_name.endswith(".json"):
- print(f"Skipping non-json {file_name}")
- continue
- with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f:
- results = json.load(f)
- all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}}
-
-def get_row(dataset, model_name, limit_langs=[], skip_langs=[]):
- # CQADupstackRetrieval uses the same metric as its subsets
- tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks
- assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1."
- main_metric = tasks[0].description["main_score"]
- test_result = all_results.get(model_name, {}). get(dataset, {})
-
- # Dev / Val set is used for MSMARCO (See BEIR paper)
- if "MSMARCO" in dataset:
- test_result = (
- test_result.get("dev") if "dev" in test_result else test_result.get("validation")
- )
- else:
- test_result = test_result.get("test")
-
- for lang in tasks[0].description["eval_langs"]:
- if (limit_langs and lang not in limit_langs) or (skip_langs and lang in skip_langs):
- continue
- elif test_result is None:
- raise NotImplementedError(f"Got no test result {test_result} for ds: {dataset} model: {model_name}")
-
- test_result_lang = test_result.get(lang, test_result)
- if main_metric == "cosine_spearman":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman")
- elif main_metric == "ap":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("ap")
- else:
- test_result_lang = test_result_lang.get(main_metric)
-
- if test_result_lang is None:
- raise NotImplementedError
-
- return test_result_lang
- raise NotImplementedError
-
-results_avg = {}
-
-for model in all_results:
- try:
- model_task_results = [get_row(task, model, limit_langs=["en", "en-en"]) for task in TASK_LIST_EN]
- except:
- continue
- results_avg[model] = 100 * (sum(model_task_results) / len(model_task_results))
-
-
-with open(benchmark_json, "r") as f:
- gpu_bench = json.load(f)
-
-import numpy as np
-
-fig, ax = plt.subplots(figsize=(14,8))
-
-for k, v in gpu_bench.items():
- if k in ("specs", "sgpt-bloom-7b1-msmarco", "paraphrase-multilingual-MiniLM-L12-v2", "paraphrase-multilingual-mpnet-base-v2"):
- continue
-
- model_name = MODEL_TO_NAME.get(k, k)
- model_arch = NAME_TO_ARCH.get(model_name.split(" ")[0].split("-")[0].lower(), (model_name))
- color = ARCH_TO_COLOR[model_arch]
-
- if k not in results_avg:
- print(f"Missing average score for {k}")
- continue
-
- speed = 1000 / v["STS15"]["speed_ms"]
- score = results_avg[k]
-
- ax.scatter(
- speed,
- score,
- label=model_arch,
- color=color,
- s=v["STS15"]["embedding_size_kb"] * 150,
- alpha=.5
- )
- # Empirical offsets
- x_offset = y_offset = 0
- if model_name in ("ST5-Base"):
- x_offset = 0.5 * speed
- elif model_name in ("GTR-Base"):
- x_offset = 0.5 * speed
- y_offset = -0.01 * score
- elif model_name in ("Contriever"):
- x_offset = -0.14 * speed
- y_offset = 0.018 * score
- elif model_name in ("LaBSE"):
- x_offset = 0.45 * speed
- y_offset = 0.01 * score
- elif model_name in ("GTR-XXL", "ST5-XXL"):
- x_offset = -0.65 * speed
- if model_name == "GTR-XXL":
- y_offset = 0.01 * score
- elif model_name == "Komninos":
- x_offset = 0.4 * speed
- y_offset = 0.05 * score
- elif model_name in ("Glove", "SPECTER"):
- x_offset = 0.2 * speed
- y_offset = -0.025 * score
- elif model_name.startswith("SGPT-5.8B"):
- x_offset = 0.3 * speed
- y_offset = 0.05 * score
- elif model_name.startswith("SGPT-125M-nli"):
- x_offset = -0.45 * speed
- y_offset = -0.008 * score
- elif model_name.startswith("SGPT-125M-msmarco"):
- x_offset = -0.2 * speed
- y_offset = 0.01 * score
- elif model_name.startswith("MiniLM-L12"):
- y_offset = -0.01 * score
- x_offset = -0.15 * speed
- elif model_arch in ("BERT", "MiniLM", "MPNet", "LASER") or model_name.startswith("SGPT-125M"):
- x_offset = -0.2 * speed
-
- ax.text(
- speed - x_offset,
- score - y_offset,
- model_name,
- )
-
- # Annotate does not work with logscale, https://stackoverflow.com/questions/21140385/matplotlib-annotate-doesnt-work-on-log-scale
- #ax.annotate(
- # MODEL_TO_NAME.get(k, k),
- # xy=(np.log10(1000 / v["STS15"]["speed_ms"]), results_avg[k] - offset)
- #)
-
-ax.set_xlabel("Speed (examples per sec)")
-ax.set_ylabel("MTEB Score")
-ax.set_xscale('log')
-ax.grid(alpha=0.5)
-
-# Create deduplicated Global Legend
-handles, labels = plt.gca().get_legend_handles_labels()
-by_label = dict(zip(labels, handles))
-lgnd = plt.legend(
- by_label.values(),
- by_label.keys(),
- title="Base Architecture",
- loc=(0.08,0.08), # "lower left",
-)
-# Rescale bubbles to have the same size
-for handle in lgnd.legendHandles:
- handle.set_sizes([70.0])
-
-
-plt.savefig('benchmark.pdf', dpi=300, bbox_inches='tight')
diff --git a/plotstables/bitext.txt b/plotstables/bitext.txt
deleted file mode 100644
index fbe13655..00000000
--- a/plotstables/bitext.txt
+++ /dev/null
@@ -1,118 +0,0 @@
-Dataset & Language & LASER2 & LaBSE & MiniLM-L12-multilingual & MPNet-multilingual & SGPT-BLOOM-7.1B-msmarco \\
-BUCC & de-en & 99.21 & 99.35 & 97.11 & 98.59 & 54.0 \\
-BUCC & fr-en & 98.39 & 98.72 & 94.99 & 96.89 & 97.06 \\
-BUCC & ru-en & 97.62 & 97.78 & 95.06 & 96.44 & 45.3 \\
-BUCC & zh-en & 97.7 & 99.16 & 95.63 & 97.56 & 97.96 \\
-Tatoeba & sqi-eng & 97.22 & 96.76 & 98.17 & 98.57 & 10.38 \\
-Tatoeba & fry-eng & 42.07 & 89.31 & 31.13 & 43.54 & 24.62 \\
-Tatoeba & kur-eng & 19.09 & 83.59 & 46.94 & 61.44 & 8.26 \\
-Tatoeba & tur-eng & 98.03 & 98.0 & 95.08 & 96.17 & 6.15 \\
-Tatoeba & deu-eng & 99.07 & 99.2 & 97.02 & 97.73 & 70.1 \\
-Tatoeba & nld-eng & 95.35 & 96.07 & 94.58 & 95.5 & 29.74 \\
-Tatoeba & ron-eng & 96.52 & 96.92 & 95.3 & 96.43 & 27.23 \\
-Tatoeba & ang-eng & 25.22 & 59.28 & 10.24 & 16.72 & 28.76 \\
-Tatoeba & ido-eng & 80.86 & 89.42 & 40.25 & 43.91 & 43.91 \\
-Tatoeba & jav-eng & 9.95 & 79.77 & 17.04 & 23.39 & 15.02 \\
-Tatoeba & isl-eng & 94.32 & 94.75 & 24.07 & 59.25 & 6.29 \\
-Tatoeba & slv-eng & 95.4 & 96.03 & 96.92 & 97.08 & 10.14 \\
-Tatoeba & cym-eng & 5.85 & 92.0 & 13.25 & 22.31 & 6.97 \\
-Tatoeba & kaz-eng & 53.3 & 87.49 & 34.89 & 61.49 & 3.32 \\
-Tatoeba & est-eng & 96.43 & 96.55 & 97.33 & 98.4 & 4.76 \\
-Tatoeba & heb-eng & 0.0 & 91.53 & 86.88 & 88.26 & 1.69 \\
-Tatoeba & gla-eng & 1.52 & 85.66 & 3.61 & 4.72 & 2.09 \\
-Tatoeba & mar-eng & 92.93 & 92.65 & 92.38 & 93.83 & 45.53 \\
-Tatoeba & lat-eng & 64.81 & 80.07 & 19.47 & 24.25 & 28.76 \\
-Tatoeba & bel-eng & 79.54 & 95.0 & 67.73 & 79.94 & 8.03 \\
-Tatoeba & pms-eng & 36.23 & 64.57 & 30.7 & 34.19 & 31.94 \\
-Tatoeba & gle-eng & 4.2 & 93.8 & 11.62 & 16.85 & 3.26 \\
-Tatoeba & pes-eng & 93.13 & 94.7 & 92.59 & 93.47 & 12.13 \\
-Tatoeba & nob-eng & 95.77 & 98.4 & 97.73 & 98.53 & 21.07 \\
-Tatoeba & bul-eng & 93.57 & 94.58 & 92.65 & 93.52 & 20.09 \\
-Tatoeba & cbk-eng & 77.17 & 79.44 & 55.37 & 58.68 & 64.63 \\
-Tatoeba & hun-eng & 95.2 & 96.55 & 91.58 & 94.18 & 5.07 \\
-Tatoeba & uig-eng & 56.49 & 92.4 & 24.39 & 48.35 & 1.27 \\
-Tatoeba & rus-eng & 92.58 & 93.75 & 91.87 & 92.92 & 59.84 \\
-Tatoeba & spa-eng & 97.33 & 98.4 & 95.42 & 97.0 & 94.48 \\
-Tatoeba & hye-eng & 88.72 & 94.09 & 93.28 & 94.38 & 0.5 \\
-Tatoeba & tel-eng & 96.72 & 97.86 & 36.4 & 79.73 & 64.62 \\
-Tatoeba & afr-eng & 92.59 & 96.18 & 58.22 & 72.96 & 16.62 \\
-Tatoeba & mon-eng & 3.42 & 95.91 & 95.04 & 96.14 & 2.85 \\
-Tatoeba & arz-eng & 66.16 & 76.0 & 51.26 & 55.69 & 70.66 \\
-Tatoeba & hrv-eng & 96.72 & 96.95 & 95.98 & 97.0 & 12.79 \\
-Tatoeba & nov-eng & 60.02 & 74.38 & 47.99 & 50.23 & 52.23 \\
-Tatoeba & gsw-eng & 27.52 & 46.5 & 25.74 & 25.12 & 21.03 \\
-Tatoeba & nds-eng & 77.13 & 79.42 & 32.16 & 38.88 & 23.92 \\
-Tatoeba & ukr-eng & 93.52 & 93.97 & 92.82 & 92.67 & 22.06 \\
-Tatoeba & uzb-eng & 23.2 & 84.23 & 17.14 & 23.19 & 4.71 \\
-Tatoeba & lit-eng & 96.2 & 96.47 & 93.16 & 95.37 & 4.49 \\
-Tatoeba & ina-eng & 93.93 & 95.37 & 79.13 & 84.32 & 73.67 \\
-Tatoeba & lfn-eng & 63.39 & 67.54 & 47.02 & 49.56 & 44.85 \\
-Tatoeba & zsm-eng & 95.41 & 95.62 & 95.31 & 95.8 & 79.95 \\
-Tatoeba & ita-eng & 94.32 & 92.72 & 93.05 & 93.76 & 65.04 \\
-Tatoeba & cmn-eng & 85.62 & 95.1 & 94.93 & 95.83 & 91.45 \\
-Tatoeba & lvs-eng & 95.33 & 95.88 & 97.87 & 97.53 & 6.55 \\
-Tatoeba & glg-eng & 96.14 & 96.82 & 94.0 & 95.32 & 79.86 \\
-Tatoeba & ceb-eng & 9.93 & 64.42 & 8.05 & 7.39 & 6.64 \\
-Tatoeba & bre-eng & 31.2 & 15.07 & 5.56 & 6.42 & 4.67 \\
-Tatoeba & ben-eng & 89.43 & 88.55 & 36.48 & 64.9 & 75.98 \\
-Tatoeba & swg-eng & 33.1 & 59.36 & 26.31 & 22.8 & 16.89 \\
-Tatoeba & arq-eng & 26.63 & 42.69 & 18.6 & 19.84 & 27.75 \\
-Tatoeba & kab-eng & 65.88 & 4.31 & 1.16 & 1.41 & 1.69 \\
-Tatoeba & fra-eng & 94.28 & 94.86 & 91.72 & 93.12 & 91.44 \\
-Tatoeba & por-eng & 94.54 & 94.14 & 92.13 & 93.02 & 92.62 \\
-Tatoeba & tat-eng & 34.74 & 85.92 & 10.25 & 10.89 & 3.59 \\
-Tatoeba & oci-eng & 58.13 & 65.81 & 38.57 & 43.49 & 40.17 \\
-Tatoeba & pol-eng & 97.32 & 97.22 & 94.28 & 96.95 & 14.09 \\
-Tatoeba & war-eng & 8.25 & 60.29 & 7.25 & 7.42 & 10.38 \\
-Tatoeba & aze-eng & 82.41 & 94.93 & 62.1 & 76.36 & 6.32 \\
-Tatoeba & vie-eng & 96.73 & 97.2 & 95.12 & 97.23 & 94.2 \\
-Tatoeba & nno-eng & 72.75 & 94.48 & 76.34 & 81.41 & 16.28 \\
-Tatoeba & cha-eng & 14.86 & 31.77 & 15.98 & 12.59 & 23.26 \\
-Tatoeba & mhr-eng & 6.86 & 15.74 & 6.89 & 7.57 & 1.56 \\
-Tatoeba & dan-eng & 95.22 & 95.71 & 94.8 & 96.17 & 23.52 \\
-Tatoeba & ell-eng & 96.2 & 95.35 & 95.43 & 94.93 & 5.34 \\
-Tatoeba & amh-eng & 80.82 & 91.47 & 36.21 & 53.49 & 0.03 \\
-Tatoeba & pam-eng & 3.24 & 10.73 & 5.41 & 5.39 & 5.85 \\
-Tatoeba & hsb-eng & 45.75 & 67.11 & 36.1 & 44.32 & 9.68 \\
-Tatoeba & srp-eng & 93.64 & 94.43 & 92.24 & 94.12 & 11.69 \\
-Tatoeba & epo-eng & 96.61 & 98.2 & 41.73 & 55.12 & 26.2 \\
-Tatoeba & kzj-eng & 4.46 & 11.33 & 6.24 & 5.88 & 5.17 \\
-Tatoeba & awa-eng & 33.74 & 71.7 & 33.43 & 42.83 & 35.01 \\
-Tatoeba & fao-eng & 57.04 & 87.4 & 27.51 & 38.24 & 12.61 \\
-Tatoeba & mal-eng & 98.16 & 98.45 & 32.2 & 88.46 & 83.3 \\
-Tatoeba & ile-eng & 87.88 & 85.58 & 57.71 & 60.36 & 59.59 \\
-Tatoeba & bos-eng & 95.86 & 94.92 & 93.27 & 94.02 & 13.65 \\
-Tatoeba & cor-eng & 4.45 & 10.11 & 3.42 & 3.53 & 2.83 \\
-Tatoeba & cat-eng & 95.8 & 95.38 & 94.42 & 96.05 & 88.31 \\
-Tatoeba & eus-eng & 93.32 & 95.01 & 23.18 & 31.33 & 53.38 \\
-Tatoeba & yue-eng & 87.75 & 89.58 & 71.45 & 77.58 & 77.03 \\
-Tatoeba & swe-eng & 95.31 & 95.63 & 94.42 & 95.45 & 19.53 \\
-Tatoeba & dtp-eng & 7.39 & 10.85 & 5.69 & 5.03 & 3.41 \\
-Tatoeba & kat-eng & 81.16 & 95.02 & 95.44 & 95.46 & 0.42 \\
-Tatoeba & jpn-eng & 93.78 & 95.38 & 90.41 & 92.51 & 71.36 \\
-Tatoeba & csb-eng & 27.03 & 52.57 & 21.56 & 23.73 & 10.03 \\
-Tatoeba & xho-eng & 4.68 & 91.55 & 4.52 & 6.53 & 5.51 \\
-Tatoeba & orv-eng & 23.24 & 38.93 & 15.1 & 23.77 & 5.79 \\
-Tatoeba & ind-eng & 92.98 & 93.66 & 92.74 & 93.5 & 88.04 \\
-Tatoeba & tuk-eng & 16.35 & 75.27 & 15.16 & 14.91 & 5.48 \\
-Tatoeba & max-eng & 36.96 & 63.26 & 45.25 & 48.77 & 36.14 \\
-Tatoeba & swh-eng & 55.66 & 84.5 & 14.48 & 16.02 & 16.74 \\
-Tatoeba & hin-eng & 95.32 & 96.87 & 97.62 & 97.75 & 85.23 \\
-Tatoeba & dsb-eng & 42.34 & 64.81 & 33.43 & 36.85 & 8.78 \\
-Tatoeba & ber-eng & 77.63 & 8.4 & 4.43 & 4.88 & 4.92 \\
-Tatoeba & tam-eng & 87.32 & 89.0 & 24.64 & 73.6 & 72.76 \\
-Tatoeba & slk-eng & 95.82 & 96.5 & 95.15 & 96.62 & 9.98 \\
-Tatoeba & tgl-eng & 63.19 & 96.02 & 13.09 & 17.67 & 10.7 \\
-Tatoeba & ast-eng & 76.35 & 90.68 & 62.17 & 70.08 & 71.13 \\
-Tatoeba & mkd-eng & 93.63 & 93.6 & 91.0 & 93.02 & 10.47 \\
-Tatoeba & khm-eng & 74.19 & 78.37 & 32.11 & 58.8 & 0.37 \\
-Tatoeba & ces-eng & 95.52 & 96.68 & 95.12 & 95.73 & 9.55 \\
-Tatoeba & tzl-eng & 36.56 & 58.88 & 25.46 & 34.21 & 27.82 \\
-Tatoeba & urd-eng & 84.23 & 93.22 & 94.57 & 95.12 & 70.1 \\
-Tatoeba & ara-eng & 90.14 & 88.8 & 87.93 & 90.19 & 85.37 \\
-Tatoeba & kor-eng & 87.97 & 90.95 & 92.52 & 93.07 & 22.39 \\
-Tatoeba & yid-eng & 2.49 & 88.79 & 14.38 & 30.73 & 0.16 \\
-Tatoeba & fin-eng & 96.98 & 96.37 & 93.1 & 95.92 & 3.41 \\
-Tatoeba & tha-eng & 96.38 & 96.14 & 96.72 & 95.99 & 2.22 \\
-Tatoeba & wuu-eng & 75.09 & 90.18 & 76.0 & 78.25 & 79.58 \\
-Average & mix & 67.42 & 81.75 & 57.98 & 63.38 & 31.08 \\
diff --git a/plotstables/dataset_sim.py b/plotstables/dataset_sim.py
deleted file mode 100644
index f61eaf96..00000000
--- a/plotstables/dataset_sim.py
+++ /dev/null
@@ -1,304 +0,0 @@
-# pip install GitPython mteb beir seaborn
-import os
-import random
-
-import seaborn as sns
-import matplotlib.pyplot as plt
-from mteb import MTEB
-from mteb.evaluation.evaluators.utils import cos_sim
-import numpy as np
-import pandas as pd
-from sentence_transformers import SentenceTransformer
-import torch
-
-
-if os.path.exists("sim_data.csv"):
- data_emb_df = (pd.read_csv("sim_data.csv", index_col=0) * 100).round(0).astype(int)
- plt.figure(figsize=(40, 24))
- # define the mask to set the values in the upper triangle to True
- mask = np.triu(np.ones_like(data_emb_df, dtype=np.bool))
- heatmap = sns.heatmap(
- data_emb_df,
- mask=mask,
- vmin=data_emb_df.values.min(),
- vmax=data_emb_df.values.max(),
- annot=True,
- cmap='Blues',
- fmt='g',
- )
- heatmap.set_xticklabels(heatmap.get_xmajorticklabels(), fontsize=16)#, fontweight="bold")
- heatmap.set_yticklabels(heatmap.get_ymajorticklabels(), fontsize=16)#, fontweight="bold")
- # Save
- plt.savefig('heatmap_data.pdf', dpi=450, bbox_inches='tight')
- exit()
-
-
-### GLOBAL VARIABLES ###
-
-DATAPATH = "./"
-
-SEED = 42
-
-K_SAMPLES = 100
-LEN_KEYS = {
- "text",
- "sentences",
- "sentence1",
- "sentence2",
- "sent1",
- "sent2"
- "query",
- "positive",
- "negative"
- "queries",
- "corpus",
- "machine_summaries",
- "human_summaries",
-}
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackAndroidRetrieval",
- "CQADupstackEnglishRetrieval",
- "CQADupstackGamingRetrieval",
- "CQADupstackGisRetrieval",
- "CQADupstackMathematicaRetrieval",
- "CQADupstackPhysicsRetrieval",
- "CQADupstackProgrammersRetrieval",
- "CQADupstackStatsRetrieval",
- "CQADupstackTexRetrieval",
- "CQADupstackUnixRetrieval",
- "CQADupstackWebmastersRetrieval",
- "CQADupstackWordpressRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
-]
-
-
-TASK_LIST_SUMMARIZATION = [
- "SummEval",
-]
-
-TASK_LIST_EN = (
- TASK_LIST_CLASSIFICATION
- + TASK_LIST_CLUSTERING
- + TASK_LIST_PAIR_CLASSIFICATION
- + TASK_LIST_RERANKING
- + TASK_LIST_RETRIEVAL
- + TASK_LIST_STS
- + TASK_LIST_SUMMARIZATION
-)
-
-### LOGIC ###
-
-def get_samples_beir(hf_hub_name):
- # Somehow needs to be set in the function scope
- random.seed(SEED)
- from beir.datasets.data_loader import GenericDataLoader as BeirDataLoader
- path = os.path.join(DATAPATH, hf_hub_name)
- print("GOT PATH", path)
- split = "validation" if "MSMARCO" in hf_hub_name else "test"
- if not os.path.exists(path):
- from beir import util
- if "cqadupstack" in hf_hub_name:
- hf_hub_name = "cqadupstack"
- url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{hf_hub_name}.zip"
- util.download_and_unzip(url, DATAPATH)
- corpus, queries, relevant_docs = BeirDataLoader(path).load(split=split)
- # Pick shortest k samples
- samples = [v["text"] + " " + v["title"] for v in sorted(list(corpus.values()), key=lambda x: len(x["text"]))[:K_SAMPLES]]
- # Optionally randomly pick
- #samples = [v["text"] + " " + v["title"] for v in random.choices(sorted(list(corpus.values()), key=lambda x: len(x["text"])), k=K_SAMPLES)]
- return samples
-
-def load_data(hf_hub_name, subset=None):
- """
- Load dataset from Hub via cloning for easy offline usage with HF_DATASETS_OFFLINE=1
- Can be replaced with just `load_dataset(hf_hub_name, subset)` if preferred
- """
- from datasets import load_dataset
- path = os.path.join(DATAPATH, hf_hub_name)
- if os.path.exists(path):
- dataset = load_dataset(path, subset)
- else:
- from git import Repo
- Repo.clone_from("https://huggingface.co/datasets/mteb/" + hf_hub_name, path)
- dataset = load_dataset(path, subset)
- return dataset
-
-def get_samples_ds(hf_hub_name):
- ds = load_data(hf_hub_name)
- # Optionally shuffle
- # .shuffle(seed=SEED)
- assert "test" in ds, f"No test set for {hf_hub_name}"
- len_keys = list(set(ds["test"].features.keys()) & LEN_KEYS)
- split = "test"
- k = len_keys[0]
- if isinstance(ds[split][k][0], str):
- # Select K shortest examples
- samples = sorted([x for x in ds[split][k]], key=len)[:K_SAMPLES]
- elif isinstance(ds[split][k][0], list):
- assert isinstance(ds[split][k][0][0], str), f"Too nested: {k}"
- # Select K shortest examples
- samples = [y for x in ds[split][k] for y in x]
- samples = sorted(samples, key=len)[:K_SAMPLES]
- # Optionally randomly select
- # random.choices(samples, k=K_SAMPLES)
- else:
- raise ValueError(f"Unknown type {type(ds[split][k])}")
- return samples
-
-
-embeddings = {}
-model = SentenceTransformer("sentence-transformers/sentence-t5-xxl")
-
-# Optionally custom selection
-# TASKS = ["ArguAna", "ClimateFEVER", "DBPedia", "FEVER", "FiQA2018", "HotpotQA", "NFCorpus", "NQ", "QuoraRetrieval", "SCIDOCS", "SciFact", "Touche2020", "TRECCOVID"]
-
-TASKS = TASK_LIST_EN
-
-
-for task in MTEB(tasks=TASKS).tasks:
- print("Task: ", task)
- if "hf_hub_name" in task.description:
- hub_name = hub_url = task.description.get("hf_hub_name")
- samples = get_samples_ds(hub_name.split("/")[-1])
- if "beir_name" in task.description:
- hub_name = hub_url = "BeIR/" + task.description.get("beir_name")
- samples = get_samples_beir("/".join(hub_name.split("/")[1:]))
- embeddings[task.description["name"]] = model.encode(samples)
-
-# Plot 1: Compute all cos sims & then average
-"""
-data_dict = []
-for i, task_1 in enumerate(TASKS):
- data_dict.append({task_2: torch.mean(cos_sim(embeddings[task_1], embeddings[task_2])).item() for j, task_2 in enumerate(TASKS)})
-
-data_df = pd.DataFrame(data_dict)
-data_df.set_index(data_df.columns, inplace=True)
-
-
-# Save
-data_df.to_csv("data.csv")
-
-import seaborn as sns
-import matplotlib.pyplot as plt
-
-plt.figure(figsize=(32, 16))
-# define the mask to set the values in the upper triangle to True
-mask = np.triu(np.ones_like(data_df, dtype=np.bool))
-#heatmap = sns.heatmap(data_df, mask=mask, vmin=-1, vmax=1, annot=True, cmap='Blues')
-heatmap = sns.heatmap(data_df, mask=mask, vmin=data_df.values.min(), vmax=data_df.values.max(), annot=True, cmap='Blues')
-heatmap.set_title('Similarity of MTEB datasets', fontdict={'fontsize':18}, pad=16)
-
-plt.savefig('heatmap_data.pdf', dpi=300, bbox_inches='tight')
-"""
-
-
-# Plot 2: Average embeddings & then compute cos_sim
-
-data_dict_emb = []
-for i, task_1 in enumerate(TASKS):
- data_dict_emb.append({task_2: cos_sim(np.mean(embeddings[task_1], axis=0), np.mean(embeddings[task_2], axis=0)).item() for j, task_2 in enumerate(TASKS)})
-
-data_emb_df = pd.DataFrame(data_dict_emb)
-data_emb_df.set_index(data_emb_df.columns, inplace=True)
-
-
-plt.figure(figsize=(36, 24))
-# define the mask to set the values in the upper triangle to True
-mask = np.triu(np.ones_like(data_emb_df, dtype=np.bool))
-heatmap = sns.heatmap(data_emb_df, mask=mask, vmin=data_emb_df.values.min(), vmax=data_emb_df.values.max(), annot=True, cmap='Blues')
-#heatmap.set_title('Similarity of MTEB datasets', fontdict={'fontsize':18}, pad=16)
-
-# Save
-data_emb_df.to_csv("sim_data.csv")
-plt.savefig('heatmap_data.pdf', dpi=450, bbox_inches='tight')
-
-
-# Plot 3: Min (/Max) embeddings & then compute cos_sim
-"""
-data_dict_emb = []
-for i, task_1 in enumerate(TASKS):
- data_dict_emb.append({task_2: cos_sim(np.min(embeddings[i], axis=0), np.min(embeddings[j], axis=0)).item() for j, task_2 in enumerate(TASKS)})
-
-data_emb_df = pd.DataFrame(data_dict_emb)
-data_emb_df.set_index(data_emb_df.columns, inplace=True)
-
-import seaborn as sns
-import matplotlib.pyplot as plt
-
-plt.figure(figsize=(32, 16))
-# define the mask to set the values in the upper triangle to True
-mask = np.triu(np.ones_like(data_emb_df, dtype=np.bool))
-heatmap = sns.heatmap(data_emb_df, mask=mask, vmin=data_emb_df.values.min(), vmax=data_emb_df.values.max(), annot=True, cmap='Blues')
-heatmap.set_title('Similarity of MTEB datasets', fontdict={'fontsize':18}, pad=16)
-
-plt.savefig('heatmap_data.pdf', dpi=300, bbox_inches='tight')
-"""
diff --git a/plotstables/heatmap_data.pdf b/plotstables/heatmap_data.pdf
deleted file mode 100644
index c796621b..00000000
Binary files a/plotstables/heatmap_data.pdf and /dev/null differ
diff --git a/plotstables/heatmap_mean_emb.pdf b/plotstables/heatmap_mean_emb.pdf
deleted file mode 100644
index 04054bdd..00000000
Binary files a/plotstables/heatmap_mean_emb.pdf and /dev/null differ
diff --git a/plotstables/heatmap_mean_emb_small.png b/plotstables/heatmap_mean_emb_small.png
deleted file mode 100644
index ac5d2b47..00000000
Binary files a/plotstables/heatmap_mean_emb_small.png and /dev/null differ
diff --git a/plotstables/heatmap_model.pdf b/plotstables/heatmap_model.pdf
deleted file mode 100644
index 41569ce4..00000000
Binary files a/plotstables/heatmap_model.pdf and /dev/null differ
diff --git a/plotstables/heatmap_model.png b/plotstables/heatmap_model.png
deleted file mode 100644
index b8ea3078..00000000
Binary files a/plotstables/heatmap_model.png and /dev/null differ
diff --git a/plotstables/heatmap_tasks.pdf b/plotstables/heatmap_tasks.pdf
deleted file mode 100644
index 42918f34..00000000
Binary files a/plotstables/heatmap_tasks.pdf and /dev/null differ
diff --git a/plotstables/heatmap_tasks.png b/plotstables/heatmap_tasks.png
deleted file mode 100644
index 31b1519b..00000000
Binary files a/plotstables/heatmap_tasks.png and /dev/null differ
diff --git a/plotstables/mteb_diagram.drawio b/plotstables/mteb_diagram.drawio
deleted file mode 100644
index ca51accd..00000000
--- a/plotstables/mteb_diagram.drawio
+++ /dev/null
@@ -1 +0,0 @@

\ No newline at end of file
diff --git a/plotstables/mteb_diagram.pdf b/plotstables/mteb_diagram.pdf
deleted file mode 100644
index b39d212b..00000000
Binary files a/plotstables/mteb_diagram.pdf and /dev/null differ
diff --git a/plotstables/mteb_diagram.png b/plotstables/mteb_diagram.png
deleted file mode 100644
index d209768e..00000000
Binary files a/plotstables/mteb_diagram.png and /dev/null differ
diff --git a/plotstables/multilingclf.txt b/plotstables/multilingclf.txt
deleted file mode 100644
index 86efe576..00000000
--- a/plotstables/multilingclf.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-Dataset & Language & LASER2 & LaBSE & MiniLM-L12-multilingual & MPNet-multilingual & SGPT-BLOOM-7.1B-msmarco \\
-AmazonCounterfactualClassification & de & 67.82 & 73.17 & 68.35 & 69.95 & 61.35 \\
-AmazonCounterfactualClassification & ja & 68.76 & 76.42 & 63.45 & 69.79 & 58.23 \\
-AmazonReviewsClassification & de & 31.07 & 39.92 & 35.91 & 39.52 & 29.7 \\
-AmazonReviewsClassification & es & 32.72 & 39.39 & 37.49 & 39.99 & 35.97 \\
-AmazonReviewsClassification & fr & 31.12 & 38.52 & 35.3 & 39.0 & 35.92 \\
-AmazonReviewsClassification & ja & 28.94 & 36.44 & 33.24 & 36.64 & 27.64 \\
-AmazonReviewsClassification & zh & 30.89 & 36.45 & 35.26 & 37.74 & 32.63 \\
-MassiveIntentClassification & af & 38.01 & 56.12 & 45.88 & 52.32 & 47.85 \\
-MassiveIntentClassification & am & 12.7 & 55.71 & 36.75 & 41.55 & 33.3 \\
-MassiveIntentClassification & ar & 37.16 & 50.86 & 45.14 & 51.43 & 59.25 \\
-MassiveIntentClassification & az & 19.98 & 58.97 & 47.42 & 56.98 & 45.24 \\
-MassiveIntentClassification & bn & 42.51 & 58.22 & 35.34 & 48.79 & 61.59 \\
-MassiveIntentClassification & cy & 17.33 & 50.16 & 26.12 & 27.87 & 44.92 \\
-MassiveIntentClassification & da & 45.61 & 58.25 & 57.73 & 62.77 & 51.23 \\
-MassiveIntentClassification & de & 44.79 & 56.21 & 50.71 & 59.57 & 56.1 \\
-MassiveIntentClassification & el & 46.71 & 57.03 & 58.7 & 62.62 & 46.13 \\
-MassiveIntentClassification & es & 45.44 & 58.32 & 59.66 & 64.43 & 66.35 \\
-MassiveIntentClassification & fa & 45.01 & 62.33 & 61.02 & 65.34 & 51.2 \\
-MassiveIntentClassification & fi & 45.94 & 60.12 & 57.54 & 62.28 & 45.33 \\
-MassiveIntentClassification & fr & 46.13 & 60.47 & 60.25 & 64.82 & 66.95 \\
-MassiveIntentClassification & he & 42.55 & 56.55 & 52.51 & 58.21 & 43.18 \\
-MassiveIntentClassification & hi & 40.2 & 59.4 & 58.37 & 62.77 & 63.54 \\
-MassiveIntentClassification & hu & 42.77 & 59.52 & 60.41 & 63.87 & 44.73 \\
-MassiveIntentClassification & hy & 28.07 & 56.2 & 51.6 & 57.74 & 38.13 \\
-MassiveIntentClassification & id & 45.81 & 61.12 & 59.85 & 65.43 & 64.06 \\
-MassiveIntentClassification & is & 39.86 & 54.9 & 30.83 & 37.05 & 44.35 \\
-MassiveIntentClassification & it & 48.25 & 59.83 & 59.61 & 64.68 & 60.77 \\
-MassiveIntentClassification & ja & 45.3 & 63.11 & 60.89 & 63.74 & 61.22 \\
-MassiveIntentClassification & jv & 24.3 & 50.98 & 32.37 & 36.49 & 50.94 \\
-MassiveIntentClassification & ka & 22.7 & 48.35 & 43.03 & 49.85 & 33.84 \\
-MassiveIntentClassification & km & 22.48 & 48.55 & 40.04 & 45.47 & 37.34 \\
-MassiveIntentClassification & kn & 4.32 & 56.24 & 40.98 & 50.63 & 53.54 \\
-MassiveIntentClassification & ko & 44.26 & 60.99 & 50.3 & 61.82 & 53.36 \\
-MassiveIntentClassification & lv & 39.75 & 57.1 & 54.68 & 61.29 & 46.5 \\
-MassiveIntentClassification & ml & 41.33 & 57.91 & 42.41 & 54.34 & 58.27 \\
-MassiveIntentClassification & mn & 16.2 & 58.5 & 51.77 & 56.59 & 40.28 \\
-MassiveIntentClassification & ms & 43.23 & 58.6 & 54.76 & 60.7 & 59.65 \\
-MassiveIntentClassification & my & 25.37 & 57.35 & 52.01 & 57.09 & 37.42 \\
-MassiveIntentClassification & nb & 37.74 & 57.91 & 55.5 & 62.6 & 49.41 \\
-MassiveIntentClassification & nl & 45.0 & 59.37 & 59.51 & 63.57 & 52.09 \\
-MassiveIntentClassification & pl & 44.99 & 59.71 & 59.43 & 64.3 & 50.48 \\
-MassiveIntentClassification & pt & 48.55 & 60.16 & 61.27 & 64.89 & 66.69 \\
-MassiveIntentClassification & ro & 44.3 & 57.92 & 58.39 & 62.8 & 50.53 \\
-MassiveIntentClassification & ru & 44.29 & 60.67 & 59.04 & 63.26 & 58.32 \\
-MassiveIntentClassification & sl & 44.72 & 59.37 & 57.36 & 63.51 & 47.74 \\
-MassiveIntentClassification & sq & 46.12 & 58.03 & 56.59 & 62.49 & 48.94 \\
-MassiveIntentClassification & sv & 45.95 & 59.66 & 59.43 & 64.73 & 50.79 \\
-MassiveIntentClassification & sw & 31.89 & 51.62 & 29.57 & 31.95 & 49.81 \\
-MassiveIntentClassification & ta & 29.63 & 55.04 & 36.77 & 50.17 & 56.4 \\
-MassiveIntentClassification & te & 36.03 & 58.32 & 40.72 & 52.82 & 54.71 \\
-MassiveIntentClassification & th & 43.39 & 56.58 & 58.97 & 61.11 & 44.43 \\
-MassiveIntentClassification & tl & 29.73 & 55.28 & 33.67 & 38.83 & 50.21 \\
-MassiveIntentClassification & tr & 43.93 & 60.91 & 59.9 & 64.54 & 46.56 \\
-MassiveIntentClassification & ur & 26.11 & 56.7 & 52.8 & 56.37 & 56.75 \\
-MassiveIntentClassification & vi & 44.33 & 56.67 & 56.61 & 59.68 & 64.53 \\
-MassiveIntentClassification & zh-CN & 40.62 & 63.86 & 61.99 & 65.33 & 67.07 \\
-MassiveIntentClassification & zh-TW & 32.93 & 59.51 & 58.77 & 62.35 & 62.89 \\
-MassiveScenarioClassification & af & 47.1 & 63.39 & 53.64 & 59.67 & 51.47 \\
-MassiveScenarioClassification & am & 17.7 & 62.02 & 41.89 & 48.97 & 34.87 \\
-MassiveScenarioClassification & ar & 45.21 & 57.72 & 51.74 & 57.78 & 65.21 \\
-MassiveScenarioClassification & az & 28.21 & 63.48 & 52.06 & 61.53 & 45.58 \\
-MassiveScenarioClassification & bn & 50.52 & 61.84 & 41.17 & 54.53 & 67.3 \\
-MassiveScenarioClassification & cy & 22.58 & 56.13 & 31.72 & 35.26 & 46.29 \\
-MassiveScenarioClassification & da & 54.87 & 65.24 & 66.87 & 71.0 & 53.52 \\
-MassiveScenarioClassification & de & 54.34 & 62.39 & 57.4 & 67.34 & 61.74 \\
-MassiveScenarioClassification & el & 55.47 & 64.58 & 66.14 & 68.81 & 48.96 \\
-MassiveScenarioClassification & es & 52.77 & 63.61 & 65.04 & 70.42 & 73.34 \\
-MassiveScenarioClassification & fa & 52.5 & 67.46 & 65.86 & 69.88 & 53.17 \\
-MassiveScenarioClassification & fi & 52.63 & 64.58 & 63.75 & 67.6 & 44.69 \\
-MassiveScenarioClassification & fr & 54.32 & 65.1 & 66.06 & 70.69 & 72.91 \\
-MassiveScenarioClassification & he & 52.41 & 63.53 & 59.2 & 65.16 & 43.1 \\
-MassiveScenarioClassification & hi & 47.37 & 64.4 & 65.21 & 67.92 & 69.27 \\
-MassiveScenarioClassification & hu & 53.43 & 65.82 & 66.56 & 70.3 & 45.16 \\
-MassiveScenarioClassification & hy & 33.57 & 61.25 & 56.11 & 63.02 & 38.73 \\
-MassiveScenarioClassification & id & 54.38 & 65.84 & 66.16 & 70.73 & 70.13 \\
-MassiveScenarioClassification & is & 49.78 & 61.94 & 37.52 & 44.16 & 44.21 \\
-MassiveScenarioClassification & it & 54.84 & 64.09 & 65.0 & 69.73 & 65.57 \\
-MassiveScenarioClassification & ja & 54.12 & 67.72 & 66.5 & 69.69 & 65.76 \\
-MassiveScenarioClassification & jv & 32.71 & 58.29 & 38.6 & 44.2 & 54.79 \\
-MassiveScenarioClassification & ka & 26.92 & 53.38 & 50.66 & 57.3 & 32.99 \\
-MassiveScenarioClassification & km & 27.23 & 56.18 & 46.96 & 53.14 & 39.34 \\
-MassiveScenarioClassification & kn & 10.06 & 61.74 & 45.73 & 56.08 & 60.5 \\
-MassiveScenarioClassification & ko & 52.01 & 67.26 & 55.66 & 68.52 & 55.69 \\
-MassiveScenarioClassification & lv & 44.82 & 61.87 & 59.8 & 66.28 & 44.35 \\
-MassiveScenarioClassification & ml & 49.1 & 62.26 & 47.69 & 60.13 & 65.53 \\
-MassiveScenarioClassification & mn & 21.51 & 62.6 & 57.07 & 60.85 & 38.72 \\
-MassiveScenarioClassification & ms & 53.6 & 65.63 & 61.71 & 65.81 & 64.99 \\
-MassiveScenarioClassification & my & 29.72 & 62.94 & 59.1 & 63.03 & 36.84 \\
-MassiveScenarioClassification & nb & 43.9 & 64.29 & 64.25 & 70.24 & 51.8 \\
-MassiveScenarioClassification & nl & 53.33 & 65.16 & 65.52 & 70.37 & 56.32 \\
-MassiveScenarioClassification & pl & 52.92 & 64.56 & 65.04 & 68.99 & 49.98 \\
-MassiveScenarioClassification & pt & 53.41 & 63.28 & 65.79 & 70.09 & 71.46 \\
-MassiveScenarioClassification & ro & 50.48 & 62.41 & 64.17 & 67.95 & 53.69 \\
-MassiveScenarioClassification & ru & 51.84 & 65.25 & 65.24 & 69.92 & 61.6 \\
-MassiveScenarioClassification & sl & 51.29 & 64.25 & 64.01 & 70.81 & 48.04 \\
-MassiveScenarioClassification & sq & 55.65 & 64.54 & 64.31 & 69.63 & 50.06 \\
-MassiveScenarioClassification & sv & 54.64 & 66.01 & 67.14 & 71.6 & 51.73 \\
-MassiveScenarioClassification & sw & 42.04 & 58.36 & 34.86 & 37.29 & 54.22 \\
-MassiveScenarioClassification & ta & 36.72 & 59.08 & 42.62 & 55.96 & 62.77 \\
-MassiveScenarioClassification & te & 42.08 & 64.13 & 46.46 & 58.81 & 62.59 \\
-MassiveScenarioClassification & th & 52.15 & 64.34 & 67.01 & 69.44 & 45.18 \\
-MassiveScenarioClassification & tl & 37.34 & 60.23 & 37.37 & 43.99 & 52.06 \\
-MassiveScenarioClassification & tr & 52.56 & 65.43 & 66.55 & 70.4 & 47.21 \\
-MassiveScenarioClassification & ur & 32.6 & 61.52 & 60.43 & 62.9 & 64.26 \\
-MassiveScenarioClassification & vi & 50.97 & 61.05 & 60.72 & 65.71 & 70.61 \\
-MassiveScenarioClassification & zh-CN & 50.22 & 70.85 & 67.44 & 71.23 & 73.95 \\
-MassiveScenarioClassification & zh-TW & 42.32 & 67.08 & 65.7 & 68.73 & 70.3 \\
-MTOPDomainClassification & de & 74.08 & 86.95 & 79.2 & 85.73 & 82.05 \\
-MTOPDomainClassification & es & 73.47 & 84.07 & 83.04 & 86.96 & 93.55 \\
-MTOPDomainClassification & fr & 72.26 & 84.14 & 78.63 & 81.21 & 90.98 \\
-MTOPDomainClassification & hi & 72.95 & 85.11 & 81.36 & 84.76 & 89.33 \\
-MTOPDomainClassification & th & 72.68 & 81.24 & 79.99 & 82.51 & 60.49 \\
-MTOPIntentClassification & de & 51.62 & 63.42 & 54.23 & 61.27 & 61.92 \\
-MTOPIntentClassification & es & 52.75 & 64.44 & 60.28 & 66.59 & 74.49 \\
-MTOPIntentClassification & fr & 50.12 & 62.01 & 54.05 & 59.76 & 69.12 \\
-MTOPIntentClassification & hi & 45.55 & 62.58 & 59.9 & 62.37 & 64.85 \\
-MTOPIntentClassification & th & 50.07 & 64.61 & 61.96 & 64.8 & 49.36 \\
-Average & mix & 42.85 & 60.77 & 54.87 & 60.39 & 54.4 \\
diff --git a/plotstables/multilingsts.txt b/plotstables/multilingsts.txt
deleted file mode 100644
index 2cd0b762..00000000
--- a/plotstables/multilingsts.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-Dataset & Language & LASER2 & LaBSE & MiniLM-L12-multilingual & MPNet-multilingual & SGPT-BLOOM-7.1B-msmarco \\
-STS17 & ko-ko & 70.52 & 71.32 & 77.03 & 83.41 & 66.89 \\
-STS17 & ar-ar & 67.47 & 69.07 & 79.16 & 79.1 & 76.42 \\
-STS17 & en-ar & 65.05 & 74.51 & 81.22 & 80.85 & 78.07 \\
-STS17 & en-de & 66.66 & 73.85 & 84.22 & 83.28 & 59.1 \\
-STS17 & en-tr & 70.05 & 72.07 & 76.74 & 74.9 & 11.8 \\
-STS17 & es-en & 55.3 & 65.71 & 84.44 & 86.11 & 78.22 \\
-STS17 & es-es & 79.67 & 80.83 & 85.56 & 85.14 & 86.0 \\
-STS17 & fr-en & 70.82 & 76.98 & 76.59 & 81.17 & 80.46 \\
-STS17 & it-en & 70.98 & 76.99 & 82.35 & 84.24 & 51.58 \\
-STS17 & nl-en & 68.12 & 75.22 & 81.71 & 82.51 & 45.85 \\
-STS22 & de & 25.69 & 48.58 & 44.64 & 46.7 & 30.05 \\
-STS22 & es & 54.92 & 63.18 & 56.56 & 59.91 & 65.41 \\
-STS22 & pl & 18.34 & 39.3 & 33.74 & 33.65 & 31.13 \\
-STS22 & tr & 36.97 & 58.15 & 53.39 & 56.3 & 47.14 \\
-STS22 & ar & 42.57 & 57.67 & 46.2 & 52.19 & 58.67 \\
-STS22 & ru & 39.24 & 57.49 & 57.08 & 58.74 & 43.36 \\
-STS22 & zh & 49.41 & 63.02 & 58.75 & 61.75 & 66.78 \\
-STS22 & fr & 58.61 & 77.95 & 70.55 & 74.3 & 80.38 \\
-STS22 & de-en & 32.35 & 50.14 & 52.65 & 50.81 & 51.16 \\
-STS22 & es-en & 54.34 & 71.86 & 67.33 & 70.26 & 75.06 \\
-STS22 & it & 60.31 & 72.22 & 55.22 & 60.65 & 65.65 \\
-STS22 & pl-en & 53.63 & 69.41 & 69.02 & 73.07 & 53.31 \\
-STS22 & zh-en & 46.19 & 64.02 & 65.71 & 67.96 & 68.45 \\
-STS22 & es-it & 42.21 & 69.69 & 47.67 & 53.7 & 65.5 \\
-STS22 & de-fr & 37.41 & 53.28 & 51.73 & 62.34 & 53.28 \\
-STS22 & de-pl & 15.67 & 58.69 & 44.22 & 40.53 & 43.05 \\
-STS22 & fr-pl & 39.44 & 61.98 & 50.71 & 84.52 & 28.17 \\
-Average & mix & 51.55 & 65.67 & 64.23 & 67.71 & 57.81 \\
diff --git a/plotstables/multilingual.png b/plotstables/multilingual.png
deleted file mode 100644
index f457c4ed..00000000
Binary files a/plotstables/multilingual.png and /dev/null differ
diff --git a/plotstables/multilingual_clf.pdf b/plotstables/multilingual_clf.pdf
deleted file mode 100644
index e9e8a3a2..00000000
Binary files a/plotstables/multilingual_clf.pdf and /dev/null differ
diff --git a/plotstables/multilingual_clf.png b/plotstables/multilingual_clf.png
deleted file mode 100644
index 6a9a284e..00000000
Binary files a/plotstables/multilingual_clf.png and /dev/null differ
diff --git a/plotstables/multilingual_sts.pdf b/plotstables/multilingual_sts.pdf
deleted file mode 100644
index c0792d07..00000000
Binary files a/plotstables/multilingual_sts.pdf and /dev/null differ
diff --git a/plotstables/multilingual_sts.png b/plotstables/multilingual_sts.png
deleted file mode 100644
index 814cf7c3..00000000
Binary files a/plotstables/multilingual_sts.png and /dev/null differ
diff --git a/plotstables/multilingual_tatoeba.pdf b/plotstables/multilingual_tatoeba.pdf
deleted file mode 100644
index 1c9ec904..00000000
Binary files a/plotstables/multilingual_tatoeba.pdf and /dev/null differ
diff --git a/plotstables/multilingual_tatoeba.png b/plotstables/multilingual_tatoeba.png
deleted file mode 100644
index 30ea522a..00000000
Binary files a/plotstables/multilingual_tatoeba.png and /dev/null differ
diff --git a/plotstables/results_to_avg_table.py b/plotstables/results_to_avg_table.py
deleted file mode 100644
index 8ede5a2b..00000000
--- a/plotstables/results_to_avg_table.py
+++ /dev/null
@@ -1,259 +0,0 @@
-import json
-import os
-import sys
-
-from mteb import MTEB
-
-### GLOBAL VARIABLES ###
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
-]
-
-
-TASK_LIST_SUMMARIZATION = [
- "SummEval",
-]
-
-TASK_LIST_EN = (
- TASK_LIST_CLASSIFICATION
- + TASK_LIST_CLUSTERING
- + TASK_LIST_PAIR_CLASSIFICATION
- + TASK_LIST_RERANKING
- + TASK_LIST_RETRIEVAL
- + TASK_LIST_STS
- + TASK_LIST_SUMMARIZATION
-)
-
-
-TASK_LIST_NAMES = [
- ("Class.", TASK_LIST_CLASSIFICATION, ["en", "en-en"]),
- ("Clust.", TASK_LIST_CLUSTERING, ["en", "en-en"]),
- ("PairClass.", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"]),
- ("Rerank.", TASK_LIST_RERANKING, ["en", "en-en"]),
- ("Retr.", TASK_LIST_RETRIEVAL, ["en", "en-en"]),
- ("STS", TASK_LIST_STS, ["en", "en-en"]),
- ("Summ.", TASK_LIST_SUMMARIZATION, ["en", "en-en"]),
- # ("BitextMining", TASK_LIST_BITEXT, []),
- ("Avg.", TASK_LIST_EN, ["en", "en-en"]),
-]
-
-SELFSUPERVISED_MODELS = [
- "glove.6B.300d",
- "komninos",
- "bert-base-uncased",
- "unsup-simcse-bert-base-uncased",
-]
-
-SUPERVISED_MODELS = [
- "sup-simcse-bert-base-uncased",
- "msmarco-bert-co-condensor",
- "contriever-base-msmarco",
- "allenai-specter",
- "LaBSE",
- "LASER2",
- "all-MiniLM-L6-v2",
- "all-MiniLM-L12-v2",
- "paraphrase-multilingual-MiniLM-L12-v2",
- "all-mpnet-base-v2",
- "paraphrase-multilingual-mpnet-base-v2",
- "text-similarity-ada-001",
- "SGPT-125M-weightedmean-nli-bitfit",
- "SGPT-5.8B-weightedmean-nli-bitfit",
- "SGPT-125M-weightedmean-msmarco-specb-bitfit",
- "SGPT-1.3B-weightedmean-msmarco-specb-bitfit",
- "SGPT-2.7B-weightedmean-msmarco-specb-bitfit",
- "SGPT-5.8B-weightedmean-msmarco-specb-bitfit",
- "sgpt-bloom-7b1-msmarco",
- "gtr-t5-base", # 110M
- "gtr-t5-large",
- "gtr-t5-xl",
- "gtr-t5-xxl", # 4.8B
- "sentence-t5-base", # 110M
- "sentence-t5-large",
- "sentence-t5-xl",
- "sentence-t5-xxl", # 4.8B
-]
-
-MODEL_TO_NAME = {
- "bert-base-uncased": "BERT",
- "gtr-t5-base": "GTR-Base",
- "gtr-t5-large": "GTR-Large",
- "gtr-t5-xl": "GTR-XL",
- "gtr-t5-xxl": "GTR-XXL",
- "sentence-t5-base": "ST5-Base",
- "sentence-t5-large": "ST5-Large",
- "sentence-t5-xl": "ST5-XL",
- "sentence-t5-xxl": "ST5-XXL",
- "SGPT-125M-weightedmean-msmarco-specb-bitfit": "SGPT-125M-msmarco",
- "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": "SGPT-1.3B-msmarco",
- "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": "SGPT-2.7B-msmarco",
- "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": "SGPT-5.8B-msmarco",
- "sgpt-bloom-7b1-msmarco": "SGPT-BLOOM-7.1B-msmarco",
- "SGPT-125M-weightedmean-nli-bitfit": "SGPT-125M-nli",
- "SGPT-5.8B-weightedmean-nli-bitfit": "SGPT-5.8B-nli",
- "sup-simcse-bert-base-uncased": "SimCSE-BERT-sup",
- "contriever-base-msmarco": "Contriever",
- "msmarco-bert-co-condensor": "coCondenser-msmarco", # They write it as coCondenser in the paper
- "unsup-simcse-bert-base-uncased": "SimCSE-BERT-unsup",
- "glove.6B.300d": "Glove",
- "komninos": "Komninos",
- "all-MiniLM-L6-v2": "MiniLM-L6",
- "all-MiniLM-L12-v2": "MiniLM-L12",
- "paraphrase-multilingual-MiniLM-L12-v2": "MiniLM-L12-multilingual",
- "all-mpnet-base-v2": "MPNet",
- "paraphrase-multilingual-mpnet-base-v2": "MPNet-multilingual",
- "allenai-specter": "SPECTER",
- "text-similarity-ada-001": "Ada Similarity",
-}
-
-
-### LOGIC ###
-
-results_folder = sys.argv[1].strip("/")
-all_results = {}
-
-for model_name in os.listdir(results_folder):
- model_res_folder = os.path.join(results_folder, model_name)
- if os.path.isdir(model_res_folder):
- all_results.setdefault(model_name, {})
- for file_name in os.listdir(model_res_folder):
- if not file_name.endswith(".json"):
- print(f"Skipping non-json {file_name}")
- continue
- with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f:
- results = json.load(f)
- all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}}
-
-def get_row(dataset, model_name, limit_langs=[], skip_langs=[]):
- # CQADupstackRetrieval uses the same metric as its subsets
- tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks
- assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1."
- main_metric = tasks[0].description["main_score"]
- test_result = all_results.get(model_name, {}). get(dataset, {})
-
- # Dev / Val set is used for MSMARCO (See BEIR paper)
- if "MSMARCO" in dataset:
- test_result = (
- test_result.get("dev") if "dev" in test_result else test_result.get("validation")
- )
- else:
- test_result = test_result.get("test")
-
- for lang in tasks[0].description["eval_langs"]:
- if (limit_langs and lang not in limit_langs) or (skip_langs and lang in skip_langs):
- continue
- elif test_result is None:
- raise NotImplementedError(f"Got no test result {test_result} for ds: {dataset} model: {model_name}")
-
- test_result_lang = test_result.get(lang, test_result)
- if main_metric == "cosine_spearman":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman")
- elif main_metric == "ap":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("ap")
- else:
- test_result_lang = test_result_lang.get(main_metric)
-
- if test_result_lang is None:
- raise NotImplementedError
-
- return test_result_lang
- raise NotImplementedError
-
-
-table = "Task ($\ rightarrow$) & " + " & ".join([x[0] for x in TASK_LIST_NAMES]) + " \\\\" + "\n"
-table += "Num. Datasets ($\ rightarrow$) & " + " & ".join([str(len(x[1])) for x in TASK_LIST_NAMES]) + " \\\\" + "\n"
-table += "Model ($\downarrow$) & " + " & ".join([x[0] for x in TASK_LIST_NAMES]) + " \\\\" + "\n"
-
-
-def add_to_table(model_list, table):
- for model in model_list:
- results = []
- for (task_name, task_list, limit_langs) in TASK_LIST_NAMES:
- try:
- model_task_results = [get_row(task, model, limit_langs=limit_langs) for task in task_list]
- except:
- results.append("")
- continue
- results.append(str(round(100 * (sum(model_task_results) / len(model_task_results)), 2)))
-
- model_name = MODEL_TO_NAME.get(model, model)
- table += model_name + " & " + " & ".join(results) + " \\\\" + "\n"
- return table
-
-
-table = add_to_table(SELFSUPERVISED_MODELS, table)
-table = add_to_table(SUPERVISED_MODELS, table)
-
-with open("avg_table.txt", "w") as f:
- f.write(table)
-
diff --git a/plotstables/results_to_heatmap.py b/plotstables/results_to_heatmap.py
deleted file mode 100644
index 3d9461e7..00000000
--- a/plotstables/results_to_heatmap.py
+++ /dev/null
@@ -1,313 +0,0 @@
-"""
-Usage: python results_to_heatmap.py results_folder_path
-results_folder_path contains results of multiple models whose folders should be named after them
-Source: https://medium.com/@szabo.bibor/how-to-create-a-seaborn-correlation-heatmap-in-python-834c0686b88e
-"""
-import json
-import os
-import sys
-
-from mteb import MTEB
-import numpy as np
-import pandas as pd
-
-TASK_LIST_BITEXT = [
- "BUCC",
- "Tatoeba",
-]
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
-]
-
-TASK_LIST_SUMMARIZATION = [
- "SummEval",
-]
-
-TASK_LIST = (
- TASK_LIST_BITEXT
- + TASK_LIST_CLASSIFICATION
- + TASK_LIST_CLUSTERING
- + TASK_LIST_PAIR_CLASSIFICATION
- + TASK_LIST_RERANKING
- + TASK_LIST_RETRIEVAL
- + TASK_LIST_STS
- + TASK_LIST_SUMMARIZATION
-)
-
-TASK_LIST_EN = (
- TASK_LIST_CLASSIFICATION
- + TASK_LIST_CLUSTERING
- + TASK_LIST_PAIR_CLASSIFICATION
- + TASK_LIST_RERANKING
- + TASK_LIST_RETRIEVAL
- + TASK_LIST_STS
- + TASK_LIST_SUMMARIZATION
-)
-
-TASK_LIST_NAMES = [
- ("Class.", TASK_LIST_CLASSIFICATION, ["en", "en-en"]),
- ("Clust.", TASK_LIST_CLUSTERING, ["en", "en-en"]),
- ("PairClass.", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"]),
- ("Rerank.", TASK_LIST_RERANKING, ["en", "en-en"]),
- ("Retr.", TASK_LIST_RETRIEVAL, ["en", "en-en"]),
- ("STS", TASK_LIST_STS, ["en", "en-en"]),
- ("Summ.", TASK_LIST_SUMMARIZATION, ["en", "en-en"]),
- # ("BitextMining", TASK_LIST_BITEXT, []),
- # ("Avg.", TASK_LIST_EN, ["en", "en-en"]),
-]
-
-MODEL_TO_NAME = {
- "bert-base-uncased": "BERT",
- "gtr-t5-base": "GTR-Base",
- "gtr-t5-large": "GTR-Large",
- "gtr-t5-xl": "GTR-XL",
- "gtr-t5-xxl": "GTR-XXL",
- "sentence-t5-base": "ST5-Base",
- "sentence-t5-large": "ST5-Large",
- "sentence-t5-xl": "ST5-XL",
- "sentence-t5-xxl": "ST5-XXL",
- "SGPT-125M-weightedmean-msmarco-specb-bitfit": "SGPT-125M-msmarco",
- "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": "SGPT-1.3B-msmarco",
- "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": "SGPT-2.7B-msmarco",
- "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": "SGPT-5.8B-msmarco",
- "sgpt-bloom-7b1-msmarco": "SGPT-BLOOM-7.1B-msmarco",
- "SGPT-125M-weightedmean-nli-bitfit": "SGPT-125M-nli",
- "SGPT-5.8B-weightedmean-nli-bitfit": "SGPT-5.8B-nli",
- "sup-simcse-bert-base-uncased": "SimCSE-BERT-sup",
- "contriever-base-msmarco": "Contriever",
- "msmarco-bert-co-condensor": "coCondenser-msmarco", # They write it as coCondenser in the paper
- "unsup-simcse-bert-base-uncased": "SimCSE-BERT-unsup",
- "glove.6B.300d": "Glove",
- "komninos": "Komninos",
- "all-MiniLM-L6-v2": "MiniLM-L6",
- "all-MiniLM-L12-v2": "MiniLM-L12",
- "paraphrase-multilingual-MiniLM-L12-v2": "MiniLM-L12-multilingual",
- "all-mpnet-base-v2": "MPNet",
- "paraphrase-multilingual-mpnet-base-v2": "MPNet-multilingual",
- "allenai-specter": "SPECTER",
- # "text-similarity-ada-001": "Ada Similarity",
-}
-
-SELFSUPERVISED_MODELS = [
- "glove.6B.300d",
- "komninos",
- "bert-base-uncased",
- "unsup-simcse-bert-base-uncased",
-]
-
-SUPERVISED_MODELS = [
- "sup-simcse-bert-base-uncased",
- "msmarco-bert-co-condensor",
- "contriever-base-msmarco",
- "allenai-specter",
- "LaBSE",
- "LASER2",
- "all-MiniLM-L6-v2",
- "all-MiniLM-L12-v2",
- "paraphrase-multilingual-MiniLM-L12-v2",
- "all-mpnet-base-v2",
- "paraphrase-multilingual-mpnet-base-v2",
- # "text-similarity-ada-001",
- "SGPT-125M-weightedmean-nli-bitfit",
- "SGPT-5.8B-weightedmean-nli-bitfit",
- "SGPT-125M-weightedmean-msmarco-specb-bitfit",
- "SGPT-1.3B-weightedmean-msmarco-specb-bitfit",
- "SGPT-2.7B-weightedmean-msmarco-specb-bitfit",
- "SGPT-5.8B-weightedmean-msmarco-specb-bitfit",
- "sgpt-bloom-7b1-msmarco",
- "gtr-t5-base", # 110M
- "gtr-t5-large",
- "gtr-t5-xl",
- "gtr-t5-xxl", # 4.8B
- "sentence-t5-base", # 110M
- "sentence-t5-large",
- "sentence-t5-xl",
- "sentence-t5-xxl", # 4.8B
-]
-
-results_folder = sys.argv[1].strip("/")
-
-all_results = {}
-
-for model_name in os.listdir(results_folder):
- model_res_folder = os.path.join(results_folder, model_name)
- if os.path.isdir(model_res_folder):
- all_results.setdefault(model_name, {})
- for file_name in os.listdir(model_res_folder):
- if not file_name.endswith(".json"):
- print(f"Skipping non-json {file_name}")
- continue
- with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f:
- results = json.load(f)
- all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}}
-
-def get_row(dataset, model_name, limit_langs=[], skip_langs=[]):
- # CQADupstackRetrieval uses the same metric as its subsets
- tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks
- assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1."
- main_metric = tasks[0].description["main_score"]
- test_result = all_results.get(model_name, {}). get(dataset, {})
-
- # Dev / Val set is used for MSMARCO (See BEIR paper)
- if "MSMARCO" in dataset:
- test_result = (
- test_result.get("dev") if "dev" in test_result else test_result.get("validation")
- )
- else:
- test_result = test_result.get("test")
-
- for lang in tasks[0].description["eval_langs"]:
- if (limit_langs and lang not in limit_langs) or (skip_langs and lang in skip_langs):
- continue
- elif test_result is None:
- raise NotImplementedError(f"Got no test result {test_result} for ds: {dataset} model: {model_name}")
-
- test_result_lang = test_result.get(lang, test_result)
- if main_metric == "cosine_spearman":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman")
- elif main_metric == "ap":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("ap")
- else:
- test_result_lang = test_result_lang.get(main_metric)
-
- if test_result_lang is None:
- raise NotImplementedError
-
- return test_result_lang
- raise NotImplementedError
-
-
-### MODEL HEATMAP
-
-model_dict = []
-
-for ds in TASK_LIST_EN:
- model_dict.append({MODEL_TO_NAME.get(model,model): get_row(ds, model, limit_langs=["en", "en-en"]) for model in SELFSUPERVISED_MODELS + SUPERVISED_MODELS})
-
-model_df = pd.DataFrame(model_dict)
-import seaborn as sns
-import matplotlib.pyplot as plt
-
-plt.figure(figsize=(20, 10))
-
-model_df = (model_df.corr() * 100).round(0).astype(int)
-
-# define the mask to set the values in the upper triangle to True
-mask = np.triu(np.ones_like(model_df, dtype=np.bool))
-heatmap = sns.heatmap(model_df, mask=mask, vmin=model_df.values.min(), vmax=model_df.values.max(), annot=True, fmt='g', cmap='Blues')
-# heatmap.set_title('Pearson Correlations of scores on MTEB', fontdict={'fontsize':18}, pad=16);
-
-plt.savefig('heatmap_model.pdf', dpi=300, bbox_inches='tight')
-
-data_dict = []
-
-
-### TASK HEATMAP
-for model in MODEL_TO_NAME:
- results = {}
- for (task_name, task_list, limit_langs) in TASK_LIST_NAMES:
- model_task_results = [get_row(task, model, limit_langs=limit_langs) for task in task_list]
- results[task_name] = np.mean(model_task_results)
- data_dict.append(results)
-
-data_df = pd.DataFrame(data_dict)
-data_df = (data_df.corr() * 100).round(0).astype(int)
-
-plt.figure(figsize=(20, 10))
-# define the mask to set the values in the upper triangle to True
-mask = np.triu(np.ones_like(data_df, dtype=np.bool))
-heatmap = sns.heatmap(data_df, mask=mask, vmin=data_df.values.min(), vmax=data_df.values.max(), annot=True, fmt='g', cmap='Blues')
-# heatmap.set_title('Pearson Correlations of tasks on MTEB', fontdict={'fontsize':18}, pad=16)
-
-plt.savefig('heatmap_tasks.pdf', dpi=300, bbox_inches='tight')
-
-exit()
-# The last heatmap is not used
-
-### DATA HEATMAP
-# This is to be differentiated from a heatmap of actual data content (e.g. via unigram Jaccard similarity)
-# E.g. for BEIR SciFact & HotpotQA have very low unigram Jaccard similarity, but in this method,
-# they get a high similarity score, because model scores seem to correlate on the datasrt
-
-for model, name in MODEL_TO_NAME.items():
- data_dict.append({ds: get_row(ds, model, limit_langs=["en", "en-en"]) for ds in TASK_LIST_EN})
-
-data_df = pd.DataFrame(data_dict)
-
-plt.figure(figsize=(128, 48))
-# define the mask to set the values in the upper triangle to True
-mask = np.triu(np.ones_like(data_df.corr(), dtype=np.bool))
-heatmap = sns.heatmap(data_df.corr(), mask=mask, vmin=-1, vmax=1, annot=True, cmap='Blues')
-heatmap.set_title('Pearson Correlations of scores on MTEB', fontdict={'fontsize':18}, pad=16)
-
-plt.savefig('heatmap_data.pdf', dpi=300, bbox_inches='tight')
diff --git a/plotstables/results_to_multilingual.py b/plotstables/results_to_multilingual.py
deleted file mode 100644
index f89263f9..00000000
--- a/plotstables/results_to_multilingual.py
+++ /dev/null
@@ -1,329 +0,0 @@
-"""
-Usage: python results_to_multilingual.py results_folder_path
-Make sure the final directory results_folder_path is the name of your model
-"""
-import json
-import os
-import sys
-
-### GLOBAL VARIABLES ###
-
-TASK_LIST_BITEXT = [
- "BUCC",
- "Tatoeba",
-]
-
-BITEXT_MODELS = MULTILING_MODELS = [
- "LaBSE",
- "LASER2",
- "paraphrase-multilingual-MiniLM-L12-v2",
- "paraphrase-multilingual-mpnet-base-v2",
- "sgpt-bloom-7b1-msmarco",
- # "sgpt-bloom-1b3-nli", # Not too interesting
-]
-
-MODEL_TO_NAME = {
- "bert-base-uncased": "BERT",
- "gtr-t5-base": "GTR-Base",
- "gtr-t5-large": "GTR-Large",
- "gtr-t5-xl": "GTR-XL",
- "gtr-t5-xxl": "GTR-XXL",
- "sentence-t5-base": "ST5-Base",
- "sentence-t5-large": "ST5-Large",
- "sentence-t5-xl": "ST5-XL",
- "sentence-t5-xxl": "ST5-XXL",
- "SGPT-125M-weightedmean-msmarco-specb-bitfit": "SGPT-125M-msmarco",
- "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": "SGPT-1.3B-msmarco",
- "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": "SGPT-2.7B-msmarco",
- "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": "SGPT-5.8B-msmarco",
- "sgpt-bloom-7b1-msmarco": "SGPT-BLOOM-7.1B-msmarco",
- "SGPT-125M-weightedmean-nli-bitfit": "SGPT-125M-nli",
- "SGPT-5.8B-weightedmean-nli-bitfit": "SGPT-5.8B-nli",
- "sup-simcse-bert-base-uncased": "SimCSE-BERT-sup",
- "contriever-base-msmarco": "Contriever",
- "msmarco-bert-co-condensor": "coCondenser-msmarco", # They write it as coCondenser in the paper
- "unsup-simcse-bert-base-uncased": "SimCSE-BERT-unsup",
- "glove.6B.300d": "Glove",
- "komninos": "Komninos",
- "all-MiniLM-L6-v2": "MiniLM-L6",
- "all-MiniLM-L12-v2": "MiniLM-L12",
- "paraphrase-multilingual-MiniLM-L12-v2": "MiniLM-L12-multilingual",
- "all-mpnet-base-v2": "MPNet",
- "paraphrase-multilingual-mpnet-base-v2": "MPNet-multilingual",
- "allenai-specter": "SPECTER",
- "text-similarity-ada-001": "Ada Similarity",
-}
-
-# Base from:
-# https://coolors.co/palette/ff5400-ff6d00-ff8500-ff9100-ff9e00-00b4d8-0096c7-0077b6-023e8a-03045e
-# Yellow tones from:
-# https://coolors.co/palette/6ab6dc-49a6d4-2f94c6-277ba5-1f6284-e0b700-ffd20a-ffda33-ffe15c-ffe570
-# Green from:
-# https://coolors.co/palette/f94144-f3722c-f8961e-f9844a-f9c74f-90be6d-43aa8b-4d908e-577590-277da1
-MODEL_TO_COLOR = {
- "MiniLM": "#BAF19C",#"#017600", # Green
- "MPNet": "#F94144",#"#007A7A", # Light Green
- "GTR": "#FF5400",#"#221D91", # Blue 1
- "ST5": "#FF9E00",#"#86D4F1", # Blue 2
- "SGPT": "#00B4D8",#"#7B3FB9", # Purple
- "SimCSE": "#F9C74F",#"#2070B4", # Blue 3
- "LaBSE": "#F9C74F",#"#2070B4", # Blue 3
- "SPECTER": "#E0B700", # Shade of #2070B4
- "Glove": "#023E8A",#"#9BC7DD", # Light Blue
- "LASER2": "#03045E", # Grey
-}
-
-
-MULTILINGUAL_CLF = [
- "AmazonCounterfactualClassification",
- "AmazonReviewsClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
-]
-
-MULTILINGUAL_STS = [
- "STS17",
- "STS22",
-]
-
-### LOGIC ###
-
-results_folder = sys.argv[1].strip("/")
-all_results = {}
-
-for model_name in os.listdir(results_folder):
- model_res_folder = os.path.join(results_folder, model_name)
- if os.path.isdir(model_res_folder):
- all_results.setdefault(model_name, {})
- for file_name in os.listdir(model_res_folder):
- if not file_name.endswith(".json"):
- print(f"Skipping non-json {file_name}")
- continue
- with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f:
- results = json.load(f)
- all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}}
-
-
-
-# Create a plot for each task with scaling of the model performances on this task
-import matplotlib.pyplot as plt
-import numpy as np
-
-fig, ax = plt.subplots(figsize=(64,12))
-
-markers = ["x", "o", "v", "*", "p"]
-
-# Compute averages
-scores = {}
-for i, model in enumerate(BITEXT_MODELS):
- if not(all_results.get(model, []).get("Tatoeba")):
- continue
- for lang, res in all_results[model]["Tatoeba"]["test"].items():
- if lang == "evaluation_time":
- continue
- scores.setdefault(lang, [])
- scores[lang].append(res["f1"])
-# Average
-scores = {k: np.mean(v) for k,v in scores.items()}
-scores_sorted = sorted(scores.items(), key=lambda x: x[-1], reverse=True)
-langs_sorted = [x[0] for x in scores_sorted]
-global_idx = {lang: langs_sorted.index(lang) for lang in scores}
-
-for i, model in enumerate(BITEXT_MODELS):
- scores = {}
-
- if not(all_results.get(model, []).get("Tatoeba")):
- continue
-
- for lang, res in all_results[model]["Tatoeba"]["test"].items():
- if lang == "evaluation_time":
- continue
- scores[lang] = res["f1"]
-
- # Optionally sort by LaBSE scores
- if i == 0:
- assert model == "LaBSE"
- scores_sorted = sorted(scores.items(), key=lambda x: x[-1], reverse=True)
- langs_sorted = [x[0] for x in scores_sorted]
- global_idx = {lang: langs_sorted.index(lang) for lang in scores}
-
- # Reverse is already accounted for in global_idx
- scores_sorted = sorted(scores.items(), key=lambda x: global_idx[x[0]], reverse=False)
- x_langs = [x[0] for x in scores_sorted]
- y_scores = [x[1] for x in scores_sorted]
-
- model_name = MODEL_TO_NAME.get(model, model)
- ax.plot(x_langs, y_scores, linewidth=6.0, label=model_name, marker=markers[i], color=MODEL_TO_COLOR.get(model_name.split("-")[0]))
-
-ax.set_ylabel("F1 score", fontsize=22)
-ax.margins(x=0.01) # Reduce whitespace left & right
-
-plt.xticks(rotation=45, fontsize=20) #plt.xticks(rotation=90, ha='right')
-plt.legend(fontsize=25)
-plt.savefig('multilingual_tatoeba.pdf', dpi=300, bbox_inches='tight')
-
-
-### CLASSIFICATION ###
-
-# Compute averages
-scores = {}
-for i, model in enumerate(BITEXT_MODELS):
- for ds in MULTILINGUAL_CLF:
- if not(all_results.get(model, []).get(ds)):
- continue
- for lang, res in all_results[model][ds]["test"].items():
- if lang == "evaluation_time":
- continue
- elif lang == "en-ext":
- lang = "en"
-
- scores.setdefault(lang, [])
- scores[lang].append(res["accuracy"])
-# Average
-scores = {k: np.mean(v) for k,v in scores.items()}
-scores_sorted = sorted(scores.items(), key=lambda x: x[-1], reverse=True)
-langs_sorted = [x[0] for x in scores_sorted]
-global_idx = {lang: langs_sorted.index(lang) for lang in scores}
-
-
-fig, ax = plt.subplots(figsize=(32,8))
-
-
-for i, model in enumerate(BITEXT_MODELS):
- scores = {}
- for ds in MULTILINGUAL_CLF:
- if not(all_results.get(model, []).get(ds)):
- continue
- for lang, res in all_results[model][ds]["test"].items():
- if lang == "evaluation_time":
- continue
- elif lang == "en-ext":
- lang = "en"
-
- scores.setdefault(lang, [])
- scores[lang].append(res["accuracy"])
-
- # Average scores for langs
- scores = {k: np.mean(v) for k,v in scores.items()}
-
- # Reverse is already accounted for in global_idx
- scores_sorted = sorted(scores.items(), key=lambda x: global_idx[x[0]], reverse=False)
- x_langs = [x[0] for x in scores_sorted]
- y_scores = [x[1] for x in scores_sorted]
- model_name = MODEL_TO_NAME.get(model, model)
- ax.plot(x_langs, y_scores, linewidth=6.0, label=model_name, marker=markers[i], color=MODEL_TO_COLOR.get(model_name.split("-")[0]))
-
-ax.set_ylabel("Accuracy", fontsize=22)
-
-plt.xticks(rotation=45, fontsize=20) #plt.xticks(rotation=90, ha='right')
-plt.legend(fontsize=25)
-
-plt.savefig('multilingual_clf.pdf', dpi=300, bbox_inches='tight')
-
-
-
-### STS ###
-
-# Compute averages
-scores_multi = {}
-scores_cross = {}
-
-for i, model in enumerate(BITEXT_MODELS):
- for ds in MULTILINGUAL_STS:
- if not(all_results.get(model, []).get(ds)):
- continue
- for lang, res in all_results[model][ds]["test"].items():
- if lang == "evaluation_time":
- continue
- multi = True
- if "-" in lang:
- l1, l2 = lang.split("-")
- if l1 != l2:
- multi = False
- else:
- lang = l1
- if multi:
- scores_multi.setdefault(lang, [])
- scores_multi[lang].append(res["cos_sim"]["spearman"])
- else:
- scores_cross.setdefault(lang, [])
- scores_cross[lang].append(res["cos_sim"]["spearman"])
-
-# Average
-scores = {k: np.mean(v) for k,v in scores_multi.items()}
-scores_sorted = sorted(scores.items(), key=lambda x: x[-1], reverse=True)
-langs_sorted = [x[0] for x in scores_sorted]
-global_idx_multi = {lang: langs_sorted.index(lang) for lang in scores}
-
-scores = {k: np.mean(v) for k,v in scores_cross.items()}
-scores_sorted = sorted(scores.items(), key=lambda x: x[-1], reverse=True)
-langs_sorted = [x[0] for x in scores_sorted]
-global_idx_cross = {lang: langs_sorted.index(lang) for lang in scores}
-
-
-
-fig, axes = plt.subplots(figsize=(32,8), ncols=2, nrows=1, sharey=True)
-
-ax_multi, ax_cross = axes
-
-for i, model in enumerate(BITEXT_MODELS):
- scores_multi = {}
- scores_cross = {}
- for ds in MULTILINGUAL_STS:
- if not(all_results.get(model, []).get(ds)):
- continue
- for lang, res in all_results[model][ds]["test"].items():
- if lang == "evaluation_time":
- continue
- multi = True
- if "-" in lang:
- l1, l2 = lang.split("-")
- if l1 != l2:
- multi = False
- else:
- lang = l1
-
- if multi:
- scores_multi.setdefault(lang, [])
- scores_multi[lang].append(res["cos_sim"]["spearman"])
- else:
- scores_cross.setdefault(lang, [])
- scores_cross[lang].append(res["cos_sim"]["spearman"])
-
- scores_multi = {k: np.mean(v) for k,v in scores_multi.items()}
- scores_cross = {k: np.mean(v) for k,v in scores_cross.items()}
-
- scores_sorted_multi = sorted(scores_multi.items(), key=lambda x: global_idx_multi[x[0]], reverse=False)
- scores_sorted_cross = sorted(scores_cross.items(), key=lambda x: global_idx_cross[x[0]], reverse=False)
-
- model_name = MODEL_TO_NAME.get(model, model)
- model_color = MODEL_TO_COLOR.get(model_name.split("-")[0])
-
- ax_multi.plot(
- [x[0] for x in scores_sorted_multi],
- [x[1] for x in scores_sorted_multi],
- label=model_name,
- marker=markers[i],
- color=model_color,
- linewidth=6.0,
- )
-
- ax_cross.plot(
- [x[0] for x in scores_sorted_cross],
- [x[1] for x in scores_sorted_cross],
- label=model_name,
- marker=markers[i],
- color=model_color,
- linewidth=6.0,
- )
-
-ax_multi.set_ylabel("Cos. Sim. Spearman Corr.", fontsize=22)
-
-ax_multi.tick_params(axis='both', which='minor', labelsize=20)
-ax_multi.tick_params(axis='both', which='major', labelsize=20)
-ax_cross.tick_params(axis='both', which='minor', labelsize=20, rotation=45)
-ax_cross.tick_params(axis='both', which='major', labelsize=20, rotation=45)
-
-plt.savefig('multilingual_sts.pdf', dpi=300, bbox_inches='tight')
diff --git a/plotstables/results_to_scale.py b/plotstables/results_to_scale.py
deleted file mode 100644
index 36bf8013..00000000
--- a/plotstables/results_to_scale.py
+++ /dev/null
@@ -1,274 +0,0 @@
-"""
-Creates scaling graphs
-Usage: python results_to_scale.py results_folder_path
-results_folder_path contains results of multiple models whose folders should be named after them
-"""
-import json
-import os
-import sys
-
-from mteb import MTEB
-import numpy as np
-
-### GLOBAL VARIABLES ###
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
-]
-
-# Parameter counts in millions
-MODELS = [
-# Doesnt add a lot of value to the figure
-# [
-# ("MiniLM-L6", "all-MiniLM-L6-v2", 22.713216), # 22.7 M
-# ("MiniLM-L12", "all-MiniLM-L12-v2", 33.360000), # 33.4 M
-# ],
- [
- ("GTR-Base", "gtr-t5-base", 110),
- ("GTR-Large", "gtr-t5-large", 335),
- ("GTR-XL", "gtr-t5-xl", 1240),
- ("GTR-XXL", "gtr-t5-xxl", 4800),
- ],
- [
- ("ST5-Base", "sentence-t5-base", 110),
- ("ST5-Large", "sentence-t5-large", 335),
- ("ST5-XL", "sentence-t5-xl", 1240),
- ("ST5-XXL", "sentence-t5-xxl", 4800),
- ],
- [
- ("SGPT-125M-msmarco", "SGPT-125M-weightedmean-msmarco-specb-bitfit", 125),
- ("SGPT-1.3B-msmarco", "SGPT-1.3B-weightedmean-msmarco-specb-bitfit", 1300),
- ("SGPT-2.7B-msmarco", "SGPT-2.7B-weightedmean-msmarco-specb-bitfit", 2700),
- ("SGPT-5.8B-msmarco", "SGPT-5.8B-weightedmean-msmarco-specb-bitfit", 5800),
- ],
-]
-
-# todo: remove
-lines = ["blue2", "blue", "purple"]
-shades = ["lightblue2", "lightblue", "lightpurple"]
-colors = {
- "purple": "#7B3FB9",
- "lightpurple": "#CBB3E3",
- "blue": "#221D91",
- "lightblue": "#B6B4DB",
- "blue2": "#86D4F1",
- "lightblue2": "#AAF2F2",
-}
-
-
-MODEL_TO_MARKER = {
- "MiniLM": "o",
- "GTR": "x",
- "ST5": "*",
- "SGPT": "v",
-}
-
-# Base from:
-# https://coolors.co/palette/ff5400-ff6d00-ff8500-ff9100-ff9e00-00b4d8-0096c7-0077b6-023e8a-03045e
-# Yellow tones from:
-# https://coolors.co/palette/6ab6dc-49a6d4-2f94c6-277ba5-1f6284-e0b700-ffd20a-ffda33-ffe15c-ffe570
-# Green from:
-# https://coolors.co/palette/f94144-f3722c-f8961e-f9844a-f9c74f-90be6d-43aa8b-4d908e-577590-277da1
-MODEL_TO_COLOR = {
- "MiniLM": "#BAF19C",#"#017600", # Green
- "MPNet": "#F94144",#"#007A7A", # Light Green
- "GTR": "#FF5400",#"#221D91", # Blue 1
- "ST5": "#FF9E00",#"#86D4F1", # Blue 2
- "SGPT": "#00B4D8",#"#7B3FB9", # Purple
- "SimCSE": "#F9C74F",#"#2070B4", # Blue 3
- "LaBSE": "#F9C74F",#"#2070B4", # Blue 3
- "SPECTER": "#E0B700", # Shade of #2070B4
- "Glove": "#023E8A",#"#9BC7DD", # Light Blue
- "LASER2": "#03045E", # Grey
-}
-
-
-TASK_LIST_NAMES = [
- ("Classification", TASK_LIST_CLASSIFICATION, ["en", "en-en"], "accuracy"),
- ("Clustering", TASK_LIST_CLUSTERING, ["en", "en-en"], "v_measure"),
- ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"], "ap"),
- ("Reranking", TASK_LIST_RERANKING, ["en", "en-en"], "map"),
- ("Retrieval", TASK_LIST_RETRIEVAL, ["en", "en-en"], "nDCG@10"),
- ("STS", TASK_LIST_STS, ["en", "en-en"], "cos. sim. spearman corr."),
-]
-
-
-### LOGIC ###
-
-results_folder = sys.argv[1].strip("/")
-
-all_results = {}
-
-for model_name in os.listdir(results_folder):
- model_res_folder = os.path.join(results_folder, model_name)
- if os.path.isdir(model_res_folder):
- all_results.setdefault(model_name, {})
- for file_name in os.listdir(model_res_folder):
- if not file_name.endswith(".json"):
- print(f"Skipping non-json {file_name}")
- continue
- with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f:
- results = json.load(f)
- all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}}
-
-
-def get_row(dataset, model_name, limit_langs=[], skip_langs=[]):
- # CQADupstackRetrieval uses the same metric as its subsets
- tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks
- assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1."
- main_metric = tasks[0].description["main_score"]
- test_result = all_results.get(model_name, {}). get(dataset, {})
-
- # Dev / Val set is used for MSMARCO (See BEIR paper)
- if "MSMARCO" in dataset:
- test_result = (
- test_result.get("dev") if "dev" in test_result else test_result.get("validation")
- )
- else:
- test_result = test_result.get("test")
-
- for lang in tasks[0].description["eval_langs"]:
- if (limit_langs and lang not in limit_langs) or (skip_langs and lang in skip_langs):
- continue
- elif test_result is None:
- raise NotImplementedError(f"Got no test result {test_result} for ds: {dataset} model: {model_name}")
-
- test_result_lang = test_result.get(lang, test_result)
- if main_metric == "cosine_spearman":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman")
- elif main_metric == "ap":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("ap")
- else:
- test_result_lang = test_result_lang.get(main_metric)
-
- if test_result_lang is None:
- raise NotImplementedError
-
- return test_result_lang
- raise NotImplementedError
-
-
-# Create a plot for each task with scaling of the model performances on this task
-import matplotlib.pyplot as plt
-
-fig, axes = plt.subplots(figsize=(16, 20), facecolor='w', edgecolor='k', ncols=2, nrows=3, sharey=False)
-
-
-# Create each boxplot
-model_xticks_global = ['0.1B', '1B','2B','4B']
-model_xticks_num_global = [np.log10(100_000_000), np.log10(1_000_000_000), np.log10(2_000_000_000), np.log10(4_000_000_000)]
-
-for ax, (task_name, task_list, limit_langs, metric) in zip(axes.flatten(), TASK_LIST_NAMES):
- for i, model_group in enumerate(MODELS):
- model_xticks_num = [np.log10(x[-1] * 1_000_000) for x in model_group]
- avg_scores = []
- std_scores = []
- for model in model_group:
- model_name = model[0]
- try:
- model_task_results = [get_row(task, model[1], limit_langs=limit_langs) for task in task_list]
- except:
- model_task_results = [0.5]
-
- avg_scores.append(np.mean(np.array(model_task_results)).item())
- std_scores.append(np.std(np.array(model_task_results)).item())
-
- ax.plot(
- model_xticks_num,
- avg_scores,
- label=model_name.split("-")[0],
- color=MODEL_TO_COLOR.get(model_name.split("-")[0]),
- marker=MODEL_TO_MARKER.get(model_name.split("-")[0])
- )
-
- # Shade doesn't look good, as std is too big
- # ax.fill_between(model_xticks_num, [avg-std for avg, std in zip(avg_scores, std_scores)], [avg+std for avg, std in zip(avg_scores, std_scores)], color=colors.get(shades[i]), alpha=0.5)
-
- ax.set_ylabel(f"Average Performance ({metric})", fontsize=16)
- ax.set_xlabel("Model Parameters (Billions)", fontsize=16)
- ax.set_xticks(model_xticks_num_global, model_xticks_global)
- ax.set_title(task_name, fontweight="bold", fontsize=20)
- ax.grid(alpha=0.5)
-
-# Create deduplicated Global Legend
-handles, labels = plt.gca().get_legend_handles_labels()
-by_label = dict(zip(labels, handles))
-fig.legend(
- by_label.values(),
- by_label.keys(),
- loc=(0.35, 0.94), # "upper center",
- ncol=len(by_label),
- frameon=False,
- fontsize=15,
-)
-
-plt.savefig('scale.pdf', dpi=300, bbox_inches='tight')
diff --git a/plotstables/results_to_tex.py b/plotstables/results_to_tex.py
deleted file mode 100644
index aea339b1..00000000
--- a/plotstables/results_to_tex.py
+++ /dev/null
@@ -1,308 +0,0 @@
-"""
-Usage: python results_to_tex.py results_folder_path
-results_folder_path contains results of multiple models whose folders should be named after them
-"""
-import json
-import os
-import sys
-
-from mteb import MTEB
-import numpy as np
-
-
-### GLOBAL VARIABLES ###
-
-
-TASK_LIST_BITEXT = [
- "BUCC",
- "Tatoeba",
-]
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
-]
-
-TASK_LIST_SUMMARIZATION = [
- "SummEval",
-]
-
-TASK_LIST = (
- TASK_LIST_BITEXT
- + TASK_LIST_CLASSIFICATION
- + TASK_LIST_CLUSTERING
- + TASK_LIST_PAIR_CLASSIFICATION
- + TASK_LIST_RERANKING
- + TASK_LIST_RETRIEVAL
- + TASK_LIST_STS
- + TASK_LIST_SUMMARIZATION
-)
-
-TASK_LIST_EN = (
- TASK_LIST_CLASSIFICATION
- + TASK_LIST_CLUSTERING
- + TASK_LIST_PAIR_CLASSIFICATION
- + TASK_LIST_RERANKING
- + TASK_LIST_RETRIEVAL
- + TASK_LIST_STS
- + TASK_LIST_SUMMARIZATION
-)
-
-TASK_LIST_NAMES = [
- ("Classification", TASK_LIST_CLASSIFICATION, ["en", "en-en"]),
- ("Clustering", TASK_LIST_CLUSTERING, ["en", "en-en"]),
- ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"]),
- ("Reranking", TASK_LIST_RERANKING, ["en", "en-en"]),
- ("Retrieval", TASK_LIST_RETRIEVAL, ["en", "en-en"]),
- ("STS", TASK_LIST_STS, ["en", "en-en"]),
- ("all", TASK_LIST, ["en", "en-en"]),
- ("BitextMining", TASK_LIST_BITEXT, []),
-]
-
-BITEXT_MODELS = MULTILING_MODELS = [
- # "glove.6B.300d",
- # "komninos",
- "LASER2",
- "LaBSE",
- "paraphrase-multilingual-MiniLM-L12-v2",
- "paraphrase-multilingual-mpnet-base-v2",
- "sgpt-bloom-7b1-msmarco",
- # "sgpt-bloom-1b3-nli",
-]
-
-
-SELFSUPERVISED_MODELS = [
- "glove.6B.300d",
- "komninos",
- "bert-base-uncased",
- "unsup-simcse-bert-base-uncased",
-]
-
-SUPERVISED_MODELS = [
- "sup-simcse-bert-base-uncased",
- "msmarco-bert-co-condensor",
- "contriever-base-msmarco",
- "allenai-specter",
- "LaBSE",
- "LASER2",
- "all-MiniLM-L6-v2",
- "all-MiniLM-L12-v2",
- "paraphrase-multilingual-MiniLM-L12-v2",
- "all-mpnet-base-v2",
- "paraphrase-multilingual-mpnet-base-v2",
- "text-similarity-ada-001",
- "SGPT-125M-weightedmean-nli-bitfit",
- "SGPT-5.8B-weightedmean-nli-bitfit",
- "SGPT-125M-weightedmean-msmarco-specb-bitfit",
- "SGPT-1.3B-weightedmean-msmarco-specb-bitfit",
- "SGPT-2.7B-weightedmean-msmarco-specb-bitfit",
- "SGPT-5.8B-weightedmean-msmarco-specb-bitfit",
- "sgpt-bloom-7b1-msmarco",
- "gtr-t5-base", # 110M
- "gtr-t5-large",
- "gtr-t5-xl",
- "gtr-t5-xxl", # 4.8B
- "sentence-t5-base", # 110M
- "sentence-t5-large",
- "sentence-t5-xl",
- "sentence-t5-xxl", # 4.8B
-]
-
-
-MODEL_TO_NAME = {
- "bert-base-uncased": "BERT",
- "gtr-t5-base": "GTR-Base",
- "gtr-t5-large": "GTR-Large",
- "gtr-t5-xl": "GTR-XL",
- "gtr-t5-xxl": "GTR-XXL",
- "sentence-t5-base": "ST5-Base",
- "sentence-t5-large": "ST5-Large",
- "sentence-t5-xl": "ST5-XL",
- "sentence-t5-xxl": "ST5-XXL",
- "SGPT-125M-weightedmean-msmarco-specb-bitfit": "SGPT-125M-msmarco",
- "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": "SGPT-1.3B-msmarco",
- "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": "SGPT-2.7B-msmarco",
- "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": "SGPT-5.8B-msmarco",
- "sgpt-bloom-7b1-msmarco": "SGPT-BLOOM-7.1B-msmarco",
- "SGPT-125M-weightedmean-nli-bitfit": "SGPT-125M-nli",
- "SGPT-5.8B-weightedmean-nli-bitfit": "SGPT-5.8B-nli",
- "sup-simcse-bert-base-uncased": "SimCSE-BERT-sup",
- "contriever-base-msmarco": "Contriever",
- "msmarco-bert-co-condensor": "coCondenser-msmarco", # They write it as coCondenser in the paper
- "unsup-simcse-bert-base-uncased": "SimCSE-BERT-unsup",
- "glove.6B.300d": "Glove",
- "komninos": "Komninos",
- "all-MiniLM-L6-v2": "MiniLM-L6",
- "all-MiniLM-L12-v2": "MiniLM-L12",
- "paraphrase-multilingual-MiniLM-L12-v2": "MiniLM-L12-multilingual",
- "all-mpnet-base-v2": "MPNet",
- "paraphrase-multilingual-mpnet-base-v2": "MPNet-multilingual",
- "allenai-specter": "SPECTER",
- "text-similarity-ada-001": "Ada Similarity",
- "text-search-ada-query-001": "Ada Search Query"
-}
-
-
-
-### LOGIC ###
-
-results_folder = sys.argv[1].strip("/")
-
-all_results = {}
-
-for model_name in os.listdir(results_folder):
- model_res_folder = os.path.join(results_folder, model_name)
- if os.path.isdir(model_res_folder):
- all_results.setdefault(model_name, {})
- for file_name in os.listdir(model_res_folder):
- if not file_name.endswith(".json"):
- print(f"Skipping non-json {file_name}")
- continue
- with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f:
- results = json.load(f)
- all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}}
-
-
-def get_rows(dataset, model_name, limit_langs=[], skip_langs=[]):
- rows = []
- # CQADupstackRetrieval uses the same metric as its subsets
- tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks
- assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1."
- main_metric = tasks[0].description["main_score"]
- test_result = all_results.get(model_name, {}). get(dataset, {})
-
- # Dev / Val set is used for MSMARCO (See BEIR paper)
- if "MSMARCO" in dataset:
- test_result = (
- test_result.get("dev") if "dev" in test_result else test_result.get("validation")
- )
- else:
- test_result = test_result.get("test")
-
- for lang in tasks[0].description["eval_langs"]:
- if (limit_langs and lang not in limit_langs) or (skip_langs and lang in skip_langs):
- continue
- elif test_result is None:
- rows.append([lang, main_metric, None])
- continue
-
- test_result_lang = test_result.get(lang, test_result)
- if main_metric == "cosine_spearman":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman")
- elif main_metric == "ap":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("ap")
- else:
- test_result_lang = test_result_lang.get(main_metric)
-
- if test_result_lang is None:
- rows.append([lang, main_metric, None])
- continue
-
- rows.append([lang, main_metric, test_result_lang])
- return rows
-
-
-def get_table(models, task_list, limit_langs=[], skip_langs=[], name="table"):
- TABLE = "Dataset & Language & " + " & ".join([MODEL_TO_NAME.get(model, model) for model in models]) + " \\\\" + "\n"
- scores_all = []
- for ds in task_list:
- results = [get_rows(dataset=ds, model_name=model, limit_langs=limit_langs, skip_langs=skip_langs) for model in models]
- assert all(len(sub) == len(results[0]) for sub in results)
- for lang_idx in range(len(results[0])):
- scores = [x[lang_idx][-1] for x in results]
- scores_all.append(scores)
- lang = results[0][lang_idx][0]
- one_line = " & ".join([ds, lang] + [str(round(x*100, 2)) if x is not None else "" for x in scores])
- TABLE += one_line + " \\\\" + "\n"
-
- arr = np.array(scores_all, dtype=np.float32)
- # Get an index of columns which has any NaN value
- index = np.isnan(arr).any(axis=0)
- # Delete columns (models) with any NaN value from 2D NumPy Array
- arr = np.delete(arr, index, axis=1)
- # Average
- scores_avg = list(np.mean(arr, axis=0))
- # Insert empty string for NaN columns
- for i, val in enumerate(index):
- if val == True:
- scores_avg.insert(i, "")
- lang = "mix" if not(limit_langs) else limit_langs[0]
- TABLE += " & ".join(["Average", lang] + [str(round(x*100, 2)) if x else "" for x in scores_avg]) + " \\\\" + "\n"
-
- with open(f"{name}.txt", "w") as f:
- f.write(TABLE)
-
-
-get_table(SELFSUPERVISED_MODELS + SUPERVISED_MODELS, TASK_LIST_EN, limit_langs=["en", "en-en",], name="all_en")
-get_table(BITEXT_MODELS, TASK_LIST_BITEXT, limit_langs=[], name="bitext")
-get_table(MULTILING_MODELS, TASK_LIST_CLASSIFICATION, limit_langs=[], skip_langs=["en", "en-en", "en-ext"], name="multilingclf")
-get_table(MULTILING_MODELS, TASK_LIST_STS, limit_langs=[], skip_langs=["en", "en-en", "en-ext"], name="multilingsts")
-
diff --git a/plotstables/scale.pdf b/plotstables/scale.pdf
deleted file mode 100644
index 0ce7ea0f..00000000
Binary files a/plotstables/scale.pdf and /dev/null differ
diff --git a/plotstables/scale.png b/plotstables/scale.png
deleted file mode 100644
index 170dc4dd..00000000
Binary files a/plotstables/scale.png and /dev/null differ
diff --git a/plotstables/sim_data.csv b/plotstables/sim_data.csv
deleted file mode 100644
index 13f03085..00000000
--- a/plotstables/sim_data.csv
+++ /dev/null
@@ -1,68 +0,0 @@
-,AmazonCounterfactualClassification,AmazonPolarityClassification,AmazonReviewsClassification,Banking77Classification,EmotionClassification,ImdbClassification,MassiveIntentClassification,MassiveScenarioClassification,MTOPDomainClassification,MTOPIntentClassification,ToxicConversationsClassification,TweetSentimentExtractionClassification,ArxivClusteringP2P,ArxivClusteringS2S,BiorxivClusteringP2P,BiorxivClusteringS2S,MedrxivClusteringP2P,MedrxivClusteringS2S,RedditClustering,RedditClusteringP2P,StackExchangeClustering,StackExchangeClusteringP2P,TwentyNewsgroupsClustering,SprintDuplicateQuestions,TwitterSemEval2015,TwitterURLCorpus,AskUbuntuDupQuestions,MindSmallReranking,SciDocsRR,StackOverflowDupQuestions,ArguAna,ClimateFEVER,CQADupstackAndroidRetrieval,CQADupstackEnglishRetrieval,CQADupstackGamingRetrieval,CQADupstackGisRetrieval,CQADupstackMathematicaRetrieval,CQADupstackPhysicsRetrieval,CQADupstackProgrammersRetrieval,CQADupstackStatsRetrieval,CQADupstackTexRetrieval,CQADupstackUnixRetrieval,CQADupstackWebmastersRetrieval,CQADupstackWordpressRetrieval,DBPedia,FEVER,FiQA2018,HotpotQA,MSMARCO,NFCorpus,NQ,QuoraRetrieval,SCIDOCS,SciFact,Touche2020,TRECCOVID,BIOSSES,SICK-R,STS12,STS13,STS14,STS15,STS16,STS17,STS22,STSBenchmark,SummEval
-AmazonCounterfactualClassification,0.9999998807907104,0.9725366830825806,0.8517286777496338,0.901214599609375,0.8986697196960449,0.9143805503845215,0.915182888507843,0.915182888507843,0.9136463403701782,0.9136463403701782,0.9268499612808228,0.9379464387893677,0.9061737060546875,0.9226172566413879,0.8816869854927063,0.9111778140068054,0.8716050982475281,0.8851615786552429,0.9361067414283752,0.9444894790649414,0.9205214381217957,0.8694825768470764,0.927936851978302,0.7448422908782959,0.8848876357078552,0.9181852340698242,0.8771646618843079,0.8432382345199585,0.9121214151382446,0.8809256553649902,0.9206087589263916,0.8687456250190735,0.8753382563591003,0.9054211974143982,0.9050220847129822,0.8575048446655273,0.8769376277923584,0.8788719773292542,0.8834834098815918,0.8732110261917114,0.8720847964286804,0.8778753876686096,0.8800479173660278,0.8742043972015381,0.8960760831832886,0.8687456250190735,0.9194890260696411,0.8983748555183411,0.9003165364265442,0.8930765986442566,0.9119835495948792,0.9182446002960205,0.889691174030304,0.892997145652771,0.9215371608734131,0.8833832144737244,0.8625791668891907,0.8299649357795715,0.9155313968658447,0.9190161824226379,0.9440998435020447,0.9286789298057556,0.9331569671630859,0.908374547958374,0.8873857855796814,0.933322012424469,0.9260546565055847
-AmazonPolarityClassification,0.9725366830825806,1.0000001192092896,0.8430818319320679,0.8914044499397278,0.8869525194168091,0.9380159974098206,0.9221842288970947,0.9221842288970947,0.9159311652183533,0.9159311652183533,0.9314469695091248,0.9369900822639465,0.9092541337013245,0.9261904954910278,0.8834359645843506,0.9139003753662109,0.8740972876548767,0.8870366811752319,0.9363611340522766,0.9457828998565674,0.9190340638160706,0.8677751421928406,0.9307129383087158,0.7418009638786316,0.8866813778877258,0.9222720861434937,0.8745728135108948,0.8573817014694214,0.9169661998748779,0.8784961700439453,0.911296546459198,0.8777851462364197,0.8737571239471436,0.9069114327430725,0.9039636254310608,0.8530020713806152,0.874477207660675,0.8754789233207703,0.8846838474273682,0.870514988899231,0.8698432445526123,0.8746495246887207,0.8826687932014465,0.8705847859382629,0.9033768773078918,0.8777851462364197,0.91923987865448,0.9041053652763367,0.9071121215820312,0.8932070136070251,0.9180147051811218,0.922761857509613,0.8992704749107361,0.8951342701911926,0.9281871914863586,0.8897658586502075,0.8574753999710083,0.8390056490898132,0.915518581867218,0.9148537516593933,0.93932044506073,0.9213941097259521,0.9270878434181213,0.904332160949707,0.8904899954795837,0.9300788640975952,0.924091637134552
-AmazonReviewsClassification,0.8517286777496338,0.8430818319320679,1.0,0.8274011611938477,0.8377322554588318,0.8060175180435181,0.8927789330482483,0.8927789330482483,0.8721326589584351,0.8721326589584351,0.8651610612869263,0.8857579231262207,0.8268718719482422,0.8719925284385681,0.8097570538520813,0.8545474410057068,0.8083184361457825,0.8327248692512512,0.8771207332611084,0.8643344044685364,0.8922589421272278,0.7934136986732483,0.8788571953773499,0.6903976202011108,0.829818844795227,0.8436835408210754,0.8549239039421082,0.7972355484962463,0.8631322979927063,0.8426644802093506,0.8367676734924316,0.8256171345710754,0.7958477735519409,0.8322862386703491,0.8206374049186707,0.7885207533836365,0.7998864650726318,0.8036501407623291,0.8091654181480408,0.7987720966339111,0.8001028299331665,0.8078638315200806,0.8033185601234436,0.7957671284675598,0.857245147228241,0.8256171345710754,0.8700592517852783,0.861984133720398,0.8672728538513184,0.8301711678504944,0.8646405339241028,0.8830270767211914,0.8478741645812988,0.831039309501648,0.8493306040763855,0.8365864753723145,0.797872006893158,0.8078639507293701,0.8909081220626831,0.8852756023406982,0.8971998691558838,0.8883463144302368,0.8896051645278931,0.8645649552345276,0.9054290056228638,0.8723177313804626,0.8509836196899414
-Banking77Classification,0.901214599609375,0.8914044499397278,0.8274011611938477,1.0000001192092896,0.8723608255386353,0.8470839262008667,0.9069743156433105,0.9069743156433105,0.9206961393356323,0.9206961393356323,0.8975388407707214,0.9144454002380371,0.8722931742668152,0.896972119808197,0.8474944233894348,0.8749076724052429,0.8394017219543457,0.8544522523880005,0.9231041669845581,0.925590991973877,0.9119146466255188,0.8638715744018555,0.9062290787696838,0.7831484079360962,0.8524944186210632,0.8785852193832397,0.8910298943519592,0.8121806979179382,0.8894634246826172,0.8714233040809631,0.868624746799469,0.8610327243804932,0.891751766204834,0.8907471895217896,0.9007715582847595,0.8569079041481018,0.8734966516494751,0.866807758808136,0.8690007328987122,0.8641847968101501,0.860200822353363,0.8753891587257385,0.8771096467971802,0.8702175617218018,0.8732821941375732,0.8610327243804932,0.9026527404785156,0.8878933191299438,0.9106489419937134,0.8546255826950073,0.8853597640991211,0.9191234707832336,0.85566246509552,0.8536851406097412,0.8839954733848572,0.8441707491874695,0.8158388733863831,0.8224376440048218,0.9016413688659668,0.8994392156600952,0.9188550710678101,0.903643786907196,0.9121543169021606,0.8826841711997986,0.8817184567451477,0.9049835801124573,0.8948301672935486
-EmotionClassification,0.8986697196960449,0.8869525194168091,0.8377322554588318,0.8723608255386353,0.9999998807907104,0.8484991192817688,0.8875608444213867,0.8875608444213867,0.8829092979431152,0.8829092979431152,0.8891539573669434,0.9219067692756653,0.8317292332649231,0.8686325550079346,0.8193415999412537,0.8483346104621887,0.8102065920829773,0.8282783031463623,0.9020230770111084,0.9169633984565735,0.8777654767036438,0.8238407969474792,0.8720272779464722,0.7159684896469116,0.8473405838012695,0.8711422085762024,0.8389461040496826,0.8027688264846802,0.8528178930282593,0.8284692168235779,0.8480105400085449,0.8250551819801331,0.8178394436836243,0.8577408790588379,0.847434937953949,0.7987120747566223,0.8172357678413391,0.8205375075340271,0.8239988684654236,0.8105478286743164,0.8049406409263611,0.8208318948745728,0.8235769867897034,0.8167813420295715,0.8425841331481934,0.8250551819801331,0.8733782172203064,0.8516587615013123,0.8590372204780579,0.840248167514801,0.8614929914474487,0.8839017748832703,0.8242060542106628,0.8306145071983337,0.8763502836227417,0.8282589912414551,0.7966073751449585,0.8122089505195618,0.8802585005760193,0.8810560703277588,0.9074888229370117,0.8856732845306396,0.9061300754547119,0.8690868020057678,0.8543643951416016,0.8943052887916565,0.8744396567344666
-ImdbClassification,0.9143805503845215,0.9380159974098206,0.8060175180435181,0.8470839262008667,0.8484991192817688,0.9999999403953552,0.8814461827278137,0.8814461827278137,0.8795197010040283,0.8795197010040283,0.8966728448867798,0.8972986340522766,0.8641327023506165,0.8861985206604004,0.8488188982009888,0.8755455613136292,0.8384004235267639,0.8527776598930359,0.8933579325675964,0.9051699042320251,0.8781872391700745,0.8293363451957703,0.8849860429763794,0.6948404908180237,0.8515918254852295,0.8860101699829102,0.8357095718383789,0.8353019952774048,0.877151370048523,0.8337087631225586,0.8854134678840637,0.839382529258728,0.8364291787147522,0.8700346946716309,0.8654309511184692,0.8066450357437134,0.8283482193946838,0.8346224427223206,0.8459446430206299,0.8239307999610901,0.8178578019142151,0.8369230031967163,0.8369060158729553,0.825982391834259,0.8585302829742432,0.839382529258728,0.8759719133377075,0.8769184350967407,0.8686230182647705,0.8685956597328186,0.8820010423660278,0.8861343860626221,0.8516706228256226,0.8627360463142395,0.8969246745109558,0.8567239046096802,0.8268139362335205,0.8363828063011169,0.8844971656799316,0.8787712454795837,0.9043587446212769,0.8874390721321106,0.8882074952125549,0.8814995884895325,0.8508477807044983,0.9050480723381042,0.9031330347061157
-MassiveIntentClassification,0.915182888507843,0.9221842288970947,0.8927789330482483,0.9069743156433105,0.8875608444213867,0.8814461827278137,0.9999998807907104,0.9999998807907104,0.9750701189041138,0.9750701189041138,0.9595458507537842,0.9734818339347839,0.8967283368110657,0.9652600884437561,0.8725947141647339,0.9290119409561157,0.8716907501220703,0.9026644825935364,0.9533762335777283,0.9527404308319092,0.9526662826538086,0.9006844758987427,0.9625997543334961,0.7721297740936279,0.9069065451622009,0.9225729703903198,0.921728789806366,0.8853965401649475,0.9450154900550842,0.9193835258483887,0.9042443037033081,0.9111378788948059,0.8964847922325134,0.9208599328994751,0.9270387291908264,0.8749427199363708,0.8865760564804077,0.8897450566291809,0.8953050374984741,0.8811706304550171,0.8818027973175049,0.8984538316726685,0.897138774394989,0.8913282155990601,0.9272512793540955,0.9111378788948059,0.9463034868240356,0.9470813274383545,0.9404415488243103,0.8946558833122253,0.9522876143455505,0.9682824611663818,0.9068668484687805,0.8946090340614319,0.9345963597297668,0.8986713886260986,0.8533274531364441,0.8902477025985718,0.9739812016487122,0.9600082635879517,0.9729264974594116,0.9575598835945129,0.9471473097801208,0.9507492780685425,0.9367305040359497,0.9577158689498901,0.9361376762390137
-MassiveScenarioClassification,0.915182888507843,0.9221842288970947,0.8927789330482483,0.9069743156433105,0.8875608444213867,0.8814461827278137,0.9999998807907104,0.9999998807907104,0.9750701189041138,0.9750701189041138,0.9595458507537842,0.9734818339347839,0.8967283368110657,0.9652600884437561,0.8725947141647339,0.9290119409561157,0.8716907501220703,0.9026644825935364,0.9533762335777283,0.9527404308319092,0.9526662826538086,0.9006844758987427,0.9625997543334961,0.7721297740936279,0.9069065451622009,0.9225729703903198,0.921728789806366,0.8853965401649475,0.9450154900550842,0.9193835258483887,0.9042443037033081,0.9111378788948059,0.8964847922325134,0.9208599328994751,0.9270387291908264,0.8749427199363708,0.8865760564804077,0.8897450566291809,0.8953050374984741,0.8811706304550171,0.8818027973175049,0.8984538316726685,0.897138774394989,0.8913282155990601,0.9272512793540955,0.9111378788948059,0.9463034868240356,0.9470813274383545,0.9404415488243103,0.8946558833122253,0.9522876143455505,0.9682824611663818,0.9068668484687805,0.8946090340614319,0.9345963597297668,0.8986713886260986,0.8533274531364441,0.8902477025985718,0.9739812016487122,0.9600082635879517,0.9729264974594116,0.9575598835945129,0.9471473097801208,0.9507492780685425,0.9367305040359497,0.9577158689498901,0.9361376762390137
-MTOPDomainClassification,0.9136463403701782,0.9159311652183533,0.8721326589584351,0.9206961393356323,0.8829092979431152,0.8795197010040283,0.9750701189041138,0.9750701189041138,1.000000238418579,1.000000238418579,0.9477394223213196,0.9620302319526672,0.8916090726852417,0.9507513046264648,0.8692935109138489,0.9198922514915466,0.8673161864280701,0.8943023681640625,0.9499332308769226,0.9520930647850037,0.9415560960769653,0.8926861882209778,0.9499171376228333,0.7859740853309631,0.9024993777275085,0.9156029224395752,0.9076282382011414,0.8778815865516663,0.9327757358551025,0.9088919162750244,0.8972400426864624,0.8968261480331421,0.900844395160675,0.917822539806366,0.922167956829071,0.8686424493789673,0.8871181607246399,0.8848662972450256,0.8901574015617371,0.8792017698287964,0.8758128881454468,0.892038881778717,0.8904502391815186,0.8847219944000244,0.9200598001480103,0.8968261480331421,0.9321253299713135,0.9370198249816895,0.9469432234764099,0.8838071823120117,0.9368411898612976,0.956068754196167,0.8929500579833984,0.8861535787582397,0.9225219488143921,0.8865776658058167,0.8508256077766418,0.8823556900024414,0.9523103833198547,0.9431838989257812,0.960503101348877,0.9454217553138733,0.9361295700073242,0.9454723000526428,0.918997585773468,0.9517570734024048,0.932615339756012
-MTOPIntentClassification,0.9136463403701782,0.9159311652183533,0.8721326589584351,0.9206961393356323,0.8829092979431152,0.8795197010040283,0.9750701189041138,0.9750701189041138,1.000000238418579,1.000000238418579,0.9477394223213196,0.9620302319526672,0.8916090726852417,0.9507513046264648,0.8692935109138489,0.9198922514915466,0.8673161864280701,0.8943023681640625,0.9499332308769226,0.9520930647850037,0.9415560960769653,0.8926861882209778,0.9499171376228333,0.7859740853309631,0.9024993777275085,0.9156029224395752,0.9076282382011414,0.8778815865516663,0.9327757358551025,0.9088919162750244,0.8972400426864624,0.8968261480331421,0.900844395160675,0.917822539806366,0.922167956829071,0.8686424493789673,0.8871181607246399,0.8848662972450256,0.8901574015617371,0.8792017698287964,0.8758128881454468,0.892038881778717,0.8904502391815186,0.8847219944000244,0.9200598001480103,0.8968261480331421,0.9321253299713135,0.9370198249816895,0.9469432234764099,0.8838071823120117,0.9368411898612976,0.956068754196167,0.8929500579833984,0.8861535787582397,0.9225219488143921,0.8865776658058167,0.8508256077766418,0.8823556900024414,0.9523103833198547,0.9431838989257812,0.960503101348877,0.9454217553138733,0.9361295700073242,0.9454723000526428,0.918997585773468,0.9517570734024048,0.932615339756012
-ToxicConversationsClassification,0.9268499612808228,0.9314469695091248,0.8651610612869263,0.8975388407707214,0.8891539573669434,0.8966728448867798,0.9595458507537842,0.9595458507537842,0.9477394223213196,0.9477394223213196,1.0,0.9791338443756104,0.8923237323760986,0.9557365775108337,0.8709883689880371,0.9188441038131714,0.871146023273468,0.8937941193580627,0.9509134292602539,0.9578840732574463,0.939154326915741,0.8850978016853333,0.9539644122123718,0.7365511655807495,0.9154583811759949,0.9275434613227844,0.8940021991729736,0.88185054063797,0.9295150637626648,0.8942537307739258,0.9117645025253296,0.8989847898483276,0.8807247281074524,0.9173315763473511,0.9148895144462585,0.8601332306861877,0.8703606724739075,0.8878904581069946,0.8843247294425964,0.8718209862709045,0.8644421100616455,0.8797895312309265,0.8873119354248047,0.8789352178573608,0.9204174280166626,0.8989847898483276,0.9462225437164307,0.9388979077339172,0.9315915107727051,0.8950788378715515,0.9423636198043823,0.9756679534912109,0.8898159265518188,0.8889317512512207,0.9373823404312134,0.8933861255645752,0.8529271483421326,0.8919810056686401,0.9546875357627869,0.9443492889404297,0.968707799911499,0.9534297585487366,0.9462233185768127,0.9519502520561218,0.9268797636032104,0.9642019867897034,0.9413120746612549
-TweetSentimentExtractionClassification,0.9379464387893677,0.9369900822639465,0.8857579231262207,0.9144454002380371,0.9219067692756653,0.8972986340522766,0.9734818339347839,0.9734818339347839,0.9620302319526672,0.9620302319526672,0.9791338443756104,0.9999998807907104,0.8941646814346313,0.9579667448997498,0.8712595701217651,0.9201945066452026,0.8699737787246704,0.89603590965271,0.9627991318702698,0.9685115814208984,0.9449712634086609,0.8842088580131531,0.957473874092102,0.7514926791191101,0.9206107258796692,0.9304682612419128,0.9066123366355896,0.8845974802970886,0.9322013854980469,0.902407705783844,0.9012815952301025,0.9066250324249268,0.8804208636283875,0.9191873669624329,0.9144876003265381,0.8603336215019226,0.8727058172225952,0.8835071921348572,0.8789454698562622,0.8683345913887024,0.8654515743255615,0.8802311420440674,0.8835926055908203,0.8761081099510193,0.9267283082008362,0.9066250324249268,0.9446496963500977,0.9437984228134155,0.9354615807533264,0.8943512439727783,0.9478681683540344,0.9679496884346008,0.8895792961120605,0.8876237869262695,0.9310782551765442,0.8951513767242432,0.8504247069358826,0.8916918635368347,0.9640517830848694,0.952302873134613,0.9752659201622009,0.960588812828064,0.9546923041343689,0.9575697183609009,0.9292759299278259,0.9668652415275574,0.9422594308853149
-ArxivClusteringP2P,0.9061737060546875,0.9092541337013245,0.8268718719482422,0.8722931742668152,0.8317292332649231,0.8641327023506165,0.8967283368110657,0.8967283368110657,0.8916090726852417,0.8916090726852417,0.8923237323760986,0.8941646814346313,1.0000001192092896,0.9296634197235107,0.9502661228179932,0.9453719854354858,0.9198501706123352,0.9335336685180664,0.9047669172286987,0.9180348515510559,0.9226759076118469,0.8873042464256287,0.9214056134223938,0.7285578846931458,0.849682629108429,0.8937832117080688,0.8850733637809753,0.8498433828353882,0.942658007144928,0.8952110409736633,0.9187605381011963,0.8781493902206421,0.8702689409255981,0.9049964547157288,0.8932080864906311,0.8815957903862,0.9128819108009338,0.9258798360824585,0.9027954936027527,0.916979193687439,0.9049314856529236,0.8934173583984375,0.8883233666419983,0.8882037997245789,0.9017259478569031,0.8781493902206421,0.9082144498825073,0.9008392095565796,0.8916330933570862,0.9167720675468445,0.910786509513855,0.9078540205955505,0.9503505229949951,0.9326635599136353,0.9119592308998108,0.9338494539260864,0.9067458510398865,0.8065285086631775,0.8979881405830383,0.912732720375061,0.9206886291503906,0.9100639820098877,0.9006866812705994,0.8714514374732971,0.8883957266807556,0.8937532901763916,0.9103951454162598
-ArxivClusteringS2S,0.9226172566413879,0.9261904954910278,0.8719925284385681,0.896972119808197,0.8686325550079346,0.8861985206604004,0.9652600884437561,0.9652600884437561,0.9507513046264648,0.9507513046264648,0.9557365775108337,0.9579667448997498,0.9296634197235107,1.0000001192092896,0.8961361050605774,0.9565531611442566,0.8940218091011047,0.9300719499588013,0.9504541754722595,0.9541244506835938,0.9593493342399597,0.9132787585258484,0.9759801030158997,0.7547072768211365,0.9124619960784912,0.9184166789054871,0.9116992354393005,0.8834333419799805,0.9691587686538696,0.9220894575119019,0.9125237464904785,0.9121813178062439,0.8931977152824402,0.9257218837738037,0.9272482991218567,0.888041615486145,0.9053133130073547,0.9213353395462036,0.9087139964103699,0.9069965481758118,0.8965097069740295,0.9025301933288574,0.9069535732269287,0.8937448859214783,0.9494740962982178,0.9121813178062439,0.947498619556427,0.956941545009613,0.9396511316299438,0.9095065593719482,0.9639177918434143,0.9652320742607117,0.9295390248298645,0.9176734685897827,0.9311457872390747,0.9181896448135376,0.8817151784896851,0.8841482996940613,0.9600266218185425,0.9491159915924072,0.9618067741394043,0.9499640464782715,0.9319314360618591,0.942611038684845,0.933143138885498,0.949581503868103,0.9360114932060242
-BiorxivClusteringP2P,0.8816869854927063,0.8834359645843506,0.8097570538520813,0.8474944233894348,0.8193415999412537,0.8488188982009888,0.8725947141647339,0.8725947141647339,0.8692935109138489,0.8692935109138489,0.8709883689880371,0.8712595701217651,0.9502661228179932,0.8961361050605774,0.9999998211860657,0.9384621381759644,0.956779956817627,0.9311316013336182,0.8793036341667175,0.8974093794822693,0.8921758532524109,0.8562670350074768,0.8946423530578613,0.7087208032608032,0.8287373781204224,0.8686756491661072,0.8585236072540283,0.8195667266845703,0.9144576191902161,0.8649075031280518,0.9078131318092346,0.8496026396751404,0.8520771265029907,0.8786687850952148,0.8723767995834351,0.8544682264328003,0.8758416771888733,0.8830129504203796,0.8758763670921326,0.8930484056472778,0.8660279512405396,0.8672094345092773,0.8678568005561829,0.8641262650489807,0.8874456882476807,0.8496026396751404,0.8788062334060669,0.8783058524131775,0.8714765310287476,0.9372479319572449,0.8799590468406677,0.8842073082923889,0.9247989654541016,0.9538105130195618,0.8883203864097595,0.942850649356842,0.9347233772277832,0.7822513580322266,0.8725536465644836,0.8873900175094604,0.8985730409622192,0.8854580521583557,0.8810415267944336,0.8527474999427795,0.8675625324249268,0.8723320960998535,0.8993033170700073
-BiorxivClusteringS2S,0.9111778140068054,0.9139003753662109,0.8545474410057068,0.8749076724052429,0.8483346104621887,0.8755455613136292,0.9290119409561157,0.9290119409561157,0.9198922514915466,0.9198922514915466,0.9188441038131714,0.9201945066452026,0.9453719854354858,0.9565531611442566,0.9384621381759644,0.9999999403953552,0.9279764294624329,0.9682707190513611,0.9273788332939148,0.9292674660682678,0.9397923350334167,0.9037898182868958,0.9507690668106079,0.7521305084228516,0.8774093389511108,0.9051491618156433,0.898175835609436,0.8788741827011108,0.9745520353317261,0.9197582602500916,0.9147593975067139,0.8964840173721313,0.8843629360198975,0.9122307896614075,0.9139900803565979,0.8944020867347717,0.913551926612854,0.9188753366470337,0.911128044128418,0.923565149307251,0.8992398977279663,0.9039843082427979,0.9041688442230225,0.8961097002029419,0.9335931539535522,0.8964840173721313,0.9220610857009888,0.9312097430229187,0.9096160531044006,0.9406470060348511,0.9374184012413025,0.930925726890564,0.9657474756240845,0.9617788791656494,0.915169358253479,0.957700252532959,0.9307554960250854,0.844598650932312,0.9283256530761719,0.932389497756958,0.9405863881111145,0.9268279671669006,0.9138600826263428,0.9094533324241638,0.9075512886047363,0.9221967458724976,0.921167254447937
-MedrxivClusteringP2P,0.8716050982475281,0.8740972876548767,0.8083184361457825,0.8394017219543457,0.8102065920829773,0.8384004235267639,0.8716907501220703,0.8716907501220703,0.8673161864280701,0.8673161864280701,0.871146023273468,0.8699737787246704,0.9198501706123352,0.8940218091011047,0.956779956817627,0.9279764294624329,0.9999998807907104,0.9557642936706543,0.8812946081161499,0.8938334584236145,0.894402801990509,0.8485972285270691,0.9003008604049683,0.7113694548606873,0.8280568718910217,0.87420654296875,0.8530390858650208,0.8247097730636597,0.9096218347549438,0.8582576513290405,0.9103243947029114,0.8590527176856995,0.8470963835716248,0.8777068257331848,0.8736319541931152,0.8565325140953064,0.8669453859329224,0.870018720626831,0.8732233643531799,0.9020135402679443,0.8579853177070618,0.8595551252365112,0.8668990731239319,0.8557989597320557,0.8835738897323608,0.8590527176856995,0.8795516490936279,0.8794113397598267,0.8779413104057312,0.9487459659576416,0.8801149129867554,0.8837840557098389,0.9216246008872986,0.9501500725746155,0.888805091381073,0.9613710641860962,0.9220839142799377,0.7837725281715393,0.8748796582221985,0.8852849006652832,0.899524986743927,0.8803415298461914,0.8795533776283264,0.853192925453186,0.8731187582015991,0.8726744651794434,0.9043846130371094
-MedrxivClusteringS2S,0.8851615786552429,0.8870366811752319,0.8327248692512512,0.8544522523880005,0.8282783031463623,0.8527776598930359,0.9026644825935364,0.9026644825935364,0.8943023681640625,0.8943023681640625,0.8937941193580627,0.89603590965271,0.9335336685180664,0.9300719499588013,0.9311316013336182,0.9682707190513611,0.9557642936706543,0.9999999403953552,0.9053035378456116,0.9091455340385437,0.9207715392112732,0.8806784749031067,0.9318292140960693,0.7369350790977478,0.8536560535430908,0.8926279544830322,0.8838703632354736,0.8627454042434692,0.953004002571106,0.8977246284484863,0.9049397110939026,0.884668231010437,0.8621524572372437,0.886515200138092,0.8903034329414368,0.8818444013595581,0.8924626708030701,0.8929807543754578,0.8892922401428223,0.9127675890922546,0.8801870942115784,0.8826228976249695,0.8879578709602356,0.8784561157226562,0.9113901853561401,0.884668231010437,0.9016450047492981,0.9183295369148254,0.8957593441009521,0.9393429756164551,0.9159761667251587,0.9054452776908875,0.9564609527587891,0.9464058876037598,0.896754264831543,0.9670438766479492,0.9164301753044128,0.8197333216667175,0.908024787902832,0.9073441624641418,0.9192919135093689,0.90470290184021,0.8936593532562256,0.8826584219932556,0.895809531211853,0.8965899348258972,0.9074312448501587
-RedditClustering,0.9361067414283752,0.9363611340522766,0.8771207332611084,0.9231041669845581,0.9020230770111084,0.8933579325675964,0.9533762335777283,0.9533762335777283,0.9499332308769226,0.9499332308769226,0.9509134292602539,0.9627991318702698,0.9047669172286987,0.9504541754722595,0.8793036341667175,0.9273788332939148,0.8812946081161499,0.9053035378456116,1.0,0.9636914730072021,0.9537168145179749,0.8936053514480591,0.9547697305679321,0.771812379360199,0.9131519198417664,0.9342699646949768,0.902423083782196,0.8822137117385864,0.935879111289978,0.9017000794410706,0.9085865616798401,0.9140297770500183,0.8902676105499268,0.928565263748169,0.9274815917015076,0.8728464841842651,0.8922913074493408,0.8964251279830933,0.8974623084068298,0.8893818259239197,0.8851460814476013,0.8965474367141724,0.8993308544158936,0.8881715536117554,0.9286068081855774,0.9140297770500183,0.9438943862915039,0.9391739964485168,0.9369815587997437,0.9039009809494019,0.9450920224189758,0.959994375705719,0.9040453433990479,0.8978421688079834,0.9354337453842163,0.89899742603302,0.8582507967948914,0.8789204955101013,0.9507595896720886,0.9454846978187561,0.964044988155365,0.9484242796897888,0.9516837000846863,0.9390854835510254,0.9249169826507568,0.9586203694343567,0.9392504096031189
-RedditClusteringP2P,0.9444894790649414,0.9457828998565674,0.8643344044685364,0.925590991973877,0.9169633984565735,0.9051699042320251,0.9527404308319092,0.9527404308319092,0.9520930647850037,0.9520930647850037,0.9578840732574463,0.9685115814208984,0.9180348515510559,0.9541244506835938,0.8974093794822693,0.9292674660682678,0.8938334584236145,0.9091455340385437,0.9636914730072021,1.0000001192092896,0.9489050507545471,0.9091170430183411,0.9504086375236511,0.7593399882316589,0.9181907773017883,0.9331505298614502,0.9009460210800171,0.8788705468177795,0.9390465617179871,0.902940571308136,0.9276478886604309,0.8984732627868652,0.9011673927307129,0.9325352311134338,0.9371761083602905,0.8817288279533386,0.8999888896942139,0.9109740257263184,0.9104718565940857,0.901867687702179,0.8874367475509644,0.9019376039505005,0.9086974263191223,0.8998266458511353,0.92087322473526,0.8984732627868652,0.9436620473861694,0.931955099105835,0.9319586157798767,0.9122331738471985,0.9413416981697083,0.9608737826347351,0.9078938364982605,0.9097794890403748,0.9485433101654053,0.9055797457695007,0.8730185031890869,0.8813309073448181,0.9435673952102661,0.9429996013641357,0.96527099609375,0.9469503164291382,0.9470174312591553,0.9395745992660522,0.9250169992446899,0.9560426473617554,0.9494283199310303
-StackExchangeClustering,0.9205214381217957,0.9190340638160706,0.8922589421272278,0.9119146466255188,0.8777654767036438,0.8781872391700745,0.9526662826538086,0.9526662826538086,0.9415560960769653,0.9415560960769653,0.939154326915741,0.9449712634086609,0.9226759076118469,0.9593493342399597,0.8921758532524109,0.9397923350334167,0.894402801990509,0.9207715392112732,0.9537168145179749,0.9489050507545471,0.9999999403953552,0.9163016676902771,0.9632638096809387,0.7653152346611023,0.8863519430160522,0.9227038025856018,0.920473575592041,0.8657011389732361,0.9519332051277161,0.9323137402534485,0.9170532822608948,0.9021029472351074,0.9093854427337646,0.9599207639694214,0.9356912970542908,0.9072699546813965,0.9264189600944519,0.9299076795578003,0.9368252158164978,0.9241059422492981,0.9255838990211487,0.9274458289146423,0.9290803074836731,0.9175654649734497,0.9289190769195557,0.9021029472351074,0.9526970386505127,0.9371076226234436,0.934260904788971,0.9154017567634583,0.9463439583778381,0.967335045337677,0.9253087043762207,0.9159756898880005,0.9310891032218933,0.9119280576705933,0.876883327960968,0.8612088561058044,0.9496986269950867,0.9500508308410645,0.9619574546813965,0.9489233493804932,0.9536707401275635,0.9245097041130066,0.9384429454803467,0.9446290731430054,0.9257869124412537
-StackExchangeClusteringP2P,0.8694825768470764,0.8677751421928406,0.7934136986732483,0.8638715744018555,0.8238407969474792,0.8293363451957703,0.9006844758987427,0.9006844758987427,0.8926861882209778,0.8926861882209778,0.8850978016853333,0.8842088580131531,0.8873042464256287,0.9132787585258484,0.8562670350074768,0.9037898182868958,0.8485972285270691,0.8806784749031067,0.8936053514480591,0.9091170430183411,0.9163016676902771,0.9999999403953552,0.9050739407539368,0.7441107630729675,0.8436607122421265,0.8557848334312439,0.8838653564453125,0.8219260573387146,0.9194225668907166,0.918297290802002,0.8716145753860474,0.8466858863830566,0.9168010950088501,0.9102783799171448,0.9504308104515076,0.9322190880775452,0.939926028251648,0.9218308925628662,0.9459747672080994,0.9272336959838867,0.9189807772636414,0.9332571029663086,0.9323667287826538,0.9188860058784485,0.865909218788147,0.8466858863830566,0.9015153050422668,0.8845800161361694,0.8738407492637634,0.865176260471344,0.8900197744369507,0.9089018702507019,0.899509608745575,0.8790194392204285,0.876009464263916,0.8632569313049316,0.8407849073410034,0.8076320886611938,0.8901703953742981,0.8968315720558167,0.9036004543304443,0.8906463384628296,0.8955281376838684,0.8716039061546326,0.8718999028205872,0.8934082984924316,0.87904953956604
-TwentyNewsgroupsClustering,0.927936851978302,0.9307129383087158,0.8788571953773499,0.9062290787696838,0.8720272779464722,0.8849860429763794,0.9625997543334961,0.9625997543334961,0.9499171376228333,0.9499171376228333,0.9539644122123718,0.957473874092102,0.9214056134223938,0.9759801030158997,0.8946423530578613,0.9507690668106079,0.9003008604049683,0.9318292140960693,0.9547697305679321,0.9504086375236511,0.9632638096809387,0.9050739407539368,1.0000001192092896,0.7655467987060547,0.9043189883232117,0.9265701770782471,0.9229085445404053,0.8874714374542236,0.962175726890564,0.9225630164146423,0.9156261682510376,0.9241911172866821,0.8967640995979309,0.9317466616630554,0.9298926591873169,0.888175904750824,0.901106059551239,0.9096159934997559,0.9153035879135132,0.9074497222900391,0.8974940180778503,0.9126835465431213,0.9110020995140076,0.8972079753875732,0.9430718421936035,0.9241911172866821,0.9498501420021057,0.9545242190361023,0.943306565284729,0.9156935811042786,0.9650863409042358,0.9623216986656189,0.9296791553497314,0.9186417460441589,0.9339285492897034,0.9249359369277954,0.8814013004302979,0.879280149936676,0.9567474722862244,0.9498928785324097,0.9652311205863953,0.9521862268447876,0.9399745464324951,0.9386094212532043,0.9347309470176697,0.9504886865615845,0.9389493465423584
-SprintDuplicateQuestions,0.7448422908782959,0.7418009638786316,0.6903976202011108,0.7831484079360962,0.7159684896469116,0.6948404908180237,0.7721297740936279,0.7721297740936279,0.7859740853309631,0.7859740853309631,0.7365511655807495,0.7514926791191101,0.7285578846931458,0.7547072768211365,0.7087208032608032,0.7521305084228516,0.7113694548606873,0.7369350790977478,0.771812379360199,0.7593399882316589,0.7653152346611023,0.7441107630729675,0.7655467987060547,1.0000001192092896,0.7108128070831299,0.7352480292320251,0.7720503807067871,0.6729485392570496,0.7625035047531128,0.7492141723632812,0.7167574167251587,0.7223891615867615,0.7940370440483093,0.7370885610580444,0.7504194378852844,0.7442988157272339,0.7660002708435059,0.7272855639457703,0.7467317581176758,0.7388317584991455,0.7514438033103943,0.7605757713317871,0.7364763021469116,0.7487673163414001,0.7269722819328308,0.7223891615867615,0.7529628872871399,0.7362186312675476,0.7674270272254944,0.7220373749732971,0.7399367094039917,0.7632731795310974,0.740402340888977,0.7349553108215332,0.7392125129699707,0.7177295684814453,0.712036669254303,0.6801100373268127,0.7566519975662231,0.7461093068122864,0.7562764286994934,0.74090975522995,0.7517213821411133,0.7253054976463318,0.7493941187858582,0.7513360977172852,0.7284443974494934
-TwitterSemEval2015,0.8848876357078552,0.8866813778877258,0.829818844795227,0.8524944186210632,0.8473405838012695,0.8515918254852295,0.9069065451622009,0.9069065451622009,0.9024993777275085,0.9024993777275085,0.9154583811759949,0.9206107258796692,0.849682629108429,0.9124619960784912,0.8287373781204224,0.8774093389511108,0.8280568718910217,0.8536560535430908,0.9131519198417664,0.9181907773017883,0.8863519430160522,0.8436607122421265,0.9043189883232117,0.7108128070831299,1.0,0.8765551447868347,0.8490984439849854,0.8362776637077332,0.8876252174377441,0.8500367999076843,0.8637065887451172,0.8542394042015076,0.8377655744552612,0.8568944334983826,0.8744352459907532,0.8095758557319641,0.8217236399650574,0.8345741033554077,0.8340392112731934,0.8295093774795532,0.8197506666183472,0.8345680236816406,0.8336871266365051,0.8268483281135559,0.8812722563743591,0.8542394042015076,0.8879868388175964,0.8985650539398193,0.8808208107948303,0.8472391366958618,0.8976626992225647,0.9045380353927612,0.8541281223297119,0.8465986251831055,0.8941338062286377,0.8519248962402344,0.8185410499572754,0.8878549337387085,0.9094604253768921,0.8995444178581238,0.9149953126907349,0.9032992720603943,0.8763034343719482,0.9204296469688416,0.8792893886566162,0.9244780540466309,0.920987069606781
-TwitterURLCorpus,0.9181852340698242,0.9222720861434937,0.8436835408210754,0.8785852193832397,0.8711422085762024,0.8860101699829102,0.9225729703903198,0.9225729703903198,0.9156029224395752,0.9156029224395752,0.9275434613227844,0.9304682612419128,0.8937832117080688,0.9184166789054871,0.8686756491661072,0.9051491618156433,0.87420654296875,0.8926279544830322,0.9342699646949768,0.9331505298614502,0.9227038025856018,0.8557848334312439,0.9265701770782471,0.7352480292320251,0.8765551447868347,0.9999999403953552,0.8714816570281982,0.8772052526473999,0.9152761697769165,0.8724372982978821,0.9144514799118042,0.8842594623565674,0.8573910593986511,0.9045571088790894,0.8917332291603088,0.8425986766815186,0.8575678467750549,0.8722454309463501,0.8732454776763916,0.8640527129173279,0.8527172803878784,0.8612546324729919,0.87152498960495,0.8667789697647095,0.8917800784111023,0.8842594623565674,0.9199483394622803,0.9096399545669556,0.9038676619529724,0.8938794136047363,0.9211317300796509,0.9303578734397888,0.8930266499519348,0.8879072666168213,0.9263610243797302,0.8916494846343994,0.8485643863677979,0.8542071580886841,0.9259677529335022,0.9181943535804749,0.9424399137496948,0.9249746799468994,0.9261443614959717,0.9087364673614502,0.9015825986862183,0.9283378720283508,0.926887035369873
-AskUbuntuDupQuestions,0.8771646618843079,0.8745728135108948,0.8549239039421082,0.8910298943519592,0.8389461040496826,0.8357095718383789,0.921728789806366,0.921728789806366,0.9076282382011414,0.9076282382011414,0.8940021991729736,0.9066123366355896,0.8850733637809753,0.9116992354393005,0.8585236072540283,0.898175835609436,0.8530390858650208,0.8838703632354736,0.902423083782196,0.9009460210800171,0.920473575592041,0.8838653564453125,0.9229085445404053,0.7720503807067871,0.8490984439849854,0.8714816570281982,0.9999999403953552,0.8271192312240601,0.9121510982513428,0.9237239956855774,0.874069333076477,0.8626706600189209,0.9023654460906982,0.8748583793640137,0.8971635103225708,0.8783930540084839,0.8864251375198364,0.8661698698997498,0.8830022215843201,0.8678272366523743,0.880608856678009,0.93036949634552,0.8850497603416443,0.8874524235725403,0.8765844106674194,0.8626706600189209,0.9054697155952454,0.8964870572090149,0.888830304145813,0.8649072647094727,0.8988544940948486,0.9126819968223572,0.8830411434173584,0.864525556564331,0.8846127390861511,0.8762941360473633,0.8330298662185669,0.8284757733345032,0.914065420627594,0.9060060381889343,0.9224478602409363,0.9142858386039734,0.9095703363418579,0.8892186880111694,0.8974651098251343,0.901841938495636,0.8834519982337952
-MindSmallReranking,0.8432382345199585,0.8573817014694214,0.7972355484962463,0.8121806979179382,0.8027688264846802,0.8353019952774048,0.8853965401649475,0.8853965401649475,0.8778815865516663,0.8778815865516663,0.88185054063797,0.8845974802970886,0.8498433828353882,0.8834333419799805,0.8195667266845703,0.8788741827011108,0.8247097730636597,0.8627454042434692,0.8822137117385864,0.8788705468177795,0.8657011389732361,0.8219260573387146,0.8874714374542236,0.6729485392570496,0.8362776637077332,0.8772052526473999,0.8271192312240601,0.9999998807907104,0.8903162479400635,0.8392180800437927,0.8564772009849548,0.8417803645133972,0.8068230152130127,0.8346021175384521,0.8371538519859314,0.7987034916877747,0.8081290125846863,0.8156949281692505,0.8110246062278748,0.8064138293266296,0.8085086941719055,0.8037967681884766,0.8270236253738403,0.810706615447998,0.8491460084915161,0.8417803645133972,0.8699422478675842,0.8755338788032532,0.8483620285987854,0.8392335772514343,0.8892091512680054,0.8765057325363159,0.8636199235916138,0.8338578343391418,0.8729779720306396,0.8616792559623718,0.802665114402771,0.8343176245689392,0.873137354850769,0.8696905374526978,0.889416515827179,0.875885009765625,0.8529878854751587,0.8793433308601379,0.8625198602676392,0.8832800984382629,0.8852307796478271
-SciDocsRR,0.9121214151382446,0.9169661998748779,0.8631322979927063,0.8894634246826172,0.8528178930282593,0.877151370048523,0.9450154900550842,0.9450154900550842,0.9327757358551025,0.9327757358551025,0.9295150637626648,0.9322013854980469,0.942658007144928,0.9691587686538696,0.9144576191902161,0.9745520353317261,0.9096218347549438,0.953004002571106,0.935879111289978,0.9390465617179871,0.9519332051277161,0.9194225668907166,0.962175726890564,0.7625035047531128,0.8876252174377441,0.9152761697769165,0.9121510982513428,0.8903162479400635,0.9999999403953552,0.9336423277854919,0.9188738465309143,0.8973691463470459,0.8964974880218506,0.9219396710395813,0.9212622046470642,0.9038124680519104,0.9215748310089111,0.9176410436630249,0.9275285005569458,0.9317771792411804,0.9046880602836609,0.9125511646270752,0.9187279343605042,0.907027006149292,0.932131826877594,0.8973691463470459,0.9359960556030273,0.9427277445793152,0.9200493097305298,0.918515145778656,0.94994056224823,0.943608820438385,0.9736051559448242,0.9353674650192261,0.9235967397689819,0.9372914433479309,0.8973731994628906,0.8585872650146484,0.9384756684303284,0.9367377161979675,0.9479785561561584,0.9355614185333252,0.9222978949546814,0.9234777092933655,0.9208407998085022,0.9328756928443909,0.9246395826339722
-StackOverflowDupQuestions,0.8809256553649902,0.8784961700439453,0.8426644802093506,0.8714233040809631,0.8284692168235779,0.8337087631225586,0.9193835258483887,0.9193835258483887,0.9088919162750244,0.9088919162750244,0.8942537307739258,0.902407705783844,0.8952110409736633,0.9220894575119019,0.8649075031280518,0.9197582602500916,0.8582576513290405,0.8977246284484863,0.9017000794410706,0.902940571308136,0.9323137402534485,0.918297290802002,0.9225630164146423,0.7492141723632812,0.8500367999076843,0.8724372982978821,0.9237239956855774,0.8392180800437927,0.9336423277854919,1.0,0.8813484311103821,0.862632155418396,0.8961885571479797,0.8927103877067566,0.9056882858276367,0.9124735593795776,0.9236457943916321,0.8843101859092712,0.9159318804740906,0.9021614193916321,0.9091837406158447,0.9131677150726318,0.9141953587532043,0.9168320894241333,0.8864024877548218,0.862632155418396,0.9124810099601746,0.9039829969406128,0.8921213150024414,0.8695310950279236,0.9073175191879272,0.9127946496009827,0.9117957949638367,0.8792681694030762,0.8867591619491577,0.8783340454101562,0.8480639457702637,0.82960045337677,0.9093490242958069,0.9070543050765991,0.9182902574539185,0.9101777076721191,0.9040267467498779,0.8930013179779053,0.8990249037742615,0.9047304391860962,0.8860877752304077
-ArguAna,0.9206087589263916,0.911296546459198,0.8367676734924316,0.868624746799469,0.8480105400085449,0.8854134678840637,0.9042443037033081,0.9042443037033081,0.8972400426864624,0.8972400426864624,0.9117645025253296,0.9012815952301025,0.9187605381011963,0.9125237464904785,0.9078131318092346,0.9147593975067139,0.9103243947029114,0.9049397110939026,0.9085865616798401,0.9276478886604309,0.9170532822608948,0.8716145753860474,0.9156261682510376,0.7167574167251587,0.8637065887451172,0.9144514799118042,0.874069333076477,0.8564772009849548,0.9188738465309143,0.8813484311103821,1.0,0.868762731552124,0.8723280429840088,0.9121270179748535,0.900233805179596,0.8606018424034119,0.8785495758056641,0.8957704305648804,0.9052194356918335,0.8878010511398315,0.8700098991394043,0.8799136877059937,0.8878820538520813,0.8797154426574707,0.8886457085609436,0.868762731552124,0.920708417892456,0.8948706984519958,0.8937736749649048,0.9279579520225525,0.9106296896934509,0.9145981669425964,0.9124789237976074,0.9248058795928955,0.9558223485946655,0.9113011956214905,0.8957175016403198,0.8236397504806519,0.914850115776062,0.9224849343299866,0.9421572685241699,0.9227895736694336,0.9244217872619629,0.8932615518569946,0.8931587934494019,0.9195448756217957,0.9351081848144531
-ClimateFEVER,0.8687456250190735,0.8777851462364197,0.8256171345710754,0.8610327243804932,0.8250551819801331,0.839382529258728,0.9111378788948059,0.9111378788948059,0.8968261480331421,0.8968261480331421,0.8989847898483276,0.9066250324249268,0.8781493902206421,0.9121813178062439,0.8496026396751404,0.8964840173721313,0.8590527176856995,0.884668231010437,0.9140297770500183,0.8984732627868652,0.9021029472351074,0.8466858863830566,0.9241911172866821,0.7223891615867615,0.8542394042015076,0.8842594623565674,0.8626706600189209,0.8417803645133972,0.8973691463470459,0.862632155418396,0.868762731552124,1.0000001192092896,0.8460091352462769,0.877137303352356,0.8844062089920044,0.8418534994125366,0.8496251106262207,0.8617082834243774,0.8505048155784607,0.8545312881469727,0.8441368937492371,0.8518548607826233,0.8488019108772278,0.8423066735267639,0.9115601778030396,1.0000001192092896,0.8865174651145935,0.9264897108078003,0.9099120497703552,0.8735563158988953,0.9296450614929199,0.9116727113723755,0.8757598400115967,0.872587263584137,0.8834298253059387,0.8787876963615417,0.8280983567237854,0.8355134129524231,0.9138962626457214,0.9049146771430969,0.9181155562400818,0.9026291370391846,0.8828028440475464,0.8914071917533875,0.8870561718940735,0.8974177241325378,0.9090380072593689
-CQADupstackAndroidRetrieval,0.8753382563591003,0.8737571239471436,0.7958477735519409,0.891751766204834,0.8178394436836243,0.8364291787147522,0.8964847922325134,0.8964847922325134,0.900844395160675,0.900844395160675,0.8807247281074524,0.8804208636283875,0.8702689409255981,0.8931977152824402,0.8520771265029907,0.8843629360198975,0.8470963835716248,0.8621524572372437,0.8902676105499268,0.9011673927307129,0.9093854427337646,0.9168010950088501,0.8967640995979309,0.7940370440483093,0.8377655744552612,0.8573910593986511,0.9023654460906982,0.8068230152130127,0.8964974880218506,0.8961885571479797,0.8723280429840088,0.8460091352462769,1.0000001192092896,0.9084342122077942,0.9370324015617371,0.9095420837402344,0.9166147112846375,0.9050965905189514,0.9206304550170898,0.9030284285545349,0.8972444534301758,0.9373556971549988,0.9290414452552795,0.9209067821502686,0.8635401725769043,0.8460091352462769,0.8912644982337952,0.8783244490623474,0.8861086964607239,0.8625217080116272,0.8842442631721497,0.9079585671424866,0.8729583621025085,0.8696499466896057,0.8786553740501404,0.8585020303726196,0.8368518948554993,0.8128756284713745,0.8859356641769409,0.8857338428497314,0.9007682204246521,0.8860815763473511,0.8972189426422119,0.8669255375862122,0.8668273091316223,0.8936954140663147,0.882964015007019
-CQADupstackEnglishRetrieval,0.9054211974143982,0.9069114327430725,0.8322862386703491,0.8907471895217896,0.8577408790588379,0.8700346946716309,0.9208599328994751,0.9208599328994751,0.917822539806366,0.917822539806366,0.9173315763473511,0.9191873669624329,0.9049964547157288,0.9257218837738037,0.8786687850952148,0.9122307896614075,0.8777068257331848,0.886515200138092,0.928565263748169,0.9325352311134338,0.9599207639694214,0.9102783799171448,0.9317466616630554,0.7370885610580444,0.8568944334983826,0.9045571088790894,0.8748583793640137,0.8346021175384521,0.9219396710395813,0.8927103877067566,0.9121270179748535,0.877137303352356,0.9084342122077942,1.0,0.9400819540023804,0.9009470343589783,0.9214543104171753,0.9414180517196655,0.9488763213157654,0.9316865801811218,0.9194899201393127,0.9315387010574341,0.9308363199234009,0.917028546333313,0.9058995842933655,0.877137303352356,0.9196183085441589,0.9052009582519531,0.9050914645195007,0.9017252922058105,0.9156660437583923,0.9440099596977234,0.9012807607650757,0.9038532376289368,0.9157229065895081,0.8888267874717712,0.8651591539382935,0.8234075903892517,0.924311637878418,0.9411693215370178,0.943746030330658,0.9186076521873474,0.9344898462295532,0.8920850157737732,0.8855765461921692,0.9203011989593506,0.9117947220802307
-CQADupstackGamingRetrieval,0.9050220847129822,0.9039636254310608,0.8206374049186707,0.9007715582847595,0.847434937953949,0.8654309511184692,0.9270387291908264,0.9270387291908264,0.922167956829071,0.922167956829071,0.9148895144462585,0.9144876003265381,0.8932080864906311,0.9272482991218567,0.8723767995834351,0.9139900803565979,0.8736319541931152,0.8903034329414368,0.9274815917015076,0.9371761083602905,0.9356912970542908,0.9504308104515076,0.9298926591873169,0.7504194378852844,0.8744352459907532,0.8917332291603088,0.8971635103225708,0.8371538519859314,0.9212622046470642,0.9056882858276367,0.900233805179596,0.8844062089920044,0.9370324015617371,0.9400819540023804,0.9999998807907104,0.9216479063034058,0.9330098628997803,0.9342014789581299,0.9393653869628906,0.9243125319480896,0.9149504899978638,0.940504789352417,0.9380438923835754,0.9254974722862244,0.8909529447555542,0.8844062089920044,0.9199414849281311,0.9049500823020935,0.9040770530700684,0.889488935470581,0.9149694442749023,0.9382474422454834,0.8936588168144226,0.8950048685073853,0.9083070158958435,0.882118821144104,0.858853280544281,0.8333576321601868,0.9167808890342712,0.9232589602470398,0.9353626370429993,0.9158080816268921,0.9224990010261536,0.9030084609985352,0.8916919231414795,0.9242655038833618,0.9156969785690308
-CQADupstackGisRetrieval,0.8575048446655273,0.8530020713806152,0.7885207533836365,0.8569079041481018,0.7987120747566223,0.8066450357437134,0.8749427199363708,0.8749427199363708,0.8686424493789673,0.8686424493789673,0.8601332306861877,0.8603336215019226,0.8815957903862,0.888041615486145,0.8544682264328003,0.8944020867347717,0.8565325140953064,0.8818444013595581,0.8728464841842651,0.8817288279533386,0.9072699546813965,0.9322190880775452,0.888175904750824,0.7442988157272339,0.8095758557319641,0.8425986766815186,0.8783930540084839,0.7987034916877747,0.9038124680519104,0.9124735593795776,0.8606018424034119,0.8418534994125366,0.9095420837402344,0.9009470343589783,0.9216479063034058,0.9999998807907104,0.9448469281196594,0.903142511844635,0.9301380515098572,0.9305801391601562,0.9221766591072083,0.9310668110847473,0.9317100048065186,0.918504536151886,0.8589087724685669,0.8418534994125366,0.8800798058509827,0.8755719661712646,0.8662577867507935,0.854289710521698,0.8728598952293396,0.8888506889343262,0.8968186974525452,0.8719730377197266,0.8571727871894836,0.8625155687332153,0.8343672752380371,0.7701677680015564,0.8710922002792358,0.8752908706665039,0.8871220350265503,0.8685832023620605,0.8740650415420532,0.8476601839065552,0.8595708608627319,0.8642983436584473,0.8621535301208496
-CQADupstackMathematicaRetrieval,0.8769376277923584,0.874477207660675,0.7998864650726318,0.8734966516494751,0.8172357678413391,0.8283482193946838,0.8865760564804077,0.8865760564804077,0.8871181607246399,0.8871181607246399,0.8703606724739075,0.8727058172225952,0.9128819108009338,0.9053133130073547,0.8758416771888733,0.913551926612854,0.8669453859329224,0.8924626708030701,0.8922913074493408,0.8999888896942139,0.9264189600944519,0.939926028251648,0.901106059551239,0.7660002708435059,0.8217236399650574,0.8575678467750549,0.8864251375198364,0.8081290125846863,0.9215748310089111,0.9236457943916321,0.8785495758056641,0.8496251106262207,0.9166147112846375,0.9214543104171753,0.9330098628997803,0.9448469281196594,1.0,0.9277931451797485,0.9406094551086426,0.9559803009033203,0.9574756026268005,0.9464672207832336,0.9315505027770996,0.9326183199882507,0.8738807439804077,0.8496251106262207,0.8993229269981384,0.8777073621749878,0.8827200531959534,0.8768882155418396,0.8850938081741333,0.9020552039146423,0.9163784980773926,0.8997095227241516,0.8748701810836792,0.8762643337249756,0.8645408153533936,0.7892798185348511,0.8838201761245728,0.889945387840271,0.8978254199028015,0.8780002593994141,0.8911046385765076,0.8559651970863342,0.8719625473022461,0.8818624019622803,0.8755237460136414
-CQADupstackPhysicsRetrieval,0.8788719773292542,0.8754789233207703,0.8036501407623291,0.866807758808136,0.8205375075340271,0.8346224427223206,0.8897450566291809,0.8897450566291809,0.8848662972450256,0.8848662972450256,0.8878904581069946,0.8835071921348572,0.9258798360824585,0.9213353395462036,0.8830129504203796,0.9188753366470337,0.870018720626831,0.8929807543754578,0.8964251279830933,0.9109740257263184,0.9299076795578003,0.9218308925628662,0.9096159934997559,0.7272855639457703,0.8345741033554077,0.8722454309463501,0.8661698698997498,0.8156949281692505,0.9176410436630249,0.8843101859092712,0.8957704305648804,0.8617082834243774,0.9050965905189514,0.9414180517196655,0.9342014789581299,0.903142511844635,0.9277931451797485,0.9999999403953552,0.9381371736526489,0.9415716528892517,0.91279536485672,0.9261923432350159,0.9212412238121033,0.9054916501045227,0.8781377077102661,0.8617082834243774,0.8984333276748657,0.8824734091758728,0.8800548315048218,0.8915558457374573,0.8959563970565796,0.922303318977356,0.9024848937988281,0.9060347676277161,0.8913036584854126,0.8834498524665833,0.8713915348052979,0.8070156574249268,0.892284631729126,0.9096480011940002,0.9132447838783264,0.903823971748352,0.899546205997467,0.8698975443840027,0.869685173034668,0.8923594355583191,0.8838998079299927
-CQADupstackProgrammersRetrieval,0.8834834098815918,0.8846838474273682,0.8091654181480408,0.8690007328987122,0.8239988684654236,0.8459446430206299,0.8953050374984741,0.8953050374984741,0.8901574015617371,0.8901574015617371,0.8843247294425964,0.8789454698562622,0.9027954936027527,0.9087139964103699,0.8758763670921326,0.911128044128418,0.8732233643531799,0.8892922401428223,0.8974623084068298,0.9104718565940857,0.9368252158164978,0.9459747672080994,0.9153035879135132,0.7467317581176758,0.8340392112731934,0.8732454776763916,0.8830022215843201,0.8110246062278748,0.9275285005569458,0.9159318804740906,0.9052194356918335,0.8505048155784607,0.9206304550170898,0.9488763213157654,0.9393653869628906,0.9301380515098572,0.9406094551086426,0.9381371736526489,1.0000001192092896,0.9467190504074097,0.9259558320045471,0.9530035257339478,0.9550228714942932,0.9337886571884155,0.8744832277297974,0.8505048155784607,0.9110053181648254,0.8822770714759827,0.8804248571395874,0.8899571895599365,0.8902618885040283,0.9181737303733826,0.9190900325775146,0.9017314910888672,0.8959858417510986,0.8842517137527466,0.8579108119010925,0.8032951951026917,0.8920109868049622,0.9053783416748047,0.9127124547958374,0.89433753490448,0.9096778035163879,0.8691129088401794,0.8748772144317627,0.8964371681213379,0.8863435387611389
-CQADupstackStatsRetrieval,0.8732110261917114,0.870514988899231,0.7987720966339111,0.8641847968101501,0.8105478286743164,0.8239307999610901,0.8811706304550171,0.8811706304550171,0.8792017698287964,0.8792017698287964,0.8718209862709045,0.8683345913887024,0.916979193687439,0.9069965481758118,0.8930484056472778,0.923565149307251,0.9020135402679443,0.9127675890922546,0.8893818259239197,0.901867687702179,0.9241059422492981,0.9272336959838867,0.9074497222900391,0.7388317584991455,0.8295093774795532,0.8640527129173279,0.8678272366523743,0.8064138293266296,0.9317771792411804,0.9021614193916321,0.8878010511398315,0.8545312881469727,0.9030284285545349,0.9316865801811218,0.9243125319480896,0.9305801391601562,0.9559803009033203,0.9415716528892517,0.9467190504074097,1.0000001192092896,0.9336675405502319,0.9365205764770508,0.930739164352417,0.9157169461250305,0.8788878917694092,0.8545312881469727,0.897214412689209,0.8763482570648193,0.8824243545532227,0.895236074924469,0.8826169371604919,0.9051744341850281,0.9343346357345581,0.9186573028564453,0.8812727332115173,0.8962944149971008,0.8794823884963989,0.7893253564834595,0.8797302842140198,0.8905830383300781,0.8992865085601807,0.878086507320404,0.8895502090454102,0.8537505865097046,0.8672860264778137,0.879453182220459,0.8792035579681396
-CQADupstackTexRetrieval,0.8720847964286804,0.8698432445526123,0.8001028299331665,0.860200822353363,0.8049406409263611,0.8178578019142151,0.8818027973175049,0.8818027973175049,0.8758128881454468,0.8758128881454468,0.8644421100616455,0.8654515743255615,0.9049314856529236,0.8965097069740295,0.8660279512405396,0.8992398977279663,0.8579853177070618,0.8801870942115784,0.8851460814476013,0.8874367475509644,0.9255838990211487,0.9189807772636414,0.8974940180778503,0.7514438033103943,0.8197506666183472,0.8527172803878784,0.880608856678009,0.8085086941719055,0.9046880602836609,0.9091837406158447,0.8700098991394043,0.8441368937492371,0.8972444534301758,0.9194899201393127,0.9149504899978638,0.9221766591072083,0.9574756026268005,0.91279536485672,0.9259558320045471,0.9336675405502319,1.0,0.9387102723121643,0.9309241771697998,0.9312326312065125,0.8696514964103699,0.8441368937492371,0.8951022624969482,0.8696152567863464,0.8712650537490845,0.8649015426635742,0.8828864097595215,0.8967395424842834,0.8998434543609619,0.8823222517967224,0.876192569732666,0.874040961265564,0.8481307625770569,0.7818848490715027,0.8804553747177124,0.8856875896453857,0.8912297487258911,0.8708868026733398,0.885320246219635,0.843300998210907,0.8693545460700989,0.8727013468742371,0.8655053973197937
-CQADupstackUnixRetrieval,0.8778753876686096,0.8746495246887207,0.8078638315200806,0.8753891587257385,0.8208318948745728,0.8369230031967163,0.8984538316726685,0.8984538316726685,0.892038881778717,0.892038881778717,0.8797895312309265,0.8802311420440674,0.8934173583984375,0.9025301933288574,0.8672094345092773,0.9039843082427979,0.8595551252365112,0.8826228976249695,0.8965474367141724,0.9019376039505005,0.9274458289146423,0.9332571029663086,0.9126835465431213,0.7605757713317871,0.8345680236816406,0.8612546324729919,0.93036949634552,0.8037967681884766,0.9125511646270752,0.9131677150726318,0.8799136877059937,0.8518548607826233,0.9373556971549988,0.9315387010574341,0.940504789352417,0.9310668110847473,0.9464672207832336,0.9261923432350159,0.9530035257339478,0.9365205764770508,0.9387102723121643,0.9999998211860657,0.9425334334373474,0.9361576437950134,0.8726716041564941,0.8518548607826233,0.9011945724487305,0.8829306364059448,0.8788676857948303,0.8737623691558838,0.8897589445114136,0.9121820330619812,0.8979055881500244,0.8855134844779968,0.8814226984977722,0.8759041428565979,0.8541761040687561,0.8000640273094177,0.8939105272293091,0.8999499082565308,0.9092024564743042,0.8901516199111938,0.9038639068603516,0.8625014424324036,0.8721633553504944,0.8905507326126099,0.873734712600708
-CQADupstackWebmastersRetrieval,0.8800479173660278,0.8826687932014465,0.8033185601234436,0.8771096467971802,0.8235769867897034,0.8369060158729553,0.897138774394989,0.897138774394989,0.8904502391815186,0.8904502391815186,0.8873119354248047,0.8835926055908203,0.8883233666419983,0.9069535732269287,0.8678568005561829,0.9041688442230225,0.8668990731239319,0.8879578709602356,0.8993308544158936,0.9086974263191223,0.9290803074836731,0.9323667287826538,0.9110020995140076,0.7364763021469116,0.8336871266365051,0.87152498960495,0.8850497603416443,0.8270236253738403,0.9187279343605042,0.9141953587532043,0.8878820538520813,0.8488019108772278,0.9290414452552795,0.9308363199234009,0.9380438923835754,0.9317100048065186,0.9315505027770996,0.9212412238121033,0.9550228714942932,0.930739164352417,0.9309241771697998,0.9425334334373474,0.9999998807907104,0.9648351073265076,0.8810744285583496,0.8488019108772278,0.9122025966644287,0.8880552053451538,0.8899712562561035,0.8789516091346741,0.8950729966163635,0.9161275029182434,0.9036228060722351,0.8879486918449402,0.888551652431488,0.8810858726501465,0.8505773544311523,0.7996203303337097,0.8922863602638245,0.89543616771698,0.9074068665504456,0.8887396454811096,0.9075014591217041,0.8670238256454468,0.8819707036018372,0.8923331499099731,0.8873686790466309
-CQADupstackWordpressRetrieval,0.8742043972015381,0.8705847859382629,0.7957671284675598,0.8702175617218018,0.8167813420295715,0.825982391834259,0.8913282155990601,0.8913282155990601,0.8847219944000244,0.8847219944000244,0.8789352178573608,0.8761081099510193,0.8882037997245789,0.8937448859214783,0.8641262650489807,0.8961097002029419,0.8557989597320557,0.8784561157226562,0.8881715536117554,0.8998266458511353,0.9175654649734497,0.9188860058784485,0.8972079753875732,0.7487673163414001,0.8268483281135559,0.8667789697647095,0.8874524235725403,0.810706615447998,0.907027006149292,0.9168320894241333,0.8797154426574707,0.8423066735267639,0.9209067821502686,0.917028546333313,0.9254974722862244,0.918504536151886,0.9326183199882507,0.9054916501045227,0.9337886571884155,0.9157169461250305,0.9312326312065125,0.9361576437950134,0.9648351073265076,1.0000001192092896,0.8684195876121521,0.8423066735267639,0.9004048705101013,0.8746203780174255,0.8731535077095032,0.8685926198959351,0.8860312104225159,0.9065831899642944,0.8904937505722046,0.8772619366645813,0.8826008439064026,0.8709307909011841,0.8430542945861816,0.793935239315033,0.8826149106025696,0.8886222243309021,0.8997173309326172,0.8829312920570374,0.8971396088600159,0.8602017760276794,0.8727341890335083,0.886046290397644,0.8786991834640503
-DBPedia,0.8960760831832886,0.9033768773078918,0.857245147228241,0.8732821941375732,0.8425841331481934,0.8585302829742432,0.9272512793540955,0.9272512793540955,0.9200598001480103,0.9200598001480103,0.9204174280166626,0.9267283082008362,0.9017259478569031,0.9494740962982178,0.8874456882476807,0.9335931539535522,0.8835738897323608,0.9113901853561401,0.9286068081855774,0.92087322473526,0.9289190769195557,0.865909218788147,0.9430718421936035,0.7269722819328308,0.8812722563743591,0.8917800784111023,0.8765844106674194,0.8491460084915161,0.932131826877594,0.8864024877548218,0.8886457085609436,0.9115601778030396,0.8635401725769043,0.9058995842933655,0.8909529447555542,0.8589087724685669,0.8738807439804077,0.8781377077102661,0.8744832277297974,0.8788878917694092,0.8696514964103699,0.8726716041564941,0.8810744285583496,0.8684195876121521,1.0000001192092896,0.9115601778030396,0.9128028154373169,0.9553118348121643,0.9282314777374268,0.9043858051300049,0.9467520117759705,0.9322096705436707,0.908551037311554,0.9075112342834473,0.9047530293464661,0.908862829208374,0.8717136979103088,0.84193354845047,0.9324358105659485,0.9196749925613403,0.9341995120048523,0.9155308604240417,0.9073795676231384,0.9020201563835144,0.9053149819374084,0.9130927324295044,0.9138416051864624
-FEVER,0.8687456250190735,0.8777851462364197,0.8256171345710754,0.8610327243804932,0.8250551819801331,0.839382529258728,0.9111378788948059,0.9111378788948059,0.8968261480331421,0.8968261480331421,0.8989847898483276,0.9066250324249268,0.8781493902206421,0.9121813178062439,0.8496026396751404,0.8964840173721313,0.8590527176856995,0.884668231010437,0.9140297770500183,0.8984732627868652,0.9021029472351074,0.8466858863830566,0.9241911172866821,0.7223891615867615,0.8542394042015076,0.8842594623565674,0.8626706600189209,0.8417803645133972,0.8973691463470459,0.862632155418396,0.868762731552124,1.0000001192092896,0.8460091352462769,0.877137303352356,0.8844062089920044,0.8418534994125366,0.8496251106262207,0.8617082834243774,0.8505048155784607,0.8545312881469727,0.8441368937492371,0.8518548607826233,0.8488019108772278,0.8423066735267639,0.9115601778030396,1.0000001192092896,0.8865174651145935,0.9264897108078003,0.9099120497703552,0.8735563158988953,0.9296450614929199,0.9116727113723755,0.8757598400115967,0.872587263584137,0.8834298253059387,0.8787876963615417,0.8280983567237854,0.8355134129524231,0.9138962626457214,0.9049146771430969,0.9181155562400818,0.9026291370391846,0.8828028440475464,0.8914071917533875,0.8870561718940735,0.8974177241325378,0.9090380072593689
-FiQA2018,0.9194890260696411,0.91923987865448,0.8700592517852783,0.9026527404785156,0.8733782172203064,0.8759719133377075,0.9463034868240356,0.9463034868240356,0.9321253299713135,0.9321253299713135,0.9462225437164307,0.9446496963500977,0.9082144498825073,0.947498619556427,0.8788062334060669,0.9220610857009888,0.8795516490936279,0.9016450047492981,0.9438943862915039,0.9436620473861694,0.9526970386505127,0.9015153050422668,0.9498501420021057,0.7529628872871399,0.8879868388175964,0.9199483394622803,0.9054697155952454,0.8699422478675842,0.9359960556030273,0.9124810099601746,0.920708417892456,0.8865174651145935,0.8912644982337952,0.9196183085441589,0.9199414849281311,0.8800798058509827,0.8993229269981384,0.8984333276748657,0.9110053181648254,0.897214412689209,0.8951022624969482,0.9011945724487305,0.9122025966644287,0.9004048705101013,0.9128028154373169,0.8865174651145935,1.0,0.9253550171852112,0.9273801445960999,0.8983956575393677,0.9334579706192017,0.9525591731071472,0.9080317616462708,0.8984257578849792,0.932041347026825,0.8989304900169373,0.8627818822860718,0.852931559085846,0.9391583204269409,0.9346805214881897,0.9548579454421997,0.9363479018211365,0.9465791583061218,0.9146230816841125,0.9739243984222412,0.9389200806617737,0.9216462969779968
-HotpotQA,0.8983748555183411,0.9041053652763367,0.861984133720398,0.8878933191299438,0.8516587615013123,0.8769184350967407,0.9470813274383545,0.9470813274383545,0.9370198249816895,0.9370198249816895,0.9388979077339172,0.9437984228134155,0.9008392095565796,0.956941545009613,0.8783058524131775,0.9312097430229187,0.8794113397598267,0.9183295369148254,0.9391739964485168,0.931955099105835,0.9371076226234436,0.8845800161361694,0.9545242190361023,0.7362186312675476,0.8985650539398193,0.9096399545669556,0.8964870572090149,0.8755338788032532,0.9427277445793152,0.9039829969406128,0.8948706984519958,0.9264897108078003,0.8783244490623474,0.9052009582519531,0.9049500823020935,0.8755719661712646,0.8777073621749878,0.8824734091758728,0.8822770714759827,0.8763482570648193,0.8696152567863464,0.8829306364059448,0.8880552053451538,0.8746203780174255,0.9553118348121643,0.9264897108078003,0.9253550171852112,0.9999999403953552,0.9338226914405823,0.8949758410453796,0.9652247428894043,0.9462618827819824,0.9095838069915771,0.8932918310165405,0.9121882319450378,0.9082680940628052,0.8506495952606201,0.8844336271286011,0.950919508934021,0.933860719203949,0.9507559537887573,0.9376870393753052,0.9176111817359924,0.9365106821060181,0.9216817021369934,0.9391409754753113,0.9349841475486755
-MSMARCO,0.9003165364265442,0.9071121215820312,0.8672728538513184,0.9106489419937134,0.8590372204780579,0.8686230182647705,0.9404415488243103,0.9404415488243103,0.9469432234764099,0.9469432234764099,0.9315915107727051,0.9354615807533264,0.8916330933570862,0.9396511316299438,0.8714765310287476,0.9096160531044006,0.8779413104057312,0.8957593441009521,0.9369815587997437,0.9319586157798767,0.934260904788971,0.8738407492637634,0.943306565284729,0.7674270272254944,0.8808208107948303,0.9038676619529724,0.888830304145813,0.8483620285987854,0.9200493097305298,0.8921213150024414,0.8937736749649048,0.9099120497703552,0.8861086964607239,0.9050914645195007,0.9040770530700684,0.8662577867507935,0.8827200531959534,0.8800548315048218,0.8804248571395874,0.8824243545532227,0.8712650537490845,0.8788676857948303,0.8899712562561035,0.8731535077095032,0.9282314777374268,0.9099120497703552,0.9273801445960999,0.9338226914405823,0.9999999403953552,0.8933760523796082,0.9218379259109497,0.9464716911315918,0.896026611328125,0.8949978947639465,0.9115307927131653,0.890006422996521,0.8536749482154846,0.8565343618392944,0.9360795021057129,0.923584520816803,0.9442033767700195,0.9270609021186829,0.9261489510536194,0.9158554077148438,0.922019362449646,0.9279426336288452,0.9159611463546753
-NFCorpus,0.8930765986442566,0.8932070136070251,0.8301711678504944,0.8546255826950073,0.840248167514801,0.8685956597328186,0.8946558833122253,0.8946558833122253,0.8838071823120117,0.8838071823120117,0.8950788378715515,0.8943512439727783,0.9167720675468445,0.9095065593719482,0.9372479319572449,0.9406470060348511,0.9487459659576416,0.9393429756164551,0.9039009809494019,0.9122331738471985,0.9154017567634583,0.865176260471344,0.9156935811042786,0.7220373749732971,0.8472391366958618,0.8938794136047363,0.8649072647094727,0.8392335772514343,0.918515145778656,0.8695310950279236,0.9279579520225525,0.8735563158988953,0.8625217080116272,0.9017252922058105,0.889488935470581,0.854289710521698,0.8768882155418396,0.8915558457374573,0.8899571895599365,0.895236074924469,0.8649015426635742,0.8737623691558838,0.8789516091346741,0.8685926198959351,0.9043858051300049,0.8735563158988953,0.8983956575393677,0.8949758410453796,0.8933760523796082,0.9999999403953552,0.9013606905937195,0.9077708125114441,0.9226983189582825,0.9740851521492004,0.9084823131561279,0.9550403952598572,0.9347903728485107,0.8200613260269165,0.9014897346496582,0.9092106819152832,0.921718180179596,0.9010535478591919,0.9073488116264343,0.877545177936554,0.8832513093948364,0.9017101526260376,0.9184467196464539
-NQ,0.9119835495948792,0.9180147051811218,0.8646405339241028,0.8853597640991211,0.8614929914474487,0.8820010423660278,0.9522876143455505,0.9522876143455505,0.9368411898612976,0.9368411898612976,0.9423636198043823,0.9478681683540344,0.910786509513855,0.9639177918434143,0.8799590468406677,0.9374184012413025,0.8801149129867554,0.9159761667251587,0.9450920224189758,0.9413416981697083,0.9463439583778381,0.8900197744369507,0.9650863409042358,0.7399367094039917,0.8976626992225647,0.9211317300796509,0.8988544940948486,0.8892091512680054,0.94994056224823,0.9073175191879272,0.9106296896934509,0.9296450614929199,0.8842442631721497,0.9156660437583923,0.9149694442749023,0.8728598952293396,0.8850938081741333,0.8959563970565796,0.8902618885040283,0.8826169371604919,0.8828864097595215,0.8897589445114136,0.8950729966163635,0.8860312104225159,0.9467520117759705,0.9296450614929199,0.9334579706192017,0.9652247428894043,0.9218379259109497,0.9013606905937195,1.0,0.9495320916175842,0.9153639674186707,0.8995381593704224,0.9279997944831848,0.9119896292686462,0.8595485091209412,0.876109778881073,0.9519914984703064,0.9385321140289307,0.9532462358474731,0.9365876317024231,0.9189303517341614,0.931150496006012,0.9250473976135254,0.9387162327766418,0.9347662329673767
-QuoraRetrieval,0.9182446002960205,0.922761857509613,0.8830270767211914,0.9191234707832336,0.8839017748832703,0.8861343860626221,0.9682824611663818,0.9682824611663818,0.956068754196167,0.956068754196167,0.9756679534912109,0.9679496884346008,0.9078540205955505,0.9652320742607117,0.8842073082923889,0.930925726890564,0.8837840557098389,0.9054452776908875,0.959994375705719,0.9608737826347351,0.967335045337677,0.9089018702507019,0.9623216986656189,0.7632731795310974,0.9045380353927612,0.9303578734397888,0.9126819968223572,0.8765057325363159,0.943608820438385,0.9127946496009827,0.9145981669425964,0.9116727113723755,0.9079585671424866,0.9440099596977234,0.9382474422454834,0.8888506889343262,0.9020552039146423,0.922303318977356,0.9181737303733826,0.9051744341850281,0.8967395424842834,0.9121820330619812,0.9161275029182434,0.9065831899642944,0.9322096705436707,0.9116727113723755,0.9525591731071472,0.9462618827819824,0.9464716911315918,0.9077708125114441,0.9495320916175842,1.0,0.9058340191841125,0.905327320098877,0.9388357400894165,0.9015847444534302,0.8601654171943665,0.8889256119728088,0.9628604054450989,0.9531316161155701,0.9722914695739746,0.9573034644126892,0.9578181505203247,0.9456358551979065,0.9389108419418335,0.9596331715583801,0.9374577403068542
-SCIDOCS,0.889691174030304,0.8992704749107361,0.8478741645812988,0.85566246509552,0.8242060542106628,0.8516706228256226,0.9068668484687805,0.9068668484687805,0.8929500579833984,0.8929500579833984,0.8898159265518188,0.8895792961120605,0.9503505229949951,0.9295390248298645,0.9247989654541016,0.9657474756240845,0.9216246008872986,0.9564609527587891,0.9040453433990479,0.9078938364982605,0.9253087043762207,0.899509608745575,0.9296791553497314,0.740402340888977,0.8541281223297119,0.8930266499519348,0.8830411434173584,0.8636199235916138,0.9736051559448242,0.9117957949638367,0.9124789237976074,0.8757598400115967,0.8729583621025085,0.9012807607650757,0.8936588168144226,0.8968186974525452,0.9163784980773926,0.9024848937988281,0.9190900325775146,0.9343346357345581,0.8998434543609619,0.8979055881500244,0.9036228060722351,0.8904937505722046,0.908551037311554,0.8757598400115967,0.9080317616462708,0.9095838069915771,0.896026611328125,0.9226983189582825,0.9153639674186707,0.9058340191841125,1.0,0.9472094178199768,0.9022283554077148,0.9487838745117188,0.9116275906562805,0.8131711483001709,0.9032444953918457,0.9103025197982788,0.9158501029014587,0.8992156982421875,0.8955920934677124,0.8796117305755615,0.89765465259552,0.8925430774688721,0.9049012660980225
-SciFact,0.892997145652771,0.8951342701911926,0.831039309501648,0.8536851406097412,0.8306145071983337,0.8627360463142395,0.8946090340614319,0.8946090340614319,0.8861535787582397,0.8861535787582397,0.8889317512512207,0.8876237869262695,0.9326635599136353,0.9176734685897827,0.9538105130195618,0.9617788791656494,0.9501500725746155,0.9464058876037598,0.8978421688079834,0.9097794890403748,0.9159756898880005,0.8790194392204285,0.9186417460441589,0.7349553108215332,0.8465986251831055,0.8879072666168213,0.864525556564331,0.8338578343391418,0.9353674650192261,0.8792681694030762,0.9248058795928955,0.872587263584137,0.8696499466896057,0.9038532376289368,0.8950048685073853,0.8719730377197266,0.8997095227241516,0.9060347676277161,0.9017314910888672,0.9186573028564453,0.8823222517967224,0.8855134844779968,0.8879486918449402,0.8772619366645813,0.9075112342834473,0.872587263584137,0.8984257578849792,0.8932918310165405,0.8949978947639465,0.9740851521492004,0.8995381593704224,0.905327320098877,0.9472094178199768,0.9999998807907104,0.9017122983932495,0.9554412961006165,0.968241274356842,0.8046634197235107,0.8989851474761963,0.907640814781189,0.9171600937843323,0.8982743620872498,0.8993921875953674,0.8695809245109558,0.8843533992767334,0.8911031484603882,0.9126039147377014
-Touche2020,0.9215371608734131,0.9281871914863586,0.8493306040763855,0.8839954733848572,0.8763502836227417,0.8969246745109558,0.9345963597297668,0.9345963597297668,0.9225219488143921,0.9225219488143921,0.9373823404312134,0.9310782551765442,0.9119592308998108,0.9311457872390747,0.8883203864097595,0.915169358253479,0.888805091381073,0.896754264831543,0.9354337453842163,0.9485433101654053,0.9310891032218933,0.876009464263916,0.9339285492897034,0.7392125129699707,0.8941338062286377,0.9263610243797302,0.8846127390861511,0.8729779720306396,0.9235967397689819,0.8867591619491577,0.9558223485946655,0.8834298253059387,0.8786553740501404,0.9157229065895081,0.9083070158958435,0.8571727871894836,0.8748701810836792,0.8913036584854126,0.8959858417510986,0.8812727332115173,0.876192569732666,0.8814226984977722,0.888551652431488,0.8826008439064026,0.9047530293464661,0.8834298253059387,0.932041347026825,0.9121882319450378,0.9115307927131653,0.9084823131561279,0.9279997944831848,0.9388357400894165,0.9022283554077148,0.9017122983932495,1.000000238418579,0.9011086225509644,0.8702623248100281,0.8584712743759155,0.9317410588264465,0.9341952204704285,0.9546455144882202,0.9370003938674927,0.9341966509819031,0.9161267876625061,0.9047480821609497,0.9399805665016174,0.9332844614982605
-TRECCOVID,0.8833832144737244,0.8897658586502075,0.8365864753723145,0.8441707491874695,0.8282589912414551,0.8567239046096802,0.8986713886260986,0.8986713886260986,0.8865776658058167,0.8865776658058167,0.8933861255645752,0.8951513767242432,0.9338494539260864,0.9181896448135376,0.942850649356842,0.957700252532959,0.9613710641860962,0.9670438766479492,0.89899742603302,0.9055797457695007,0.9119280576705933,0.8632569313049316,0.9249359369277954,0.7177295684814453,0.8519248962402344,0.8916494846343994,0.8762941360473633,0.8616792559623718,0.9372914433479309,0.8783340454101562,0.9113011956214905,0.8787876963615417,0.8585020303726196,0.8888267874717712,0.882118821144104,0.8625155687332153,0.8762643337249756,0.8834498524665833,0.8842517137527466,0.8962944149971008,0.874040961265564,0.8759041428565979,0.8810858726501465,0.8709307909011841,0.908862829208374,0.8787876963615417,0.8989304900169373,0.9082680940628052,0.890006422996521,0.9550403952598572,0.9119896292686462,0.9015847444534302,0.9487838745117188,0.9554412961006165,0.9011086225509644,1.0000001192092896,0.9231286644935608,0.8148664236068726,0.8992998003959656,0.9077374935150146,0.9175513982772827,0.9003238081932068,0.8937297463417053,0.8788232803344727,0.8935161828994751,0.8917135000228882,0.9123128652572632
-BIOSSES,0.8625791668891907,0.8574753999710083,0.797872006893158,0.8158388733863831,0.7966073751449585,0.8268139362335205,0.8533274531364441,0.8533274531364441,0.8508256077766418,0.8508256077766418,0.8529271483421326,0.8504247069358826,0.9067458510398865,0.8817151784896851,0.9347233772277832,0.9307554960250854,0.9220839142799377,0.9164301753044128,0.8582507967948914,0.8730185031890869,0.876883327960968,0.8407849073410034,0.8814013004302979,0.712036669254303,0.8185410499572754,0.8485643863677979,0.8330298662185669,0.802665114402771,0.8973731994628906,0.8480639457702637,0.8957175016403198,0.8280983567237854,0.8368518948554993,0.8651591539382935,0.858853280544281,0.8343672752380371,0.8645408153533936,0.8713915348052979,0.8579108119010925,0.8794823884963989,0.8481307625770569,0.8541761040687561,0.8505773544311523,0.8430542945861816,0.8717136979103088,0.8280983567237854,0.8627818822860718,0.8506495952606201,0.8536749482154846,0.9347903728485107,0.8595485091209412,0.8601654171943665,0.9116275906562805,0.968241274356842,0.8702623248100281,0.9231286644935608,1.0000001192092896,0.7632718682289124,0.8584350943565369,0.8718514442443848,0.8818195462226868,0.8666396141052246,0.8593403100967407,0.8286240100860596,0.848635196685791,0.8512697219848633,0.8796348571777344
-SICK-R,0.8299649357795715,0.8390056490898132,0.8078639507293701,0.8224376440048218,0.8122089505195618,0.8363828063011169,0.8902477025985718,0.8902477025985718,0.8823556900024414,0.8823556900024414,0.8919810056686401,0.8916918635368347,0.8065285086631775,0.8841482996940613,0.7822513580322266,0.844598650932312,0.7837725281715393,0.8197333216667175,0.8789204955101013,0.8813309073448181,0.8612088561058044,0.8076320886611938,0.879280149936676,0.6801100373268127,0.8878549337387085,0.8542071580886841,0.8284757733345032,0.8343176245689392,0.8585872650146484,0.82960045337677,0.8236397504806519,0.8355134129524231,0.8128756284713745,0.8234075903892517,0.8333576321601868,0.7701677680015564,0.7892798185348511,0.8070156574249268,0.8032951951026917,0.7893253564834595,0.7818848490715027,0.8000640273094177,0.7996203303337097,0.793935239315033,0.84193354845047,0.8355134129524231,0.852931559085846,0.8844336271286011,0.8565343618392944,0.8200613260269165,0.876109778881073,0.8889256119728088,0.8131711483001709,0.8046634197235107,0.8584712743759155,0.8148664236068726,0.7632718682289124,1.0,0.9038611650466919,0.8643434643745422,0.8853167295455933,0.885067880153656,0.8466845154762268,0.9286790490150452,0.8497613668441772,0.9365238547325134,0.8939717411994934
-STS12,0.9155313968658447,0.915518581867218,0.8909081220626831,0.9016413688659668,0.8802585005760193,0.8844971656799316,0.9739812016487122,0.9739812016487122,0.9523103833198547,0.9523103833198547,0.9546875357627869,0.9640517830848694,0.8979881405830383,0.9600266218185425,0.8725536465644836,0.9283256530761719,0.8748796582221985,0.908024787902832,0.9507595896720886,0.9435673952102661,0.9496986269950867,0.8901703953742981,0.9567474722862244,0.7566519975662231,0.9094604253768921,0.9259677529335022,0.914065420627594,0.873137354850769,0.9384756684303284,0.9093490242958069,0.914850115776062,0.9138962626457214,0.8859356641769409,0.924311637878418,0.9167808890342712,0.8710922002792358,0.8838201761245728,0.892284631729126,0.8920109868049622,0.8797302842140198,0.8804553747177124,0.8939105272293091,0.8922863602638245,0.8826149106025696,0.9324358105659485,0.9138962626457214,0.9391583204269409,0.950919508934021,0.9360795021057129,0.9014897346496582,0.9519914984703064,0.9628604054450989,0.9032444953918457,0.8989851474761963,0.9317410588264465,0.8992998003959656,0.8584350943565369,0.9038611650466919,0.9999998807907104,0.9665259122848511,0.9781090617179871,0.9599546194076538,0.949619710445404,0.9488744735717773,0.929383397102356,0.9622442126274109,0.9399190545082092
-STS13,0.9190161824226379,0.9148537516593933,0.8852756023406982,0.8994392156600952,0.8810560703277588,0.8787712454795837,0.9600082635879517,0.9600082635879517,0.9431838989257812,0.9431838989257812,0.9443492889404297,0.952302873134613,0.912732720375061,0.9491159915924072,0.8873900175094604,0.932389497756958,0.8852849006652832,0.9073441624641418,0.9454846978187561,0.9429996013641357,0.9500508308410645,0.8968315720558167,0.9498928785324097,0.7461093068122864,0.8995444178581238,0.9181943535804749,0.9060060381889343,0.8696905374526978,0.9367377161979675,0.9070543050765991,0.9224849343299866,0.9049146771430969,0.8857338428497314,0.9411693215370178,0.9232589602470398,0.8752908706665039,0.889945387840271,0.9096480011940002,0.9053783416748047,0.8905830383300781,0.8856875896453857,0.8999499082565308,0.89543616771698,0.8886222243309021,0.9196749925613403,0.9049146771430969,0.9346805214881897,0.933860719203949,0.923584520816803,0.9092106819152832,0.9385321140289307,0.9531316161155701,0.9103025197982788,0.907640814781189,0.9341952204704285,0.9077374935150146,0.8718514442443848,0.8643434643745422,0.9665259122848511,1.0000001192092896,0.9773168563842773,0.9567533135414124,0.944054901599884,0.934114933013916,0.9183123111724854,0.9466428756713867,0.9373435378074646
-STS14,0.9440998435020447,0.93932044506073,0.8971998691558838,0.9188550710678101,0.9074888229370117,0.9043587446212769,0.9729264974594116,0.9729264974594116,0.960503101348877,0.960503101348877,0.968707799911499,0.9752659201622009,0.9206886291503906,0.9618067741394043,0.8985730409622192,0.9405863881111145,0.899524986743927,0.9192919135093689,0.964044988155365,0.96527099609375,0.9619574546813965,0.9036004543304443,0.9652311205863953,0.7562764286994934,0.9149953126907349,0.9424399137496948,0.9224478602409363,0.889416515827179,0.9479785561561584,0.9182902574539185,0.9421572685241699,0.9181155562400818,0.9007682204246521,0.943746030330658,0.9353626370429993,0.8871220350265503,0.8978254199028015,0.9132447838783264,0.9127124547958374,0.8992865085601807,0.8912297487258911,0.9092024564743042,0.9074068665504456,0.8997173309326172,0.9341995120048523,0.9181155562400818,0.9548579454421997,0.9507559537887573,0.9442033767700195,0.921718180179596,0.9532462358474731,0.9722914695739746,0.9158501029014587,0.9171600937843323,0.9546455144882202,0.9175513982772827,0.8818195462226868,0.8853167295455933,0.9781090617179871,0.9773168563842773,0.9999998807907104,0.9775272607803345,0.9703587293624878,0.9547243118286133,0.9364654421806335,0.9683411121368408,0.9549727439880371
-STS15,0.9286789298057556,0.9213941097259521,0.8883463144302368,0.903643786907196,0.8856732845306396,0.8874390721321106,0.9575598835945129,0.9575598835945129,0.9454217553138733,0.9454217553138733,0.9534297585487366,0.960588812828064,0.9100639820098877,0.9499640464782715,0.8854580521583557,0.9268279671669006,0.8803415298461914,0.90470290184021,0.9484242796897888,0.9469503164291382,0.9489233493804932,0.8906463384628296,0.9521862268447876,0.74090975522995,0.9032992720603943,0.9249746799468994,0.9142858386039734,0.875885009765625,0.9355614185333252,0.9101777076721191,0.9227895736694336,0.9026291370391846,0.8860815763473511,0.9186076521873474,0.9158080816268921,0.8685832023620605,0.8780002593994141,0.903823971748352,0.89433753490448,0.878086507320404,0.8708868026733398,0.8901516199111938,0.8887396454811096,0.8829312920570374,0.9155308604240417,0.9026291370391846,0.9363479018211365,0.9376870393753052,0.9270609021186829,0.9010535478591919,0.9365876317024231,0.9573034644126892,0.8992156982421875,0.8982743620872498,0.9370003938674927,0.9003238081932068,0.8666396141052246,0.885067880153656,0.9599546194076538,0.9567533135414124,0.9775272607803345,1.0000001192092896,0.9542839527130127,0.9541418552398682,0.9206722378730774,0.9615832567214966,0.9395099878311157
-STS16,0.9331569671630859,0.9270878434181213,0.8896051645278931,0.9121543169021606,0.9061300754547119,0.8882074952125549,0.9471473097801208,0.9471473097801208,0.9361295700073242,0.9361295700073242,0.9462233185768127,0.9546923041343689,0.9006866812705994,0.9319314360618591,0.8810415267944336,0.9138600826263428,0.8795533776283264,0.8936593532562256,0.9516837000846863,0.9470174312591553,0.9536707401275635,0.8955281376838684,0.9399745464324951,0.7517213821411133,0.8763034343719482,0.9261443614959717,0.9095703363418579,0.8529878854751587,0.9222978949546814,0.9040267467498779,0.9244217872619629,0.8828028440475464,0.8972189426422119,0.9344898462295532,0.9224990010261536,0.8740650415420532,0.8911046385765076,0.899546205997467,0.9096778035163879,0.8895502090454102,0.885320246219635,0.9038639068603516,0.9075014591217041,0.8971396088600159,0.9073795676231384,0.8828028440475464,0.9465791583061218,0.9176111817359924,0.9261489510536194,0.9073488116264343,0.9189303517341614,0.9578181505203247,0.8955920934677124,0.8993921875953674,0.9341966509819031,0.8937297463417053,0.8593403100967407,0.8466845154762268,0.949619710445404,0.944054901599884,0.9703587293624878,0.9542839527130127,0.9999997019767761,0.9166147112846375,0.9188820123672485,0.9595922827720642,0.9186667799949646
-STS17,0.908374547958374,0.904332160949707,0.8645649552345276,0.8826841711997986,0.8690868020057678,0.8814995884895325,0.9507492780685425,0.9507492780685425,0.9454723000526428,0.9454723000526428,0.9519502520561218,0.9575697183609009,0.8714514374732971,0.942611038684845,0.8527474999427795,0.9094533324241638,0.853192925453186,0.8826584219932556,0.9390854835510254,0.9395745992660522,0.9245097041130066,0.8716039061546326,0.9386094212532043,0.7253054976463318,0.9204296469688416,0.9087364673614502,0.8892186880111694,0.8793433308601379,0.9234777092933655,0.8930013179779053,0.8932615518569946,0.8914071917533875,0.8669255375862122,0.8920850157737732,0.9030084609985352,0.8476601839065552,0.8559651970863342,0.8698975443840027,0.8691129088401794,0.8537505865097046,0.843300998210907,0.8625014424324036,0.8670238256454468,0.8602017760276794,0.9020201563835144,0.8914071917533875,0.9146230816841125,0.9365106821060181,0.9158554077148438,0.877545177936554,0.931150496006012,0.9456358551979065,0.8796117305755615,0.8695809245109558,0.9161267876625061,0.8788232803344727,0.8286240100860596,0.9286790490150452,0.9488744735717773,0.934114933013916,0.9547243118286133,0.9541418552398682,0.9166147112846375,0.9999997615814209,0.9029736518859863,0.9728071689605713,0.9457882046699524
-STS22,0.8873857855796814,0.8904899954795837,0.9054290056228638,0.8817184567451477,0.8543643951416016,0.8508477807044983,0.9367305040359497,0.9367305040359497,0.918997585773468,0.918997585773468,0.9268797636032104,0.9292759299278259,0.8883957266807556,0.933143138885498,0.8675625324249268,0.9075512886047363,0.8731187582015991,0.895809531211853,0.9249169826507568,0.9250169992446899,0.9384429454803467,0.8718999028205872,0.9347309470176697,0.7493941187858582,0.8792893886566162,0.9015825986862183,0.8974651098251343,0.8625198602676392,0.9208407998085022,0.8990249037742615,0.8931587934494019,0.8870561718940735,0.8668273091316223,0.8855765461921692,0.8916919231414795,0.8595708608627319,0.8719625473022461,0.869685173034668,0.8748772144317627,0.8672860264778137,0.8693545460700989,0.8721633553504944,0.8819707036018372,0.8727341890335083,0.9053149819374084,0.8870561718940735,0.9739243984222412,0.9216817021369934,0.922019362449646,0.8832513093948364,0.9250473976135254,0.9389108419418335,0.89765465259552,0.8843533992767334,0.9047480821609497,0.8935161828994751,0.848635196685791,0.8497613668441772,0.929383397102356,0.9183123111724854,0.9364654421806335,0.9206722378730774,0.9188820123672485,0.9029736518859863,1.0,0.9147948026657104,0.9118971228599548
-STSBenchmark,0.933322012424469,0.9300788640975952,0.8723177313804626,0.9049835801124573,0.8943052887916565,0.9050480723381042,0.9577158689498901,0.9577158689498901,0.9517570734024048,0.9517570734024048,0.9642019867897034,0.9668652415275574,0.8937532901763916,0.949581503868103,0.8723320960998535,0.9221967458724976,0.8726744651794434,0.8965899348258972,0.9586203694343567,0.9560426473617554,0.9446290731430054,0.8934082984924316,0.9504886865615845,0.7513360977172852,0.9244780540466309,0.9283378720283508,0.901841938495636,0.8832800984382629,0.9328756928443909,0.9047304391860962,0.9195448756217957,0.8974177241325378,0.8936954140663147,0.9203011989593506,0.9242655038833618,0.8642983436584473,0.8818624019622803,0.8923594355583191,0.8964371681213379,0.879453182220459,0.8727013468742371,0.8905507326126099,0.8923331499099731,0.886046290397644,0.9130927324295044,0.8974177241325378,0.9389200806617737,0.9391409754753113,0.9279426336288452,0.9017101526260376,0.9387162327766418,0.9596331715583801,0.8925430774688721,0.8911031484603882,0.9399805665016174,0.8917135000228882,0.8512697219848633,0.9365238547325134,0.9622442126274109,0.9466428756713867,0.9683411121368408,0.9615832567214966,0.9595922827720642,0.9728071689605713,0.9147948026657104,0.9999998807907104,0.9490109086036682
-SummEval,0.9260546565055847,0.924091637134552,0.8509836196899414,0.8948301672935486,0.8744396567344666,0.9031330347061157,0.9361376762390137,0.9361376762390137,0.932615339756012,0.932615339756012,0.9413120746612549,0.9422594308853149,0.9103951454162598,0.9360114932060242,0.8993033170700073,0.921167254447937,0.9043846130371094,0.9074312448501587,0.9392504096031189,0.9494283199310303,0.9257869124412537,0.87904953956604,0.9389493465423584,0.7284443974494934,0.920987069606781,0.926887035369873,0.8834519982337952,0.8852307796478271,0.9246395826339722,0.8860877752304077,0.9351081848144531,0.9090380072593689,0.882964015007019,0.9117947220802307,0.9156969785690308,0.8621535301208496,0.8755237460136414,0.8838998079299927,0.8863435387611389,0.8792035579681396,0.8655053973197937,0.873734712600708,0.8873686790466309,0.8786991834640503,0.9138416051864624,0.9090380072593689,0.9216462969779968,0.9349841475486755,0.9159611463546753,0.9184467196464539,0.9347662329673767,0.9374577403068542,0.9049012660980225,0.9126039147377014,0.9332844614982605,0.9123128652572632,0.8796348571777344,0.8939717411994934,0.9399190545082092,0.9373435378074646,0.9549727439880371,0.9395099878311157,0.9186667799949646,0.9457882046699524,0.9118971228599548,0.9490109086036682,1.0
diff --git a/plotstables/thumbnail.png b/plotstables/thumbnail.png
deleted file mode 100644
index 27bc78c8..00000000
Binary files a/plotstables/thumbnail.png and /dev/null differ
diff --git a/plotstables/thumbnail_v1.drawio b/plotstables/thumbnail_v1.drawio
deleted file mode 100644
index b025fbca..00000000
--- a/plotstables/thumbnail_v1.drawio
+++ /dev/null
@@ -1 +0,0 @@
-
\ No newline at end of file
diff --git a/results_to_csv.bash b/results_to_csv.bash
deleted file mode 100644
index 17ac0355..00000000
--- a/results_to_csv.bash
+++ /dev/null
@@ -1,31 +0,0 @@
-
-results=(
-LASER2
-SGPT-125M-weightedmean-msmarco-specb-bitfit
-SGPT-125M-weightedmean-msmarco-specb-bitfit-doc
-SGPT-125M-weightedmean-msmarco-specb-bitfit-que
-SGPT-125M-weightedmean-nli-bitfit
-SGPT-5.8B-weightedmean-msmarco-specb-bitfit
-SGPT-5.8B-weightedmean-nli-bitfit
-all-MiniLM-L6-v2
-all-mpnet-base-v2
-bert-base-uncased
-contriever-base-msmarco
-glove.6B.300d
-gtr-t5-base
-gtr-t5-xxl
-komninos
-msmarco-bert-co-condensor
-sentence-t5-base
-sentence-t5-xxl
-sgpt-bloom-1b3-nli
-sgpt-bloom-7b1-msmarco
-sup-simcse-bert-base-uncased
-unsup-simcse-bert-base-uncased
-)
-
-for i in "${results[@]}"
-do
- echo "$i"
- python results_to_csv.py results/$i
-done
diff --git a/results_to_csv.py b/results_to_csv.py
deleted file mode 100644
index 993dbbc6..00000000
--- a/results_to_csv.py
+++ /dev/null
@@ -1,212 +0,0 @@
-"""
-Usage: python results_to_csv.py results_folder_path
-Make sure the final directory results_folder_path is the name of your model
-"""
-import csv
-import json
-import os
-import sys
-
-from mteb import MTEB
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
-]
-
-TASK_LIST_SUMMARIZATION = [
- "SummEval",
-]
-
-TASK_LIST_BITEXTMINING = [
- "BUCC",
- "Tatoeba",
-]
-
-TASK_LIST = (
- TASK_LIST_BITEXTMINING
- + TASK_LIST_CLASSIFICATION
- + TASK_LIST_CLUSTERING
- + TASK_LIST_PAIR_CLASSIFICATION
- + TASK_LIST_RERANKING
- + TASK_LIST_RETRIEVAL
- + TASK_LIST_STS
- + TASK_LIST_SUMMARIZATION
-)
-
-TASK_LIST_NAMES = [
- ("Classification", TASK_LIST_CLASSIFICATION, ["en", "en-en"]),
- ("Clustering", TASK_LIST_CLUSTERING, ["en", "en-en"]),
- ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"]),
- ("Reranking", TASK_LIST_RERANKING, ["en", "en-en"]),
- ("Retrieval", TASK_LIST_RETRIEVAL, ["en", "en-en"]),
- ("STS", TASK_LIST_STS, ["en", "en-en"]),
- ("all", TASK_LIST, ["en", "en-en"]),
- ("BitextMining", TASK_LIST_BITEXTMINING, []),
-]
-
-results_folder = sys.argv[1]
-results_folder = results_folder.strip("/")
-model_name = results_folder.split("/")[-1]
-print(f"Using model name {model_name}")
-
-all_results = {}
-
-for file_name in os.listdir(results_folder):
- if not file_name.endswith(".json"):
- print(f"Skipping non-json {file_name}")
- continue
- with open(os.path.join(results_folder, file_name), "r", encoding="utf-8") as f:
- results = json.load(f)
- all_results = {**all_results, **{file_name.replace(".json", ""): results}}
-
-csv_file = f"{results_folder}_results.csv"
-print(f"Converting {results_folder} to {csv_file}")
-
-NOT_FOUND = []
-
-
-def get_rows(task, dataset, limit_langs=[]):
- rows = []
- # CQADupstackRetrieval uses the same metric as its subsets
- tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks
- assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1."
- main_metric = tasks[0].description["main_score"]
- test_result = all_results.get(dataset, {})
-
- # Dev / Val set is used for MSMARCO (See BEIR paper)
- if "MSMARCO" in dataset:
- test_result = (
- test_result.get("dev") if "dev" in test_result else test_result.get("validation")
- )
- else:
- test_result = test_result.get("test")
- if test_result is None:
- print(f"{dataset} - test set not found")
- NOT_FOUND.append(dataset)
- return [[model_name, task, dataset, "", main_metric, ""]]
-
- for lang in tasks[0].description["eval_langs"]:
- if limit_langs and lang not in limit_langs:
- continue
- test_result_lang = test_result.get(lang, test_result)
- if main_metric == "cosine_spearman":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman")
- elif main_metric == "ap":
- test_result_lang = test_result_lang.get("cos_sim", {}).get("ap")
- else:
- test_result_lang = test_result_lang.get(main_metric)
-
- if test_result_lang is None:
- print(f"{lang} & {main_metric} not found for task {dataset}.")
- rows.append([model_name, task, dataset, lang, main_metric, ""])
- rows.append([model_name, task, dataset, lang, main_metric, test_result_lang])
- return rows
-
-
-with open(csv_file, "w", encoding="utf-8") as f:
- writer = csv.writer(f)
- writer.writerow(["model", "task", "dataset", "language", "metric", "value"])
- for task, dataset_list in [
- ("BitextMining", TASK_LIST_BITEXTMINING),
- ("Classification", TASK_LIST_CLASSIFICATION),
- ("Clustering", TASK_LIST_CLUSTERING),
- ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION),
- ("Reranking", TASK_LIST_RERANKING),
- ("Retrieval", TASK_LIST_RETRIEVAL),
- ("STS", TASK_LIST_STS),
- ("Summarization", TASK_LIST_SUMMARIZATION),
- ]:
- for dataset in dataset_list:
- writer.writerows(get_rows(task, dataset))
-
- # Add average scores
- for task, dataset_list, limit_langs in [
- ("BitextMining", TASK_LIST_BITEXTMINING, []),
- ("Classification", TASK_LIST_CLASSIFICATION, ["en", "en-en"]),
- ("Clustering", TASK_LIST_CLUSTERING, ["en", "en-en"]),
- ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"]),
- ("Reranking", TASK_LIST_RERANKING, ["en", "en-en"]),
- ("Retrieval", TASK_LIST_RETRIEVAL, ["en", "en-en"]),
- ("STS", TASK_LIST_STS, ["en", "en-en"]),
- ("all", TASK_LIST, ["en", "en-en"]),
- ]:
- if all([x in all_results for x in dataset_list]):
- rows = [y for x in dataset_list for y in get_rows(task, x, limit_langs=limit_langs)]
- try:
- avg = sum([float(x[-1]) for x in rows]) / len(rows)
- except:
- continue
- metric = "multiple" if task == "all" else rows[-1][-2]
- writer.writerow([model_name, task, "average", "en", metric, avg])
-
-if NOT_FOUND:
- print("Not found: " + "'" + "','".join(NOT_FOUND) + "'", len(NOT_FOUND))
diff --git a/run_array.py b/run_array.py
deleted file mode 100644
index ac172e82..00000000
--- a/run_array.py
+++ /dev/null
@@ -1,247 +0,0 @@
-import argparse
-import logging
-import os
-from typing import Dict, List, Union
-
-logging.basicConfig(level=logging.INFO)
-
-logger = logging.getLogger("main")
-
-os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models"
-os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets"
-os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules"
-os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics"
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-from mteb import MTEB
-import numpy as np
-from sentence_transformers import SentenceTransformer
-from torch import Tensor
-import torch.multiprocessing as mp
-
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackAndroidRetrieval",
- "CQADupstackEnglishRetrieval",
- "CQADupstackGamingRetrieval",
- "CQADupstackGisRetrieval",
- "CQADupstackMathematicaRetrieval",
- "CQADupstackPhysicsRetrieval",
- "CQADupstackProgrammersRetrieval",
- "CQADupstackStatsRetrieval",
- "CQADupstackTexRetrieval",
- "CQADupstackUnixRetrieval",
- "CQADupstackWebmastersRetrieval",
- "CQADupstackWordpressRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
- "SummEval",
-]
-
-TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS
-
-
-class SentenceTransformerSpecb:
- # Requires:
- # https://github.com/Muennighoff/sentence-transformers/tree/sgpt_poolings_specb
- # pip install git+https://github.com/Muennighoff/sentence-transformers.git@sgpt_poolings_specb
- def __init__(self, model):
- self.model = SentenceTransformer(model)
- self.sep = " "
- tokens = ["[SOS]", "{SOS}"]
- self.model._first_module().tokenizer.add_tokens(tokens, special_tokens=True)
- self.model._first_module().auto_model.resize_token_embeddings(len(self.model._first_module().tokenizer))
- # Will be replaced with the rep tokens in the model ones
- # The problem is we don't know if a text is query or document when tokenizing in the Transformer.py module,
- # so we use the SOS tokens as an identifier if we have a query or document at hand & then replace them
- # If we would directly use the brackets here, they may become part of another token
- self.model._first_module().bos_spec_token_q = self.model._first_module().tokenizer.encode("[SOS]", add_special_tokens=False)[0]
- self.model._first_module().bos_spec_token_d = self.model._first_module().tokenizer.encode("{SOS}", add_special_tokens=False)[0]
- self.model._first_module().bos_spec_token_q_rep = self.model._first_module().tokenizer.encode("[", add_special_tokens=False)[0]
- self.model._first_module().eos_spec_token_q = self.model._first_module().tokenizer.encode("]", add_special_tokens=False)[0]
- self.model._first_module().bos_spec_token_d_rep = self.model._first_module().tokenizer.encode("{", add_special_tokens=False)[0]
- self.model._first_module().eos_spec_token_d = self.model._first_module().tokenizer.encode("}", add_special_tokens=False)[0]
- self.model._first_module().replace_bos = True
-
- def encode(self, sentences, **kwargs):
- """Returns a list of embeddings for the given sentences.
- Args:
- sentences (`List[str]`): List of sentences to encode
- batch_size (`int`): Batch size for the encoding
-
- Returns:
- `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences
- """
- # Add specb query token
- sentences = ["[SOS]" + sent for sent in sentences]
- return self.model.encode(sentences, **kwargs)
-
- def encode_queries(self, queries: List[str], batch_size: int = 16, **kwargs) -> Union[List[Tensor], np.ndarray, Tensor]:
- # Will be replaced with [ in the models tokenization
- # If we would put [ here, there is a risk of it getting chained with a different token when encoding
- queries = ["[SOS]" + q for q in queries]
- return self.model.encode(queries, batch_size=batch_size, **kwargs)
-
- def encode_corpus(self, corpus: List[Dict[str, str]], batch_size: int = 8, **kwargs) -> Union[List[Tensor], np.ndarray, Tensor]:
- # Will be replaced with { in the models tokenization
- # If we would put { here, there is a risk of it getting chained with a different token when encoding
- sentences = [("{SOS}" + doc["title"] + self.sep + doc["text"]).strip() if "title" in doc else "{SOS}" + doc["text"].strip() for doc in corpus]
- return self.model.encode(sentences, batch_size=batch_size, **kwargs)
-
- def encode_corpus_parallel(
- self, corpus: List[Dict[str, str]], pool: Dict[str, object], batch_size: int, chunk_id: int, **kwargs
- ):
- if type(corpus) is dict:
- sentences = [
- ("{SOS}" + corpus["title"][i] + self.sep + corpus["text"][i]).strip()
- if "title" in corpus
- else "{SOS}" + corpus["text"][i].strip()
- for i in range(len(corpus["text"]))
- ]
- else:
- sentences = [
- ("{SOS}" + doc["title"] + self.sep + doc["text"]).strip() if "title" in doc else "{SOS}" + doc["text"].strip()
- for doc in corpus
- ]
-
- if chunk_id is not None and chunk_id >= len(pool["processes"]):
- output_queue = pool["output"]
- output_queue.get()
-
- input_queue = pool["input"]
- input_queue.put([chunk_id, batch_size, sentences])
-
-
- def start_multi_process_pool(self, target_devices: List[str] = None) -> Dict[str, object]:
- logger.info("Start multi-process pool on devices: {}".format(", ".join(map(str, target_devices))))
-
- ctx = mp.get_context("spawn")
- input_queue = ctx.Queue()
- output_queue = ctx.Queue()
- processes = []
-
- for process_id, device_name in enumerate(target_devices):
- p = ctx.Process(
- target=SentenceTransformer._encode_multi_process_worker,
- args=(process_id, device_name, self.model, input_queue, output_queue),
- daemon=True,
- )
- p.start()
- processes.append(p)
-
- return {"input": input_queue, "output": output_queue, "processes": processes}
-
- def stop_multi_process_pool(self, pool: Dict[str, object]):
- output_queue = pool["output"]
- [output_queue.get() for _ in range(len(pool["processes"]))]
- return self.model.stop_multi_process_pool(pool)
-
-
-def parse_args():
- # Parse command line arguments
- parser = argparse.ArgumentParser()
- parser.add_argument("--startid", type=int)
- parser.add_argument("--endid", type=int)
- parser.add_argument("--addspecbdoc", action='store_true')
- parser.add_argument("--addspecbquery", action='store_true')
- parser.add_argument("--modelpath", type=str, default="/gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-base")
- parser.add_argument("--lang", type=str, default="en")
- parser.add_argument("--taskname", type=str, default=None)
- parser.add_argument("--batchsize", type=int, default=128)
- args = parser.parse_args()
- return args
-
-def main(args):
-
- if args.addspecbdoc or args.addspecbquery:
- model = SentenceTransformerSpecb(args.modelpath) # Only used for SGPT-msmarco models
- else:
- model = SentenceTransformer(args.modelpath)
-
- if args.taskname is not None:
- task = args.taskname
- model_name = args.modelpath.split("/")[-1].split("_")[-1]
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"]
- evaluation = MTEB(tasks=[task], task_langs=[args.lang])
- evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits)
- exit()
-
- for task in TASK_LIST[args.startid:args.endid]:
- print("Running task: ", task)
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"]
- model_name = args.modelpath.split("/")[-1].split("_")[-1]
- evaluation = MTEB(tasks=[task], task_langs=[args.lang])
- evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits)
-
-if __name__ == "__main__":
- args = parse_args()
- main(args)
diff --git a/run_array_laser.py b/run_array_laser.py
deleted file mode 100644
index a3f61208..00000000
--- a/run_array_laser.py
+++ /dev/null
@@ -1,178 +0,0 @@
-"""
-See https://github.com/facebookresearch/LASER/issues/211
-"""
-
-import argparse
-import logging
-import os
-
-import numpy as np
-import subprocess
-
-logging.basicConfig(level=logging.INFO)
-
-os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models"
-os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets"
-os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules"
-os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics"
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-from mteb import MTEB
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackAndroidRetrieval",
- "CQADupstackEnglishRetrieval",
- "CQADupstackGamingRetrieval",
- "CQADupstackGisRetrieval",
- "CQADupstackMathematicaRetrieval",
- "CQADupstackPhysicsRetrieval",
- "CQADupstackProgrammersRetrieval",
- "CQADupstackStatsRetrieval",
- "CQADupstackTexRetrieval",
- "CQADupstackUnixRetrieval",
- "CQADupstackWebmastersRetrieval",
- "CQADupstackWordpressRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
- "SummEval",
-]
-
-TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS
-
-### Setup prior to running ###
-#with open("LASER_script.sh", "w") as f:
-# f.write("LASER=/content/LASER ./LASER/tasks/embed/embed.sh tmp.txt tmp.bin")
-# Run `chmod u+rx LASER_script.sh` to give permissions
-# !chmod u+rx LASER_script.sh
-
-class LASER():
- def encode(self, sentences, batch_size=32, **kwargs):
- """
- Returns a list of embeddings for the given sentences.
- Args:
- sentences (`List[str]`): List of sentences to encode
- batch_size (`int`): Batch size for the encoding
-
- Returns:
- `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences
- """
- if os.path.exists("tmp.txt"):
- os.remove("tmp.txt")
- if os.path.exists("tmp.bin"):
- os.remove("tmp.bin")
-
- # LASER expects one text per line, so we need to replace newlines
- sentences = [s.replace("\n", " ") for s in sentences]
- with open("tmp.txt", "w") as f:
- f.write("\n".join(sentences))
-
- print(len(sentences))
- rc = subprocess.call("./LASER_script.sh", shell=True)
-
- dim = 1024
- X = np.fromfile("tmp.bin", dtype=np.float32, count=-1)
- X.resize(X.shape[0] // dim, dim)
- print(X.shape)
- return X
-
-def parse_args():
- # Parse command line arguments
- parser = argparse.ArgumentParser()
- parser.add_argument("--startid", type=int)
- parser.add_argument("--endid", type=int)
- parser.add_argument("--lang", type=str, default="en")
- parser.add_argument("--taskname", type=str, default=None)
- parser.add_argument("--batchsize", type=int, default=128)
- args = parser.parse_args()
- return args
-
-def main(args):
-
- model = LASER()
- model_name = "LASER2"
-
- if args.taskname is not None:
- task = args.taskname
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"]
- evaluation = MTEB(tasks=[task], task_langs=[args.lang])
- evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits)
- exit()
-
- for task in TASK_LIST[args.startid:args.endid]:
- print("Running task: ", task)
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"]
- evaluation = MTEB(tasks=[task], task_langs=[args.lang])
- evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits)
-
-if __name__ == "__main__":
- args = parse_args()
- main(args)
diff --git a/run_array_openai.py b/run_array_openai.py
deleted file mode 100644
index c3a95d55..00000000
--- a/run_array_openai.py
+++ /dev/null
@@ -1,220 +0,0 @@
-"""
-openai==0.11.4
-"""
-import argparse
-import logging
-import os
-import pathlib
-import pickle
-
-import openai
-from transformers import GPT2TokenizerFast
-
-logging.basicConfig(level=logging.INFO)
-
-os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models"
-os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets"
-os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules"
-os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics"
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-API_KEY = "YOUR_KEY"
-
-from mteb import MTEB
-
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackAndroidRetrieval",
- "CQADupstackEnglishRetrieval",
- "CQADupstackGamingRetrieval",
- "CQADupstackGisRetrieval",
- "CQADupstackMathematicaRetrieval",
- "CQADupstackPhysicsRetrieval",
- "CQADupstackProgrammersRetrieval",
- "CQADupstackStatsRetrieval",
- "CQADupstackTexRetrieval",
- "CQADupstackUnixRetrieval",
- "CQADupstackWebmastersRetrieval",
- "CQADupstackWordpressRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
- "SummEval",
-]
-
-TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS
-class OpenAIEmbedder:
- """
- Benchmark OpenAIs embeddings endpoint on USEB.
- """
- def __init__(self, engine, task_name=None, batch_size=32, save_emb=False, **kwargs):
- self.engine = engine
- self.max_token_len = 2046 # 2048 - 2 special tokens
- self.batch_size = batch_size
- self.save_emb = False # Problematic as the filenames end up being the same
- self.base_path = f"embeddings/{engine.split('/')[-1]}/"
- self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
- self.task_name = task_name
-
- if save_emb:
- assert self.task_name is not None
-
- pathlib.Path(self.base_path).mkdir(parents=True, exist_ok=True)
-
- def encode(self,
- sentences,
- decode=True,
- idx=None,
- **kwargs
- ):
-
- openai.api_key = API_KEY
-
- fin_embeddings = []
-
- embedding_path = f"{self.base_path}/{self.task_name}_{sentences[0][:5]}_{sentences[-1][-5:]}.pickle"
- if sentences and os.path.exists(embedding_path):
- loaded = pickle.load(open(embedding_path, "rb"))
- fin_embeddings = loaded["fin_embeddings"]
- else:
- for i in range(0, len(sentences), self.batch_size):
- batch = sentences[i : i + self.batch_size]
-
- all_tokens = []
- used_indices = []
- for j, txt in enumerate(batch):
- tokens = self.tokenizer.encode(txt, add_special_tokens=False)
- token_len = len(tokens)
- if token_len == 0:
- raise ValueError("Empty items should be cleaned prior to running")
- if token_len > self.max_token_len:
- tokens = tokens[:self.max_token_len]
- # For some characters the API raises weird errors, e.g. input=[[126]]
- if decode:
- tokens = self.tokenizer.decode(tokens)
- all_tokens.append(tokens)
- used_indices.append(j)
-
- out = [[]] * len(batch)
- if all_tokens:
- response = openai.Engine(id=self.engine).embeddings(input=all_tokens)
- assert len(response["data"]) == len(
- all_tokens
- ), f"Sent {len(all_tokens)}, got {len(response['data'])}"
-
- for data in response["data"]:
- idx = data["index"]
- # OpenAI seems to return them ordered, but to be save use the index and insert
- idx = used_indices[idx]
- embedding = data["embedding"]
- out[idx] = embedding
-
- fin_embeddings.extend(out)
- # Save embeddings
- if fin_embeddings and self.save_emb:
- dump = {
- "fin_embeddings": fin_embeddings,
- }
- pickle.dump(dump, open(embedding_path, "wb"))
-
- assert len(sentences) == len(fin_embeddings)
- return fin_embeddings
-
-
-def parse_args():
- # Parse command line arguments
- parser = argparse.ArgumentParser()
- parser.add_argument("--startid", type=int)
- parser.add_argument("--endid", type=int)
- parser.add_argument("--engine", type=str, default="text-similarity-ada-001")
- parser.add_argument("--lang", type=str, default="en")
- parser.add_argument("--taskname", type=str, default=None)
- parser.add_argument("--batchsize", type=int, default=2048)
- args = parser.parse_args()
- return args
-
-def main(args):
-
- # Different batch size than the arg
- # The below is used to send X embeddings to the API
- # The CLI arg is how much will be saved / pickle file
-
- for task in TASK_LIST[args.startid:args.endid]:
- print("Running task: ", task)
- model = OpenAIEmbedder(args.engine, task_name=task, batchsize=256, save_emb=True)
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"]
- model_name = args.engine.split("/")[-1].split("_")[-1]
- evaluation = MTEB(tasks=[task], task_langs=[args.lang])
- evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits)
-
-if __name__ == "__main__":
- args = parse_args()
- main(args)
diff --git a/run_array_openaiv2.py b/run_array_openaiv2.py
deleted file mode 100644
index d29c6cc4..00000000
--- a/run_array_openaiv2.py
+++ /dev/null
@@ -1,226 +0,0 @@
-"""
-openai==0.26.4
-tiktoken==0.2.0
-"""
-import argparse
-import logging
-import os
-import pathlib
-import pickle
-
-from mteb import MTEB
-import openai
-import tiktoken
-from transformers import GPT2TokenizerFast
-
-logging.basicConfig(level=logging.INFO)
-
-os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models"
-os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets"
-os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules"
-os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics"
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-API_KEY = "YOUR_KEY"
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackAndroidRetrieval",
- "CQADupstackEnglishRetrieval",
- "CQADupstackGamingRetrieval",
- "CQADupstackGisRetrieval",
- "CQADupstackMathematicaRetrieval",
- "CQADupstackPhysicsRetrieval",
- "CQADupstackProgrammersRetrieval",
- "CQADupstackStatsRetrieval",
- "CQADupstackTexRetrieval",
- "CQADupstackUnixRetrieval",
- "CQADupstackWebmastersRetrieval",
- "CQADupstackWordpressRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
- "SummEval",
-]
-
-TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS
-
-class OpenAIEmbedder:
- """
- Benchmark OpenAIs embeddings endpoint.
- """
- def __init__(self, engine, task_name=None, batch_size=32, save_emb=False, **kwargs):
- self.engine = engine
- self.max_token_len = 8191
- self.batch_size = batch_size
- self.save_emb = save_emb # Problematic as the filenames may end up being the same
- self.base_path = f"embeddings/{engine.split('/')[-1]}/"
- # self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
- self.tokenizer = tiktoken.encoding_for_model(engine)
- self.task_name = task_name
-
- if save_emb:
- assert self.task_name is not None
-
- pathlib.Path(self.base_path).mkdir(parents=True, exist_ok=True)
-
- def encode(self,
- sentences,
- decode=True,
- idx=None,
- **kwargs
- ):
-
- openai.api_key = API_KEY
-
- fin_embeddings = []
-
- embedding_path = f"{self.base_path}/{self.task_name}_{sentences[0][:10]}_{sentences[-1][-10:]}.pickle"
- if sentences and os.path.exists(embedding_path):
- loaded = pickle.load(open(embedding_path, "rb"))
- fin_embeddings = loaded["fin_embeddings"]
- else:
- for i in range(0, len(sentences), self.batch_size):
- batch = sentences[i : i + self.batch_size]
-
- all_tokens = []
- used_indices = []
- for j, txt in enumerate(batch):
- # tokens = self.tokenizer.encode(txt, add_special_tokens=False)
- if not(txt):
- print("Detected empty item, which is not allowed by the OpenAI API - Replacing with empty space")
- txt = " "
- tokens = self.tokenizer.encode(txt)
- token_len = len(tokens)
- if token_len > self.max_token_len:
- tokens = tokens[:self.max_token_len]
- # For some characters the API raises weird errors, e.g. input=[[126]]
- if decode:
- tokens = self.tokenizer.decode(tokens)
- all_tokens.append(tokens)
- used_indices.append(j)
-
- out = [[]] * len(batch)
- if all_tokens:
- response = openai.Embedding.create(input=all_tokens, model=self.engine)
- # May want to sleep here to avoid getting too many requests error
- # time.sleep(1)
- assert len(response["data"]) == len(
- all_tokens
- ), f"Sent {len(all_tokens)}, got {len(response['data'])}"
-
- for data in response["data"]:
- idx = data["index"]
- # OpenAI seems to return them ordered, but to be save use the index and insert
- idx = used_indices[idx]
- embedding = data["embedding"]
- out[idx] = embedding
-
- fin_embeddings.extend(out)
- # Save embeddings
- if fin_embeddings and self.save_emb:
- dump = {
- "fin_embeddings": fin_embeddings,
- }
- pickle.dump(dump, open(embedding_path, "wb"))
-
- assert len(sentences) == len(fin_embeddings)
- return fin_embeddings
-
-
-def parse_args():
- # Parse command line arguments
- parser = argparse.ArgumentParser()
- parser.add_argument("--startid", type=int)
- parser.add_argument("--endid", type=int)
- parser.add_argument("--engine", type=str, default="text-embedding-ada-002")
- parser.add_argument("--lang", type=str, default="en")
- parser.add_argument("--taskname", type=str, default=None)
- parser.add_argument("--batchsize", type=int, default=2048)
- args = parser.parse_args()
- return args
-
-def main(args):
-
- # There are two different batch sizes
- # OpenAIEmbedder(...) batch size arg is used to send X embeddings to the API
- # evaluation.run(...) batch size arg is how much will be saved / pickle file (as it's the total sent to the embed function)
-
- for task in TASK_LIST[args.startid:args.endid]:
- print("Running task: ", task)
- model = OpenAIEmbedder(args.engine, task_name=task, batch_size=args.batchsize, save_emb=True)
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"]
- model_name = args.engine.split("/")[-1].split("_")[-1]
- evaluation = MTEB(tasks=[task], task_langs=[args.lang])
- evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits, corpus_chunk_size=10000)
-
-if __name__ == "__main__":
- args = parse_args()
- main(args)
diff --git a/run_array_sgpt.py b/run_array_sgpt.py
deleted file mode 100644
index f95da0a6..00000000
--- a/run_array_sgpt.py
+++ /dev/null
@@ -1,247 +0,0 @@
-import argparse
-import logging
-import os
-from typing import Dict, List, Union
-
-from mteb import MTEB
-import numpy as np
-from sentence_transformers import SentenceTransformer
-import torch.multiprocessing as mp
-from torch import Tensor
-
-logging.basicConfig(level=logging.INFO)
-
-logger = logging.getLogger("main")
-
-os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models"
-os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets"
-os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules"
-os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics"
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackAndroidRetrieval",
- "CQADupstackEnglishRetrieval",
- "CQADupstackGamingRetrieval",
- "CQADupstackGisRetrieval",
- "CQADupstackMathematicaRetrieval",
- "CQADupstackPhysicsRetrieval",
- "CQADupstackProgrammersRetrieval",
- "CQADupstackStatsRetrieval",
- "CQADupstackTexRetrieval",
- "CQADupstackUnixRetrieval",
- "CQADupstackWebmastersRetrieval",
- "CQADupstackWordpressRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
- "SummEval",
-]
-
-TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS
-
-
-class SentenceTransformerSpecb(SentenceTransformer):
- # Requires:
- # https://github.com/Muennighoff/sentence-transformers/tree/sgpt_poolings_specb
- # pip install git+https://github.com/Muennighoff/sentence-transformers.git@sgpt_poolings_specb
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- tokens = ["[SOS]", "{SOS}"]
- self.sep = " "
- self._first_module().tokenizer.add_tokens(tokens, special_tokens=True)
- self._first_module().auto_model.resize_token_embeddings(len(self._first_module().tokenizer))
- # Will be replaced with the rep tokens in the model ones
- # The problem is we don't know if a text is query or document when tokenizing in the Transformer.py module,
- # so we use the SOS tokens as an identifier if we have a query or document at hand & then replace them
- # If we would directly use the brackets here, they may become part of another token
- self._first_module().bos_spec_token_q = self._first_module().tokenizer.encode("[SOS]", add_special_tokens=False)[0]
- self._first_module().bos_spec_token_d = self._first_module().tokenizer.encode("{SOS}", add_special_tokens=False)[0]
- self._first_module().bos_spec_token_q_rep = self._first_module().tokenizer.encode("[", add_special_tokens=False)[0]
- self._first_module().eos_spec_token_q = self._first_module().tokenizer.encode("]", add_special_tokens=False)[0]
- self._first_module().bos_spec_token_d_rep = self._first_module().tokenizer.encode("{", add_special_tokens=False)[0]
- self._first_module().eos_spec_token_d = self._first_module().tokenizer.encode("}", add_special_tokens=False)[0]
- self._first_module().replace_bos = True
-
- def encode(self, sentences, **kwargs):
- """Returns a list of embeddings for the given sentences.
- Args:
- sentences (`List[str]`): List of sentences to encode
- batch_size (`int`): Batch size for the encoding
-
- Returns:
- `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences
- """
- # Add specb query token
- sentences = ["[SOS]" + sent for sent in sentences]
- return super().encode(sentences, **kwargs)
-
- def encode_queries(self, queries: List[str], batch_size: int = 16, **kwargs) -> Union[List[Tensor], np.ndarray, Tensor]:
- # Will be replaced with [ in the models tokenization
- # If we would put [ here, there is a risk of it getting chained with a different token when encoding
- queries = ["[SOS]" + q for q in queries]
- return super().encode(queries, batch_size=batch_size, **kwargs)
-
- def encode_corpus(self, corpus: List[Dict[str, str]], batch_size: int = 8, **kwargs) -> Union[List[Tensor], np.ndarray, Tensor]:
- # Will be replaced with { in the models tokenization
- # If we would put { here, there is a risk of it getting chained with a different token when encoding
- sentences = [("{SOS}" + doc["title"] + self.sep + doc["text"]).strip() if "title" in doc else "{SOS}" + doc["text"].strip() for doc in corpus]
- return super().encode(sentences, batch_size=batch_size, **kwargs)
-
- def encode_corpus_parallel(
- self, corpus: List[Dict[str, str]], pool: Dict[str, object], batch_size: int, chunk_id: int, **kwargs
- ):
- if type(corpus) is dict:
- sentences = [
- ("{SOS}" + corpus["title"][i] + self.sep + corpus["text"][i]).strip()
- if "title" in corpus
- else "{SOS}" + corpus["text"][i].strip()
- for i in range(len(corpus["text"]))
- ]
- else:
- sentences = [
- ("{SOS}" + doc["title"] + self.sep + doc["text"]).strip() if "title" in doc else "{SOS}" + doc["text"].strip()
- for doc in corpus
- ]
-
- if chunk_id is not None and chunk_id >= len(pool["processes"]):
- output_queue = pool["output"]
- output_queue.get()
-
- input_queue = pool["input"]
- input_queue.put([chunk_id, batch_size, sentences])
-
-
- def start_multi_process_pool(self, target_devices: List[str] = None) -> Dict[str, object]:
- logger.info("Start multi-process pool on devices: {}".format(", ".join(map(str, target_devices))))
-
- ctx = mp.get_context("spawn")
- input_queue = ctx.Queue()
- output_queue = ctx.Queue()
- processes = []
-
- for process_id, device_name in enumerate(target_devices):
- p = ctx.Process(
- target=SentenceTransformer._encode_multi_process_worker,
- args=(process_id, device_name, self.model, input_queue, output_queue),
- daemon=True,
- )
- p.start()
- processes.append(p)
-
- return {"input": input_queue, "output": output_queue, "processes": processes}
-
- def stop_multi_process_pool(self, pool: Dict[str, object]):
- output_queue = pool["output"]
- [output_queue.get() for _ in range(len(pool["processes"]))]
- return self.model.stop_multi_process_pool(pool)
-
-
-def parse_args():
- # Parse command line arguments
- parser = argparse.ArgumentParser()
- parser.add_argument("--startid", type=int)
- parser.add_argument("--endid", type=int)
- parser.add_argument("--addspecbdoc", action='store_true')
- parser.add_argument("--addspecbquery", action='store_true')
- parser.add_argument("--modelpath", type=str, default="/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-msmarco-specb-bitfit")
- parser.add_argument("--lang", type=str, default="en")
- parser.add_argument("--taskname", type=str, default=None)
- parser.add_argument("--batchsize", type=int, default=128)
- args = parser.parse_args()
- return args
-
-def main(args):
-
- if args.addspecbdoc or args.addspecbquery:
- model = SentenceTransformerSpecb(args.modelpath) # Only used for SGPT-msmarco models
- else:
- model = SentenceTransformer(args.modelpath)
-
- if args.taskname is not None:
- task = args.taskname
- model_name = args.modelpath.split("/")[-1].split("_")[-1]
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"]
- evaluation = MTEB(tasks=[task], task_langs=[args.lang])
- evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits)
- exit()
-
- for task in TASK_LIST[args.startid:args.endid]:
- print("Running task: ", task)
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"]
- model_name = args.modelpath.split("/")[-1].split("_")[-1]
- evaluation = MTEB(tasks=[task], task_langs=[args.lang])
- evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits)
-
-if __name__ == "__main__":
- args = parse_args()
- main(args)
diff --git a/run_array_simcse.py b/run_array_simcse.py
deleted file mode 100644
index cc3eefd8..00000000
--- a/run_array_simcse.py
+++ /dev/null
@@ -1,173 +0,0 @@
-import argparse
-import logging
-import os
-
-logging.basicConfig(level=logging.INFO)
-
-os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models"
-os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets"
-os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules"
-os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics"
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-import numpy as np
-from mteb import MTEB
-from transformers import AutoModel, AutoTokenizer
-import torch
-
-
-TASK_LIST_CLASSIFICATION = [
- "AmazonCounterfactualClassification",
- "AmazonPolarityClassification",
- "AmazonReviewsClassification",
- "Banking77Classification",
- "EmotionClassification",
- "ImdbClassification",
- "MassiveIntentClassification",
- "MassiveScenarioClassification",
- "MTOPDomainClassification",
- "MTOPIntentClassification",
- "ToxicConversationsClassification",
- "TweetSentimentExtractionClassification",
-]
-
-TASK_LIST_CLUSTERING = [
- "ArxivClusteringP2P",
- "ArxivClusteringS2S",
- "BiorxivClusteringP2P",
- "BiorxivClusteringS2S",
- "MedrxivClusteringP2P",
- "MedrxivClusteringS2S",
- "RedditClustering",
- "RedditClusteringP2P",
- "StackExchangeClustering",
- "StackExchangeClusteringP2P",
- "TwentyNewsgroupsClustering",
-]
-
-TASK_LIST_PAIR_CLASSIFICATION = [
- "SprintDuplicateQuestions",
- "TwitterSemEval2015",
- "TwitterURLCorpus",
-]
-
-TASK_LIST_RERANKING = [
- "AskUbuntuDupQuestions",
- "MindSmallReranking",
- "SciDocsRR",
- "StackOverflowDupQuestions",
-]
-
-TASK_LIST_RETRIEVAL = [
- "ArguAna",
- "ClimateFEVER",
- "CQADupstackAndroidRetrieval",
- "CQADupstackEnglishRetrieval",
- "CQADupstackGamingRetrieval",
- "CQADupstackGisRetrieval",
- "CQADupstackMathematicaRetrieval",
- "CQADupstackPhysicsRetrieval",
- "CQADupstackProgrammersRetrieval",
- "CQADupstackStatsRetrieval",
- "CQADupstackTexRetrieval",
- "CQADupstackUnixRetrieval",
- "CQADupstackWebmastersRetrieval",
- "CQADupstackWordpressRetrieval",
- "DBPedia",
- "FEVER",
- "FiQA2018",
- "HotpotQA",
- "MSMARCO",
- "NFCorpus",
- "NQ",
- "QuoraRetrieval",
- "SCIDOCS",
- "SciFact",
- "Touche2020",
- "TRECCOVID",
-]
-
-TASK_LIST_STS = [
- "BIOSSES",
- "SICK-R",
- "STS12",
- "STS13",
- "STS14",
- "STS15",
- "STS16",
- "STS17",
- "STS22",
- "STSBenchmark",
- "SummEval",
-]
-
-TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS
-
-
-class SimCSEWrapper:
- def __init__(self, modelpath="princeton-nlp/sup-simcse-bert-base-uncased"):
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
- self.tokenizer = AutoTokenizer.from_pretrained(modelpath)
- self.model = AutoModel.from_pretrained(modelpath).to(self.device)
- self.model.eval()
-
- def encode(self, sentences, batch_size=32, **kwargs):
- """ Returns a list of embeddings for the given sentences.
- Args:
- sentences (`List[str]`): List of sentences to encode
- batch_size (`int`): Batch size for the encoding
-
- Returns:
- `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences
- """
- all_embeddings = []
- length_sorted_idx = np.argsort([len(sen) for sen in sentences])
- sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
-
- for start_index in range(0, len(sentences), batch_size):
- sentences_batch = sentences_sorted[start_index:start_index+batch_size]
- inputs = self.tokenizer(sentences_batch, padding=True, truncation=True, return_tensors="pt")
- inputs = {k: v.to(self.device) for k,v in inputs.items()}
- # Get the embeddings
- with torch.no_grad():
- embeddings = self.model(**inputs, output_hidden_states=True, return_dict=True).pooler_output
- all_embeddings.extend(embeddings.cpu().numpy())
- all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)]
- return all_embeddings
-
-def parse_args():
- # Parse command line arguments
- parser = argparse.ArgumentParser()
- parser.add_argument("--startid", type=int)
- parser.add_argument("--endid", type=int)
- parser.add_argument("--modelpath", type=str, default="/gpfswork/rech/six/commun/models/princeton-nlp/sup-simcse-bert-base-uncased")
- parser.add_argument("--lang", type=str, default="en")
- parser.add_argument("--taskname", type=str, default=None)
- parser.add_argument("--batchsize", type=int, default=128)
- args = parser.parse_args()
- return args
-
-def main(args):
-
- model = SimCSEWrapper(args.modelpath)
-
- if args.taskname is not None:
- task = args.taskname
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"]
- model_name = args.modelpath.split("/")[-1].split("_")[-1]
- evaluation = MTEB(tasks=[task], task_langs=[args.lang], eval_splits=eval_splits)
- evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize)
- exit()
-
- for task in TASK_LIST[args.startid:args.endid]:
- print("Running task: ", task)
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"]
- model_name = args.modelpath.split("/")[-1].split("_")[-1]
- evaluation = MTEB(tasks=[task], task_langs=[args.lang])
- evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits)
-
-if __name__ == "__main__":
- args = parse_args()
- main(args)
diff --git a/run_benchmark.py b/run_benchmark.py
deleted file mode 100644
index c333ee4c..00000000
--- a/run_benchmark.py
+++ /dev/null
@@ -1,201 +0,0 @@
-import argparse
-import logging
-import json
-import os
-import subprocess
-import time
-
-logging.basicConfig(level=logging.INFO)
-
-os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline
-os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models"
-os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets"
-os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules"
-os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics"
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-import numpy as np
-from mteb import MTEB
-from sentence_transformers import SentenceTransformer
-import torch
-from transformers import AutoModel, AutoTokenizer
-
-
-MODELS = [
- "LASER2",
- "/gpfswork/rech/six/commun/models/sentence-transformers_average_word_embeddings_komninos",
- "/gpfswork/rech/six/commun/models/sentence-transformers_average_word_embeddings_glove.6B.300d",
- "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit",
- "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-msmarco-specb-bitfit",
- "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-nli-bitfit",
- "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-msmarco-specb-bitfit",
- "/gpfswork/rech/six/commun/models/bigscience_sgpt-bloom-7b1-msmarco",
- "/gpfswork/rech/six/commun/models/bigscience-catalogue-lm-data_sgpt-bloom-1b3-nli",
- "/gpfswork/rech/six/commun/models/sentence-transformers_all-MiniLM-L6-v2",
- "/gpfswork/rech/six/commun/models/sentence-transformers_all-mpnet-base-v2",
- "/gpfswork/rech/six/commun/models/sentence-transformers_paraphrase-multilingual-mpnet-base-v2",
- "/gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-base",
- "/gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-xxl",
- "/gpfswork/rech/six/commun/models/sentence-transformers_gtr-t5-base",
- "/gpfswork/rech/six/commun/models/sentence-transformers_gtr-t5-xxl",
- "/gpfswork/rech/six/commun/models/nthakur_contriever-base-msmarco",
- "/gpfswork/rech/six/commun/models/sentence-transformers_msmarco-bert-co-condensor",
- "/gpfswork/rech/six/commun/models/bert-base-uncased",
- "/gpfswork/rech/six/commun/models/princeton-nlp_sup-simcse-bert-base-uncased",
- "/gpfswork/rech/six/commun/models/princeton-nlp_unsup-simcse-bert-base-uncased",
- "/gpfswork/rech/six/commun/models/sentence-transformers_LaBSE",
-]
-
-MODELS = [
- "/gpfswork/rech/six/commun/models/sentence-transformers_all-MiniLM-L12-v2",
- "/gpfswork/rech/six/commun/models/sentence-transformers_allenai-specter",
-]
-
-TASKS = [
- "STS15",
-]
-
-class SentenceTransformerSpecb(SentenceTransformer):
- # Requires:
- # https://github.com/Muennighoff/sentence-transformers/tree/sgpt_poolings_specb
- # pip install git+https://github.com/Muennighoff/sentence-transformers.git@sgpt_poolings_specb
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- tokens = ["[SOS]", "{SOS}"]
- self._first_module().tokenizer.add_tokens(tokens, special_tokens=True)
- self._first_module().auto_model.resize_token_embeddings(len(self._first_module().tokenizer))
- # Will be replaced with the rep tokens in the model ones
- # The problem is we don't know if a text is query or document when tokenizing in the Transformer.py module,
- # so we use the SOS tokens as an identifier if we have a query or document at hand & then replace them
- # If we would directly use the brackets here, they may become part of another token
- self._first_module().bos_spec_token_q = self._first_module().tokenizer.encode("[SOS]", add_special_tokens=False)[0]
- self._first_module().bos_spec_token_d = self._first_module().tokenizer.encode("{SOS}", add_special_tokens=False)[0]
- self._first_module().bos_spec_token_q_rep = self._first_module().tokenizer.encode("[", add_special_tokens=False)[0]
- self._first_module().eos_spec_token_q = self._first_module().tokenizer.encode("]", add_special_tokens=False)[0]
- self._first_module().bos_spec_token_d_rep = self._first_module().tokenizer.encode("{", add_special_tokens=False)[0]
- self._first_module().eos_spec_token_d = self._first_module().tokenizer.encode("}", add_special_tokens=False)[0]
- self._first_module().replace_bos = True
-
- def encode(self, sentences, **kwargs):
- """Returns a list of embeddings for the given sentences.
- Args:
- sentences (`List[str]`): List of sentences to encode
- batch_size (`int`): Batch size for the encoding
-
- Returns:
- `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences
- """
- # Add specb query token
- sentences = ["[SOS]" + sent for sent in sentences]
- return super().encode(sentences, **kwargs)
-
-class SimCSEWrapper:
- def __init__(self, modelpath="princeton-nlp/sup-simcse-bert-base-uncased"):
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
- self.tokenizer = AutoTokenizer.from_pretrained(modelpath)
- self.model = AutoModel.from_pretrained(modelpath).to(self.device)
- self.model.eval()
-
- def encode(self, sentences, batch_size=32, **kwargs):
- """ Returns a list of embeddings for the given sentences.
- Args:
- sentences (`List[str]`): List of sentences to encode
- batch_size (`int`): Batch size for the encoding
-
- Returns:
- `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences
- """
- all_embeddings = []
- length_sorted_idx = np.argsort([len(sen) for sen in sentences])
- sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
-
- for start_index in range(0, len(sentences), batch_size):
- sentences_batch = sentences_sorted[start_index:start_index+batch_size]
- inputs = self.tokenizer(sentences_batch, padding=True, truncation=True, return_tensors="pt")
- inputs = {k: v.to(self.device) for k,v in inputs.items()}
- # Get the embeddings
- with torch.no_grad():
- embeddings = self.model(**inputs, output_hidden_states=True, return_dict=True).pooler_output
- all_embeddings.extend(embeddings.cpu().numpy())
- all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)]
- return all_embeddings
-
-class LASER():
- def encode(self, sentences, batch_size=32, **kwargs):
- """
- Returns a list of embeddings for the given sentences.
- Args:
- sentences (`List[str]`): List of sentences to encode
- batch_size (`int`): Batch size for the encoding
-
- Returns:
- `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences
- """
- if os.path.exists("tmp.txt"):
- os.remove("tmp.txt")
- if os.path.exists("tmp.bin"):
- os.remove("tmp.bin")
-
- # LASER expects one text per line, so we need to replace newlines
- sentences = [s.replace("\n", " ") for s in sentences]
- with open("tmp.txt", "w") as f:
- f.write("\n".join(sentences))
-
- rc = subprocess.call("/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/mteb/LASER/LASER_script.sh", shell=True)
-
- dim = 1024
- X = np.fromfile("tmp.bin", dtype=np.float32, count=-1)
- X.resize(X.shape[0] // dim, dim)
- print(X.shape)
- return X
-
-
-def parse_args():
- # Parse command line arguments
- parser = argparse.ArgumentParser()
- parser.add_argument("--lang", type=str, default="en")
- parser.add_argument("--batchsize", type=int, default=32)
- args = parser.parse_args()
- return args
-
-def main(args):
-
- out = {}
- for model_name in MODELS:
- if ("sgpt" in model_name.lower()) and ("msmarco" in model_name.lower()):
- model = SentenceTransformerSpecb(model_name) # Only used for SGPT-msmarco models
- elif "simcse" in model_name.lower():
- model = SimCSEWrapper(model_name)
- elif "LASER2" == model_name:
- model = LASER()
- else:
- model = SentenceTransformer(model_name)
-
- evaluation = MTEB(tasks=TASKS, task_langs=[args.lang])
- model_name = model_name.split("/")[-1].split("_")[-1]
- for task, task_name in zip(evaluation.tasks, TASKS):
- task.load_data()
-
- # Encode all with the same batch size for a fair comparison of speed / sentence
- data = task.dataset["test"]["sentence1"] + task.dataset["test"]["sentence2"]
- data_len = len(data)
- # Warmup run to build py caches etc
- embeddings = np.asarray(model.encode(data, batch_size=args.batchsize))
- tick = time.time()
- embeddings = np.asarray(model.encode(data, batch_size=args.batchsize))
- tock = time.time()
-
- out.setdefault(model_name, {})
- out[model_name].setdefault(task_name, {})
- out[model_name][task_name]["speed_ms"] = ((tock - tick) / data_len) * 1000
- out[model_name][task_name]["embedding_size_kb"] = embeddings.nbytes / data_len / 1000
-
- # Overwrite every iteration for intermed results
- with open("benchmark.json", "w") as f:
- json.dump(out, f)
-
-
-if __name__ == "__main__":
- args = parse_args()
- main(args)
diff --git a/script_mteb_french/README.md b/script_mteb_french/README.md
deleted file mode 100644
index 03e6d2cf..00000000
--- a/script_mteb_french/README.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# Scripts to run the French MTEB benchmark
-
-This folder contains the scripts used to generate the French tab results on the [MTEB](https://github.com/embeddings-benchmark/mteb) benchmark.
-
-Below are instructions to run the main scripts.
-
-## Benchmark
-
-### Running on host using venv
-
-* Navigate to the repository root folder
-* Create your virtual env:
-
-```bash
-python3 -m venv .venv
-```
-* Activate it and install the requirements:
-```bash
-source .venv/bin/activate
-pip install -r requirements.txt
-```
-* Run the benchmark:
-```bash
-cd script_mteb_french
-python run_benchmark.py
-```
-
-By default the benchmark runs on sentence_transformer models but you can specify the type with the argument `--model_type`:
-```bash
-# default ['sentence_transformer']
-python run_benchmark.py
-# choosing other type ['voyage_ai']
-python run_benchmark.py --model_type voyage_ai
-# running on two types ['voyage_ai', 'sentence_transformer']
-python run_benchmark.py --model_type voyage_ai sentence_transformer
-```
-
-You can also run the benchmark on one model only by specifying `--model_name`:
-```bash
-# default ['sentence_transformer'] -> all models of this type
-python run_benchmark.py
-# running on one model 'camembert-base'
-python run_benchmark.py --model_type sentence_transformer --model_name "xlm-roberta-base"
-```
-Note that the `model_name` should be included in models of specified `model_type`.
-
-You can run the benchmark on one task type in ["all", "classification", "clustering", "reranking", "retrieval", "pair_classification", "sts", "summarization", "bitextmining"], default is set to "all" and will run all tasks :
-```bash
-# running 'sentence_transformer' models on 'classification' task
-python run_benchmark.py --model_type sentence_transformer --task_type classification
-```
-
-## Running using Docker
-
-* Navigate to the repository root folder
-* Build the docker image:
-```bash
-docker build -t mtebscripts_image .
-```
-* Run the benchmark in the container as follows:
-```
-docker run -v $(pwd):/mtebscripts mtebscripts_image sh -c "cd script_mteb_french && python run_benchmark.py"
-```
-If you want to use the gpu, make sure to add the `--gpus` option to your run command, or `--runtime=nvidia` if you are using an older version of docker.
-
-Note: Because the volume is shared between the host and the container, the results will be available in the host at the end.
-
-## Models' characteristics
-
-Additionnaly, you can find a script `get_model_specs.py` to compute models' characteristics (size, number of params, embeddings dimension). You can run it similarly to the benchmark by substituting `run_benchmark.py` with `get_model_specs.py`.
diff --git a/script_mteb_french/results_analysis/__init__.py b/script_mteb_french/results_analysis/__init__.py
deleted file mode 100644
index 11e8bfb7..00000000
--- a/script_mteb_french/results_analysis/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .results_parser import ResultsParser
\ No newline at end of file
diff --git a/slurmscripts/run_array_8a100_st5xxl.slurm b/slurmscripts/run_array_8a100_st5xxl.slurm
deleted file mode 100644
index a6b37b60..00000000
--- a/slurmscripts/run_array_8a100_st5xxl.slurm
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-a100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:8
-#SBATCH --cpus-per-task=64 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@a100
-#SBATCH --reservation=hug
-#SBATCH --constraint=a100
-#SBATCH --partition=gpu_p5
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-xxl
diff --git a/slurmscripts/run_array_a100_100_gtrxxl.slurm b/slurmscripts/run_array_a100_100_gtrxxl.slurm
deleted file mode 100644
index 02a73c0c..00000000
--- a/slurmscripts/run_array_a100_100_gtrxxl.slurm
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-a100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=8 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time 100:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@a100
-#SBATCH --reservation=hug
-#SBATCH --constraint=a100
-#SBATCH --partition=gpu_p5
-#SBATCH --qos=qos_gpu-gc # up to 100h
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_gtr-t5-xxl
diff --git a/slurmscripts/run_array_a100_100_sgpt5b8_asym.slurm b/slurmscripts/run_array_a100_100_sgpt5b8_asym.slurm
deleted file mode 100644
index faa0fe98..00000000
--- a/slurmscripts/run_array_a100_100_sgpt5b8_asym.slurm
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-a100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=8 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time 100:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@a100
-#SBATCH --reservation=hug
-#SBATCH --constraint=a100
-#SBATCH --partition=gpu_p5
-#SBATCH --qos=qos_gpu-gc # up to 100h
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-msmarco-specb-bitfit \
- --addspecbquery
diff --git a/slurmscripts/run_array_a100_100_sgpt5b8_asym_specb.slurm b/slurmscripts/run_array_a100_100_sgpt5b8_asym_specb.slurm
deleted file mode 100644
index 1c3dd343..00000000
--- a/slurmscripts/run_array_a100_100_sgpt5b8_asym_specb.slurm
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-a100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=8 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time 80:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@a100
-#SBATCH --reservation=hug
-#SBATCH --constraint=a100
-#SBATCH --partition=gpu_p5
-#SBATCH --qos=qos_gpu-gc # up to 100h
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array_sgpt.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-msmarco-specb-bitfit \
- --addspecbquery
diff --git a/slurmscripts/run_array_a100_100_sgpt5b8_sym.slurm b/slurmscripts/run_array_a100_100_sgpt5b8_sym.slurm
deleted file mode 100644
index b667f34d..00000000
--- a/slurmscripts/run_array_a100_100_sgpt5b8_sym.slurm
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-a100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=8 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time 100:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@a100
-#SBATCH --reservation=hug
-#SBATCH --constraint=a100
-#SBATCH --partition=gpu_p5
-#SBATCH --qos=qos_gpu-gc # up to 100h
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-nli-bitfit
diff --git a/slurmscripts/run_array_a100_100_st5xxl.slurm b/slurmscripts/run_array_a100_100_st5xxl.slurm
deleted file mode 100644
index 5982a42b..00000000
--- a/slurmscripts/run_array_a100_100_st5xxl.slurm
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-a100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=8 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time 100:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@a100
-#SBATCH --reservation=hug
-#SBATCH --constraint=a100
-#SBATCH --partition=gpu_p5
-#SBATCH --qos=qos_gpu-gc # up to 100h
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-xxl
diff --git a/slurmscripts/run_array_a100_sgpt5b8_sym.slurm b/slurmscripts/run_array_a100_sgpt5b8_sym.slurm
deleted file mode 100644
index 98bfb93f..00000000
--- a/slurmscripts/run_array_a100_sgpt5b8_sym.slurm
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-a100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=8 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@a100
-#SBATCH --reservation=hug
-#SBATCH --constraint=a100
-#SBATCH --partition=gpu_p5
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-nli-bitfit
diff --git a/slurmscripts/run_array_a100_st5xxl.slurm b/slurmscripts/run_array_a100_st5xxl.slurm
deleted file mode 100644
index d8edab6e..00000000
--- a/slurmscripts/run_array_a100_st5xxl.slurm
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-a100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=8 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@a100
-#SBATCH --reservation=hug
-#SBATCH --constraint=a100
-#SBATCH --partition=gpu_p5
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-xxl
diff --git a/slurmscripts/run_array_ada.slurm b/slurmscripts/run_array_ada.slurm
deleted file mode 100644
index 2581afe9..00000000
--- a/slurmscripts/run_array_ada.slurm
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=ada
-#SBATCH --partition=prepost
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node!
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@cpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/mteb
-
-NUM_TASKS_PER_JOB=10
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python run_array_openai.py \
- --startid $TASK_START \
- --endid $TASK_END
diff --git a/slurmscripts/run_array_v10032_bert.slurm b/slurmscripts/run_array_v10032_bert.slurm
deleted file mode 100644
index 46a2c11f..00000000
--- a/slurmscripts/run_array_v10032_bert.slurm
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-#SBATCH -C v100-32g
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/bert-base-uncased
diff --git a/slurmscripts/run_array_v10032_cocondensor.slurm b/slurmscripts/run_array_v10032_cocondensor.slurm
deleted file mode 100644
index a08fbc51..00000000
--- a/slurmscripts/run_array_v10032_cocondensor.slurm
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --qos=qos_gpu-t3
-#SBATCH -C v100-32g
-#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_msmarco-bert-co-condensor
diff --git a/slurmscripts/run_array_v10032_contriever.slurm b/slurmscripts/run_array_v10032_contriever.slurm
deleted file mode 100644
index 26d5045b..00000000
--- a/slurmscripts/run_array_v10032_contriever.slurm
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --qos=qos_gpu-t3
-#SBATCH -C v100-32g
-#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/nthakur_contriever-base-msmarco
diff --git a/slurmscripts/run_array_v10032_gtr.slurm b/slurmscripts/run_array_v10032_gtr.slurm
deleted file mode 100644
index 2cd303ff..00000000
--- a/slurmscripts/run_array_v10032_gtr.slurm
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH -C v100-32g
-#SBATCH --time 10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_gtr-t5-base
diff --git a/slurmscripts/run_array_v10032_st5.slurm b/slurmscripts/run_array_v10032_st5.slurm
deleted file mode 100644
index 6c476c45..00000000
--- a/slurmscripts/run_array_v10032_st5.slurm
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --cpus-per-task=40 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --qos=qos_gpu-t3
-#SBATCH -C v100-32g
-#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS=19
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-base
diff --git a/slurmscripts/run_array_v100_glove.slurm b/slurmscripts/run_array_v100_glove.slurm
deleted file mode 100644
index 4ed2a08a..00000000
--- a/slurmscripts/run_array_v100_glove.slurm
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_average_word_embeddings_glove.6B.300d
diff --git a/slurmscripts/run_array_v100_internet_sgpt125m_sym.slurm b/slurmscripts/run_array_v100_internet_sgpt125m_sym.slurm
deleted file mode 100644
index 8050ee48..00000000
--- a/slurmscripts/run_array_v100_internet_sgpt125m_sym.slurm
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-mteb-125M # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@cpu
-#SBATCH --partition=prepost
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS=54
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit
diff --git a/slurmscripts/run_array_v100_komninos.slurm b/slurmscripts/run_array_v100_komninos.slurm
deleted file mode 100644
index f3f0753f..00000000
--- a/slurmscripts/run_array_v100_komninos.slurm
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_average_word_embeddings_komninos
diff --git a/slurmscripts/run_array_v100_labse.slurm b/slurmscripts/run_array_v100_labse.slurm
deleted file mode 100644
index 2a9a9a34..00000000
--- a/slurmscripts/run_array_v100_labse.slurm
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100-labse # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-#SBATCH -C v100-32g
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --taskname BUCC \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_LaBSE
diff --git a/slurmscripts/run_array_v100_laser.slurm b/slurmscripts/run_array_v100_laser.slurm
deleted file mode 100644
index 608ca482..00000000
--- a/slurmscripts/run_array_v100_laser.slurm
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array_laser.py \
- --startid $TASK_START \
- --endid $TASK_END
diff --git a/slurmscripts/run_array_v100_minilm.slurm b/slurmscripts/run_array_v100_minilm.slurm
deleted file mode 100644
index 6b600bf1..00000000
--- a/slurmscripts/run_array_v100_minilm.slurm
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_all-MiniLM-L6-v2
diff --git a/slurmscripts/run_array_v100_mpnet.slurm b/slurmscripts/run_array_v100_mpnet.slurm
deleted file mode 100644
index 1b779454..00000000
--- a/slurmscripts/run_array_v100_mpnet.slurm
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_all-mpnet-base-v2
diff --git a/slurmscripts/run_array_v100_multimini.slurm b/slurmscripts/run_array_v100_multimini.slurm
deleted file mode 100644
index 7044300a..00000000
--- a/slurmscripts/run_array_v100_multimini.slurm
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100-multimini # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-#SBATCH -C v100-32g
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --taskname BUCC \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2
diff --git a/slurmscripts/run_array_v100_multimpnet.slurm b/slurmscripts/run_array_v100_multimpnet.slurm
deleted file mode 100644
index 0e1098fb..00000000
--- a/slurmscripts/run_array_v100_multimpnet.slurm
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100-multimpnet # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-#SBATCH -C v100-32g
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --taskname BUCC \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_paraphrase-multilingual-mpnet-base-v2
diff --git a/slurmscripts/run_array_v100_sgpt125m_asym.slurm b/slurmscripts/run_array_v100_sgpt125m_asym.slurm
deleted file mode 100644
index b6352cb9..00000000
--- a/slurmscripts/run_array_v100_sgpt125m_asym.slurm
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS=20
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array_sgpt.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-msmarco-specb-bitfit \
- --addspecbquery
diff --git a/slurmscripts/run_array_v100_sgpt1b3_sym.slurm b/slurmscripts/run_array_v100_sgpt1b3_sym.slurm
deleted file mode 100644
index a1726be7..00000000
--- a/slurmscripts/run_array_v100_sgpt1b3_sym.slurm
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-#SBATCH -C v100-32g
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --lang zh \
- --modelpath /gpfswork/rech/six/commun/models/bigscience-catalogue-lm-data_sgpt-nli-bloom-1b3
diff --git a/slurmscripts/run_array_v100_simcsesup.slurm b/slurmscripts/run_array_v100_simcsesup.slurm
deleted file mode 100644
index c260c907..00000000
--- a/slurmscripts/run_array_v100_simcsesup.slurm
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array_simcse.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/princeton-nlp_sup-simcse-bert-base-uncased
-
-echo "END TIME: $(date)"
diff --git a/slurmscripts/run_array_v100_simcseunsup.slurm b/slurmscripts/run_array_v100_simcseunsup.slurm
deleted file mode 100644
index 023af0b2..00000000
--- a/slurmscripts/run_array_v100_simcseunsup.slurm
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array_simcse.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/princeton-nlp_unsup-simcse-bert-base-uncased
diff --git a/slurmscripts/run_array_v100_st5.slurm b/slurmscripts/run_array_v100_st5.slurm
deleted file mode 100644
index 6f2af715..00000000
--- a/slurmscripts/run_array_v100_st5.slurm
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=run-array-v100 # job name
-#SBATCH --ntasks=1 # number of MP tasks
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=10 # number of cores per tasks
-#SBATCH --hint=nomultithread # we get physical cores not logical
-#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS)
-#SBATCH --output=%x-%j.out # output file name
-#SBATCH --account=six@gpu
-
-set -x -e
-
-source $six_ALL_CCFRWORK/start-prod
-conda activate muennighoffmtb
-
-echo "START TIME: $(date)"
-
-cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/
-
-NUM_TASKS=19
-NUM_TASKS_PER_JOB=1
-
-TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB )
-TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB )
-
-python mteb/run_array.py \
- --startid $TASK_START \
- --endid $TASK_END \
- --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-base
diff --git a/script_mteb_french/results_analysis/README.md b/tools/README.md
similarity index 91%
rename from script_mteb_french/results_analysis/README.md
rename to tools/README.md
index 28e9990c..6a9da674 100644
--- a/script_mteb_french/results_analysis/README.md
+++ b/tools/README.md
@@ -6,11 +6,11 @@ Before starting, you can create your environment using the packages listed in *r
### result_parser.py
-This scripts intent is to ***format results from json files in the results folder to a table*** (csv, excel or latex).
+This script's intent is to ***format results from json files in the results folder to a table*** (csv, excel or latex).
#### Usage
-You can use the class ResultParser like so:
+You can use the class ``ResultsParser`` like so:
```py
from results_analysis.results_parser import ResultParser
@@ -30,7 +30,7 @@ rp = ResultParser()
results_df = rp(RESULT_FOLDER_PATH, output_format="latex", apply_style=True)
```
-Alternatively, you can use a command line :
+Alternatively, you can use a command line:
```
python .\script_mteb_french\results_analysis\results_parser.py --results_folder ./results --output_format csv
```
diff --git a/tools/analysis_tools/__init__.py b/tools/analysis_tools/__init__.py
new file mode 100644
index 00000000..e6a677d2
--- /dev/null
+++ b/tools/analysis_tools/__init__.py
@@ -0,0 +1 @@
+from ...tools.results_parser import ResultsParser
\ No newline at end of file
diff --git a/script_mteb_french/results_analysis/dataset_correlation.py b/tools/analysis_tools/dataset_correlation.py
similarity index 96%
rename from script_mteb_french/results_analysis/dataset_correlation.py
rename to tools/analysis_tools/dataset_correlation.py
index 137e1be4..c2afdaa5 100644
--- a/script_mteb_french/results_analysis/dataset_correlation.py
+++ b/tools/analysis_tools/dataset_correlation.py
@@ -4,7 +4,7 @@
import seaborn as sns
import numpy as np
-from results_parser import ResultsParser
+from mtebscripts.tools.results_parser import ResultsParser
def parse_args() -> Namespace:
@@ -15,7 +15,7 @@ def parse_args() -> Namespace:
"""
parser = ArgumentParser()
parser.add_argument("--results_folder", required=True, type=str)
- parser.add_argument("--output_folder", type=str, default="./analyses_outputs/results_correlations")
+ parser.add_argument("--output_folder", type=str, default="./analysis_outputs/results_correlations")
parser.add_argument(
"--output_format",
type=str,
diff --git a/script_mteb_french/results_analysis/datasets_similarity.py b/tools/analysis_tools/datasets_similarity.py
similarity index 99%
rename from script_mteb_french/results_analysis/datasets_similarity.py
rename to tools/analysis_tools/datasets_similarity.py
index c7c2e95f..9af5cb93 100644
--- a/script_mteb_french/results_analysis/datasets_similarity.py
+++ b/tools/analysis_tools/datasets_similarity.py
@@ -169,7 +169,7 @@ def parse_args() -> Namespace:
parser = ArgumentParser()
parser.add_argument("--task_type", type=str, default="all")
parser.add_argument("--langs", type=list[str], default=["fr"])
- parser.add_argument("--output_folder", type=str, default="./analyses_outputs/datasets_similarity")
+ parser.add_argument("--output_folder", type=str, default="./analysis_outputs/datasets_similarity")
parser.add_argument("--model_name", type=str, default="intfloat/multilingual-e5-large")
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--n_samples", type=int, default=90)
diff --git a/script_mteb_french/results_analysis/performance_vs_characteristics.py b/tools/analysis_tools/performance_vs_characteristics.py
similarity index 97%
rename from script_mteb_french/results_analysis/performance_vs_characteristics.py
rename to tools/analysis_tools/performance_vs_characteristics.py
index 979598a2..458a1bd1 100644
--- a/script_mteb_french/results_analysis/performance_vs_characteristics.py
+++ b/tools/analysis_tools/performance_vs_characteristics.py
@@ -4,7 +4,7 @@
import os
from argparse import ArgumentParser, Namespace
-from results_parser import ResultsParser
+from mtebscripts.tools.results_parser import ResultsParser
import numpy as np
# model,pretrained_or_tuned,multilingual_or_french,number_params,size_gb,seq_len,embedding_dim,model_type,license
@@ -60,7 +60,7 @@ def parse_args() -> Namespace:
parser.add_argument(
"--output_folder",
type=str,
- default="./analyses_outputs/performance_vs_characteristics",
+ default="./analysis_outputs/performance_vs_characteristics",
)
parser.add_argument(
"--output_format",
diff --git a/script_mteb_french/results_analysis/results_parser.py b/tools/analysis_tools/results_parser.py
similarity index 99%
rename from script_mteb_french/results_analysis/results_parser.py
rename to tools/analysis_tools/results_parser.py
index 95837835..8e56cf4e 100644
--- a/script_mteb_french/results_analysis/results_parser.py
+++ b/tools/analysis_tools/results_parser.py
@@ -262,7 +262,7 @@ def parse_args() -> Namespace:
parser.add_argument("--results_folder", required=True, type=str)
parser.add_argument("--output_format", type=str, choices=["excel", "csv", "latex"], default="excel")
parser.add_argument("--apply_style", type=bool, default=True)
- parser.add_argument("--output_folder", type=str, default="./analyses_outputs/")
+ parser.add_argument("--output_folder", type=str, default="./analysis_outputs/")
args = parser.parse_args()
return args
diff --git a/script_mteb_french/results_analysis/statistical_tests.py b/tools/analysis_tools/statistical_tests.py
similarity index 98%
rename from script_mteb_french/results_analysis/statistical_tests.py
rename to tools/analysis_tools/statistical_tests.py
index 4030075b..2a590481 100644
--- a/script_mteb_french/results_analysis/statistical_tests.py
+++ b/tools/analysis_tools/statistical_tests.py
@@ -19,7 +19,7 @@ def parse_args() -> Namespace:
parser.add_argument(
"--output_folder",
type=str,
- default="./analyses_outputs/statistical_tests",
+ default="./analysis_outputs/statistical_tests",
)
parser.add_argument(
"--output_format",
diff --git a/script_mteb_french/estimate_evaluation_cost.py b/tools/model_tools/estimate_evaluation_cost.py
similarity index 100%
rename from script_mteb_french/estimate_evaluation_cost.py
rename to tools/model_tools/estimate_evaluation_cost.py
diff --git a/script_mteb_french/get_model_specs.py b/tools/model_tools/get_model_specs.py
similarity index 98%
rename from script_mteb_french/get_model_specs.py
rename to tools/model_tools/get_model_specs.py
index a8469597..a2edf905 100644
--- a/script_mteb_french/get_model_specs.py
+++ b/tools/model_tools/get_model_specs.py
@@ -4,7 +4,7 @@
import os
from huggingface_hub import HfFileSystem
-import model_spec_utils
+import mtebscripts.tools.model_tools.model_spec_utils as model_spec_utils
from run_benchmark import TYPES_TO_MODELS
import pandas as pd
diff --git a/script_mteb_french/model_spec_utils.py b/tools/model_tools/model_spec_utils.py
similarity index 100%
rename from script_mteb_french/model_spec_utils.py
rename to tools/model_tools/model_spec_utils.py
diff --git a/script_mteb_french/preload_models.py b/tools/preload_models.py
similarity index 100%
rename from script_mteb_french/preload_models.py
rename to tools/preload_models.py
diff --git a/script_mteb_french/preload_tasks.py b/tools/preload_tasks.py
similarity index 100%
rename from script_mteb_french/preload_tasks.py
rename to tools/preload_tasks.py
diff --git a/script_mteb_french/run_benchmark.py b/tools/run_benchmark.py
similarity index 70%
rename from script_mteb_french/run_benchmark.py
rename to tools/run_benchmark.py
index febe880a..9b96278c 100644
--- a/script_mteb_french/run_benchmark.py
+++ b/tools/run_benchmark.py
@@ -6,6 +6,7 @@
from src.ModelConfig import ModelConfig
from utils.tasks_list import get_tasks
+from utils.models_list import TYPES_TO_MODELS, SENTENCE_TRANSORMER_MODELS_WITH_ERRORS
logging.basicConfig(
stream=sys.stdout,
@@ -29,83 +30,9 @@
Example: MODELS = [ModelConfig("intfloat/multilingual-e5-base", model_type="sentence_transformer")]
"""
-#############################
-# Step 1 : Setup model list #
-#############################
-SENTENCE_TRANSORMER_MODELS = [
- "bert-base-multilingual-cased",
- "bert-base-multilingual-uncased",
- "flaubert/flaubert_base_uncased",
- "flaubert/flaubert_base_cased",
- "flaubert/flaubert_large_cased",
- "dangvantuan/sentence-camembert-base",
- "sentence-transformers/distiluse-base-multilingual-cased-v2",
- "sentence-transformers/all-MiniLM-L6-v2",
- "sentence-transformers/all-MiniLM-L12-v2",
- "sentence-transformers/LaBSE",
- "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
- "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
- "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
- "intfloat/multilingual-e5-base",
- "intfloat/multilingual-e5-large",
- "intfloat/multilingual-e5-small",
- "distilbert-base-uncased",
- "Geotrend/distilbert-base-25lang-cased",
- "Geotrend/distilbert-base-en-fr-es-pt-it-cased",
- "Geotrend/distilbert-base-en-fr-cased",
- "Geotrend/distilbert-base-fr-cased",
- "Geotrend/bert-base-25lang-cased",
- "Geotrend/bert-base-15lang-cased",
- "Geotrend/bert-base-10lang-cased",
- "shibing624/text2vec-base-multilingual",
- "izhx/udever-bloom-560m",
- "izhx/udever-bloom-1b1",
- "sentence-transformers/sentence-t5-base",
- "sentence-transformers/sentence-t5-large",
- "sentence-transformers/sentence-t5-xl",
- "sentence-transformers/sentence-t5-xxl",
- "intfloat/e5-mistral-7b-instruct",
- "Wissam42/sentence-croissant-llm-base"
-]
-
-# these models max_length is indicated to be 514 whereas the embedding layer actually supports 512
-SENTENCE_TRANSORMER_MODELS_WITH_ERRORS = [
- "camembert/camembert-base",
- "camembert/camembert-large",
- "dangvantuan/sentence-camembert-large",
- "xlm-roberta-base",
- "xlm-roberta-large",
-]
-
-UNIVERSAL_SENTENCE_ENCODER_MODELS = [
- "vprelovac/universal-sentence-encoder-multilingual-3",
- "vprelovac/universal-sentence-encoder-multilingual-large-3",
-]
-
-LASER_MODELS = ["laser2"]
-
-VOYAGE_MODELS = ["voyage-2", "voyage-code-2"]
-
-OPEN_AI_MODELS = ["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]
-
-COHERE_MODELS = ["embed-multilingual-light-v3.0", "embed-multilingual-v3.0"]
-
-MISTRAL_MODELS = ["mistral-embed"]
-
-TYPES_TO_MODELS = {
- "sentence_transformer": SENTENCE_TRANSORMER_MODELS
- + SENTENCE_TRANSORMER_MODELS_WITH_ERRORS,
- "universal_sentence_encoder": UNIVERSAL_SENTENCE_ENCODER_MODELS,
- "laser": LASER_MODELS,
- "voyage_ai": VOYAGE_MODELS,
- "open_ai": OPEN_AI_MODELS,
- "cohere": COHERE_MODELS,
- "mistral_ai": MISTRAL_MODELS,
-
-}
##########################
-# Step 3 : Run benchmark #
+#     Run benchmark      #
##########################
diff --git a/script_mteb_french/utils/__init__.py b/tools/utils/__init__.py
similarity index 100%
rename from script_mteb_french/utils/__init__.py
rename to tools/utils/__init__.py
diff --git a/tools/utils/models_list.py b/tools/utils/models_list.py
new file mode 100644
index 00000000..26fdd926
--- /dev/null
+++ b/tools/utils/models_list.py
@@ -0,0 +1,71 @@
+SENTENCE_TRANSORMER_MODELS = [
+ "bert-base-multilingual-cased",
+ "bert-base-multilingual-uncased",
+ "flaubert/flaubert_base_uncased",
+ "flaubert/flaubert_base_cased",
+ "flaubert/flaubert_large_cased",
+ "dangvantuan/sentence-camembert-base",
+ "sentence-transformers/distiluse-base-multilingual-cased-v2",
+ "sentence-transformers/all-MiniLM-L6-v2",
+ "sentence-transformers/all-MiniLM-L12-v2",
+ "sentence-transformers/LaBSE",
+ "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
+ "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+ "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
+ "intfloat/multilingual-e5-base",
+ "intfloat/multilingual-e5-large",
+ "intfloat/multilingual-e5-small",
+ "distilbert-base-uncased",
+ "Geotrend/distilbert-base-25lang-cased",
+ "Geotrend/distilbert-base-en-fr-es-pt-it-cased",
+ "Geotrend/distilbert-base-en-fr-cased",
+ "Geotrend/distilbert-base-fr-cased",
+ "Geotrend/bert-base-25lang-cased",
+ "Geotrend/bert-base-15lang-cased",
+ "Geotrend/bert-base-10lang-cased",
+ "shibing624/text2vec-base-multilingual",
+ "izhx/udever-bloom-560m",
+ "izhx/udever-bloom-1b1",
+ "sentence-transformers/sentence-t5-base",
+ "sentence-transformers/sentence-t5-large",
+ "sentence-transformers/sentence-t5-xl",
+ "sentence-transformers/sentence-t5-xxl",
+ "intfloat/e5-mistral-7b-instruct",
+ "Wissam42/sentence-croissant-llm-base"
+]
+
+# These models' max_length is reported as 514, whereas the embedding layer actually supports only 512
+SENTENCE_TRANSORMER_MODELS_WITH_ERRORS = [
+ "camembert/camembert-base",
+ "camembert/camembert-large",
+ "dangvantuan/sentence-camembert-large",
+ "xlm-roberta-base",
+ "xlm-roberta-large",
+]
+
+UNIVERSAL_SENTENCE_ENCODER_MODELS = [
+ "vprelovac/universal-sentence-encoder-multilingual-3",
+ "vprelovac/universal-sentence-encoder-multilingual-large-3",
+]
+# TODO: build the list above from the json file keys (presumably universal_sentence_encoder_models_paths.json - confirm)
+
+LASER_MODELS = ["laser2"]
+
+VOYAGE_MODELS = ["voyage-2", "voyage-code-2"]
+
+OPEN_AI_MODELS = ["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]
+
+COHERE_MODELS = ["embed-multilingual-light-v3.0", "embed-multilingual-v3.0"]
+
+MISTRAL_MODELS = ["mistral-embed"]
+
+TYPES_TO_MODELS = {
+ "sentence_transformer": SENTENCE_TRANSORMER_MODELS
+ + SENTENCE_TRANSORMER_MODELS_WITH_ERRORS,
+ "universal_sentence_encoder": UNIVERSAL_SENTENCE_ENCODER_MODELS,
+ "laser": LASER_MODELS,
+ "voyage_ai": VOYAGE_MODELS,
+ "open_ai": OPEN_AI_MODELS,
+ "cohere": COHERE_MODELS,
+ "mistral_ai": MISTRAL_MODELS,
+}
diff --git a/script_mteb_french/utils/tasks_list.py b/tools/utils/tasks_list.py
similarity index 100%
rename from script_mteb_french/utils/tasks_list.py
rename to tools/utils/tasks_list.py
diff --git a/script_mteb_french/universal_sentence_encoder_models_paths.json b/tools/utils/universal_sentence_encoder_models_paths.json
similarity index 100%
rename from script_mteb_french/universal_sentence_encoder_models_paths.json
rename to tools/utils/universal_sentence_encoder_models_paths.json