diff --git a/.gitignore b/.gitignore index 4bf5d365..52ed372d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,163 @@ *.DS_Store -.env* -__pycache__ ChromaDB -models -.vscode \ No newline at end of file + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env* +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/.python-version b/.python-version deleted file mode 100644 index c8cfe395..00000000 --- a/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.10 diff --git a/README.md b/README.md index a85817af..03e6d2cf 100644 --- a/README.md +++ b/README.md @@ -1,93 +1,70 @@ -# MTEB Scripts +# Scripts to run the French MTEB benchmark -This repository contains scripts used for [MTEB](https://github.com/embeddings-benchmark/mteb) benchmarking. 
Some scripts rely on a results folder, which can be obtained via `git clone https://huggingface.co/datasets/mteb/results`. +This folder contains the scripts used to generate the French tab results on the [MTEB](https://github.com/embeddings-benchmark/mteb) benchmark. - +Below are instructions to run the main scripts. -- [MTEB Scripts](#mteb-scripts) - - [Benchmark](#benchmark) - - [Env Setup](#env-setup) - - [Model setup](#model-setup) - - [Download](#download) - - [Load](#load) +## Benchmark - +### Running on host using venv -## Benchmark +* Navigate to the repository root folder +* Create your virtual env: -Basic with Internet -```python -from mteb import MTEB -from sentence_transformers import SentenceTransformer -model_path = "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit" -model_name = model_path.split("/")[-1].split("_")[-1] -model = SentenceTransformer(model_path) -evaluation = MTEB(tasks=["Banking77Classification"]) -evaluation.run(model, output_folder=f"results/{model_name}") +```bash +python3 -m venv .venv ``` - -No Internet Access (Download data first) -```python -import os -os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models" -os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets" -os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules" -os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics" -from mteb import MTEB -from sentence_transformers import SentenceTransformer -model_path = "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit" -model_name = model_path.split("/")[-1].split("_")[-1] -model = SentenceTransformer(model_path) -evaluation = MTEB(tasks=["Banking77Classification"]) -evaluation.run(model, output_folder=f"results/{model_name}") +* Activate it and install the requirements: +```bash +source .venv/bin/activate +pip install 
-r requirements.txt ``` - - -## Env Setup - +* Run the benchmark: ```bash -export CONDA_ENVS_PATH=$six_ALL_CCFRWORK/conda - -conda create -y -n hf-prod python=3.8 -conda activate hf-prod +cd script_mteb_french +python run_benchmark.py +``` -# pt-1.10.1 / cuda 11.3 -conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch +By default the benchmark runs on sentence_transformer models but you can specify the type with the argument `--model_type`: +```bash +# default ['sentence_transformer'] +python run_benchmark.py +# choosing other type ['voyage_ai'] +python run_benchmark.py --model_type voyage_ai +# running on two types ['voyage_ai', 'sentence_transformer'] +python run_benchmark.py --model_type voyage_ai sentence_transformer +``` -# Custom fork that uses offline datasets -!pip install --upgrade git+https://github.com/Muennighoff/mteb.git@offlineaccess -!pip install --upgrade git+https://github.com/Muennighoff/sentence-transformers.git@sgpt_poolings -# If you want to run BEIR tasks -!pip install --upgrade git+https://github.com/beir-cellar/beir.git +You can also run the benchmark on one model only by specifying `--model_name`: +```bash +# default ['sentence_transformer'] -> all models of this type +python run_benchmark.py +# running on one model 'xlm-roberta-base' +python run_benchmark.py --model_type sentence_transformer --model_name "xlm-roberta-base" +``` +Note that `--model_name` must be one of the models available for the specified `--model_type`.
-## Model setup +You can also restrict the benchmark to a single task type among ["all", "classification", "clustering", "reranking", "retrieval", "pair_classification", "sts", "summarization", "bitextmining"]; the default is "all", which runs every task: +```bash +# running 'sentence_transformer' models on 'classification' task +python run_benchmark.py --model_type sentence_transformer --task_type classification +``` -### Download +### Running using Docker -```python -import os -import sentence_transformers -os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/gpfswork/rech/six/commun/models" -sentence_transformers_cache_dir = os.getenv("SENTENCE_TRANSFORMERS_HOME") -model_repo="sentence-transformers/allenai-specter" -revision="29f9f45ff2a85fe9dfe8ce2cef3d8ec4e65c5f37" -model_path = os.path.join(sentence_transformers_cache_dir, model_repo.replace("/", "_")) -model_path_tmp = sentence_transformers.util.snapshot_download( - repo_id=model_repo, - revision=revision, - cache_dir=sentence_transformers_cache_dir, - library_name="sentence-transformers", - library_version=sentence_transformers.__version__, - ignore_files=["flax_model.msgpack", "rust_model.ot", "tf_model.h5",], -) -os.rename(model_path_tmp, model_path) +* Navigate to the repository root folder +* Build the Docker image: +```bash +docker build -t mtebscripts_image . +``` +* Run the benchmark in the container as follows: +```bash +docker run -v $(pwd):/mtebscripts mtebscripts_image sh -c "cd script_mteb_french && python run_benchmark.py" ``` +If you want to use the GPU, make sure to add the `--gpus` option (e.g. `--gpus all`) to your run command, or `--runtime=nvidia` if you are using an older version of Docker. -### Load +Note: Because the volume is shared between the host and the container, the results will be available on the host once the run has finished.
-```python -model = SentenceTransformer("/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit") -``` +## Models' characteristics + +Additionally, the script `get_model_specs.py` computes the models' characteristics (size, number of parameters, embedding dimension). Run it the same way as the benchmark, replacing `run_benchmark.py` with `get_model_specs.py`. diff --git a/README_orig.md b/README_orig.md new file mode 100644 index 00000000..a85817af --- /dev/null +++ b/README_orig.md @@ -0,0 +1,93 @@ +# MTEB Scripts + +This repository contains scripts used for [MTEB](https://github.com/embeddings-benchmark/mteb) benchmarking. Some scripts rely on a results folder, which can be obtained via `git clone https://huggingface.co/datasets/mteb/results`. + + + +- [MTEB Scripts](#mteb-scripts) + - [Benchmark](#benchmark) + - [Env Setup](#env-setup) + - [Model setup](#model-setup) + - [Download](#download) + - [Load](#load) + + + +## Benchmark + +Basic with Internet +```python +from mteb import MTEB +from sentence_transformers import SentenceTransformer +model_path = "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit" +model_name = model_path.split("/")[-1].split("_")[-1] +model = SentenceTransformer(model_path) +evaluation = MTEB(tasks=["Banking77Classification"]) +evaluation.run(model, output_folder=f"results/{model_name}") +``` + +No Internet Access (Download data first) +```python +import os +os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline +os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline +os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models" +os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets" +os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules" +os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics" +from mteb import MTEB +from sentence_transformers import SentenceTransformer +model_path = 
"/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit" +model_name = model_path.split("/")[-1].split("_")[-1] +model = SentenceTransformer(model_path) +evaluation = MTEB(tasks=["Banking77Classification"]) +evaluation.run(model, output_folder=f"results/{model_name}") +``` + + +## Env Setup + +```bash +export CONDA_ENVS_PATH=$six_ALL_CCFRWORK/conda + +conda create -y -n hf-prod python=3.8 +conda activate hf-prod + +# pt-1.10.1 / cuda 11.3 +conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch + +# Custom fork that uses offline datasets +!pip install --upgrade git+https://github.com/Muennighoff/mteb.git@offlineaccess +!pip install --upgrade git+https://github.com/Muennighoff/sentence-transformers.git@sgpt_poolings +# If you want to run BEIR tasks +!pip install --upgrade git+https://github.com/beir-cellar/beir.git +``` + +## Model setup + +### Download + +```python +import os +import sentence_transformers +os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/gpfswork/rech/six/commun/models" +sentence_transformers_cache_dir = os.getenv("SENTENCE_TRANSFORMERS_HOME") +model_repo="sentence-transformers/allenai-specter" +revision="29f9f45ff2a85fe9dfe8ce2cef3d8ec4e65c5f37" +model_path = os.path.join(sentence_transformers_cache_dir, model_repo.replace("/", "_")) +model_path_tmp = sentence_transformers.util.snapshot_download( + repo_id=model_repo, + revision=revision, + cache_dir=sentence_transformers_cache_dir, + library_name="sentence-transformers", + library_version=sentence_transformers.__version__, + ignore_files=["flax_model.msgpack", "rust_model.ot", "tf_model.h5",], +) +os.rename(model_path_tmp, model_path) +``` + +### Load + +```python +model = SentenceTransformer("/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit") +``` diff --git a/script_mteb_french/results_analysis/models_characteristics.csv b/assets/models_characteristics.csv similarity index 100% rename from 
script_mteb_french/results_analysis/models_characteristics.csv rename to assets/models_characteristics.csv diff --git a/download_tasks.py b/download_tasks.py deleted file mode 100644 index 0fc34286..00000000 --- a/download_tasks.py +++ /dev/null @@ -1,79 +0,0 @@ -"""Downloads all MTEB tasks""" - -TASK_LIST = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", - "ArguAna", - "ClimateFEVER", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", - "SummEval", -] - -import os -os.environ["HF_DATASETS_OFFLINE"]="0" # 1 for offline -os.environ["TRANSFORMERS_OFFLINE"]="0" # 1 for offline -os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models" -os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets" -os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules" -os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics" - -from mteb import MTEB -evaluation = MTEB(tasks=TASK_LIST, task_langs=["en"]) - -for task in 
evaluation.tasks: - task.load_data() - - # Alternatively clone to desired place - #path = "/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/" + task.description["hf_hub_name"] - #from git import Repo - #Repo.clone_from("https://huggingface.co/datasets/" + self.description["hf_hub_name"], path) - #self.dataset = datasets.load_dataset(path, revision=self.description.get("revision", None)) diff --git a/script_mteb_french/src/AbstractEmbeddingFunction.py b/embedders/AbstractEmbeddingFunction.py similarity index 100% rename from script_mteb_french/src/AbstractEmbeddingFunction.py rename to embedders/AbstractEmbeddingFunction.py diff --git a/script_mteb_french/src/ChromaDBEmbedder.py b/embedders/ChromaDBEmbedder.py similarity index 100% rename from script_mteb_french/src/ChromaDBEmbedder.py rename to embedders/ChromaDBEmbedder.py diff --git a/script_mteb_french/src/CohereEmbeddingFunction.py b/embedders/CohereEmbeddingFunction.py similarity index 100% rename from script_mteb_french/src/CohereEmbeddingFunction.py rename to embedders/CohereEmbeddingFunction.py diff --git a/script_mteb_french/src/LaserEmbeddingFunction.py b/embedders/LaserEmbeddingFunction.py similarity index 96% rename from script_mteb_french/src/LaserEmbeddingFunction.py rename to embedders/LaserEmbeddingFunction.py index 79119dcf..e563dadc 100644 --- a/script_mteb_french/src/LaserEmbeddingFunction.py +++ b/embedders/LaserEmbeddingFunction.py @@ -33,7 +33,7 @@ def encode_documents(self, input: Documents) -> Embeddings: @staticmethod def _download_laser_models(): - MODELS_DOWNLOAD_FOLDER = "models" + MODELS_DOWNLOAD_FOLDER = "downloads" if not os.path.exists(MODELS_DOWNLOAD_FOLDER): os.mkdir(MODELS_DOWNLOAD_FOLDER) diff --git a/script_mteb_french/src/MistralAIEmbeddingFunction.py b/embedders/MistralAIEmbeddingFunction.py similarity index 100% rename from script_mteb_french/src/MistralAIEmbeddingFunction.py rename to embedders/MistralAIEmbeddingFunction.py diff --git 
a/script_mteb_french/src/ModelConfig.py b/embedders/ModelConfig.py similarity index 100% rename from script_mteb_french/src/ModelConfig.py rename to embedders/ModelConfig.py diff --git a/script_mteb_french/src/OpenAIEmbeddingFunction.py b/embedders/OpenAIEmbeddingFunction.py similarity index 100% rename from script_mteb_french/src/OpenAIEmbeddingFunction.py rename to embedders/OpenAIEmbeddingFunction.py diff --git a/script_mteb_french/src/SentenceTransformerEmbeddingFunction.py b/embedders/SentenceTransformerEmbeddingFunction.py similarity index 100% rename from script_mteb_french/src/SentenceTransformerEmbeddingFunction.py rename to embedders/SentenceTransformerEmbeddingFunction.py diff --git a/script_mteb_french/src/UniversalSentenceEncoderEmbeddingFunction.py b/embedders/UniversalSentenceEncoderEmbeddingFunction.py similarity index 100% rename from script_mteb_french/src/UniversalSentenceEncoderEmbeddingFunction.py rename to embedders/UniversalSentenceEncoderEmbeddingFunction.py diff --git a/script_mteb_french/src/VoyageAIEmbeddingFunction.py b/embedders/VoyageAIEmbeddingFunction.py similarity index 100% rename from script_mteb_french/src/VoyageAIEmbeddingFunction.py rename to embedders/VoyageAIEmbeddingFunction.py diff --git a/script_mteb_french/src/__init__.py b/embedders/__init__.py similarity index 100% rename from script_mteb_french/src/__init__.py rename to embedders/__init__.py diff --git a/fix_results.py b/fix_results.py deleted file mode 100644 index bb095429..00000000 --- a/fix_results.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -Iterates over json results for custom fixes -Usage: python fix_results.py results_folder_path -""" -import glob -import json -import sys -import os - -from mteb import MTEB - -results_folder = sys.argv[1] -files = glob.glob(f'{results_folder.strip("/")}/*/*.json') - -print("Found files: ", files) - -for file_name in files: - with open(file_name, 'r', encoding='utf-8') as f: - results = json.load(f) - if "dataset_version" in 
results: - results.pop("dataset_version") - if "mteb_version" not in results: - results["mteb_version"] = "0.0.2" - if "mteb_dataset_name" not in results: - results["mteb_dataset_name"] = file_name.split("/")[-1].replace(".json", "") - if "dataset_revision" not in results: - print(file_name) - mteb_desc = ( - MTEB(tasks=[file_name.split("/")[-1].replace(".json", "").replace("CQADupstackRetrieval", "CQADupstackAndroidRetrieval")]) - .tasks[0] - .description - ) - import huggingface_hub - if "hf_hub_name" in mteb_desc: - hf_hub_name = mteb_desc.get("hf_hub_name") - else: - hf_hub_name = "BeIR/" + mteb_desc.get("beir_name") - if "cqadupstack" in hf_hub_name: - hf_hub_name = "BeIR/cqadupstack-qrels" - results["dataset_revision"] = huggingface_hub.hf_api.dataset_info(hf_hub_name).sha - - if "STS22" in file_name: - for split, split_results in results.items(): - if isinstance(split_results, dict): - for metric, score in split_results.items(): - if isinstance(score, dict): - for sub_metric, sub_score in score.items(): - if isinstance(sub_score, dict): - for sub_sub_metric, sub_sub_score in sub_score.items(): - results[split][metric][sub_metric][sub_sub_metric] = abs(sub_sub_score) - else: - results[split][metric][sub_metric] = abs(sub_score) - else: - results[split][metric] = abs(score) - results.setdefault(split, {}) - # Merge MSMARCO dev & test split runs - elif "MSMARCO." 
in file_name and os.path.exists(file_name.replace("MSMARCO.", "MSMARCO-test.")): - with open(file_name.replace("MSMARCO.", "MSMARCO-test."), 'r', encoding='utf-8') as f: - results_test = json.load(f) - results["test"] = results_test["test"] - - with open(file_name, 'w', encoding='utf-8') as f: - json.dump(results, f, indent=4) - diff --git a/results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringP2P.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringP2P.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringP2P.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringS2S.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringS2S.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloProfClusteringS2S.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/AlloprofReranking.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloprofReranking.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/AlloprofReranking.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloprofReranking.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/AlloprofRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloprofRetrieval.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/AlloprofRetrieval.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AlloprofRetrieval.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/AmazonReviewsClassification.json 
b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AmazonReviewsClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/AmazonReviewsClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/AmazonReviewsClassification.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/BSARDRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/BSARDRetrieval.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/BSARDRetrieval.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/BSARDRetrieval.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/DiaBLaBitextMining.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/DiaBLaBitextMining.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/DiaBLaBitextMining.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/DiaBLaBitextMining.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/FloresBitextMining.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/FloresBitextMining.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/FloresBitextMining.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/FloresBitextMining.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/HALClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/HALClusteringS2S.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/HALClusteringS2S.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/HALClusteringS2S.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringP2P.json similarity index 100% rename from 
results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringP2P.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringP2P.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringS2S.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringS2S.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MLSUMClusteringS2S.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/MTOPDomainClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MTOPDomainClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/MTOPDomainClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MTOPDomainClassification.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/MTOPIntentClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MTOPIntentClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/MTOPIntentClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MTOPIntentClassification.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClassification.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringP2P.json similarity index 100% rename from 
results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringP2P.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MasakhaNEWSClusteringS2S.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/MassiveIntentClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MassiveIntentClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/MassiveIntentClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MassiveIntentClassification.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/MassiveScenarioClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MassiveScenarioClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/MassiveScenarioClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MassiveScenarioClassification.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/MintakaRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MintakaRetrieval.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/MintakaRetrieval.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/MintakaRetrieval.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/OpusparcusPC.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/OpusparcusPC.json similarity index 100% rename from 
results/Cohere-embed-multilingual-light-v3.0/OpusparcusPC.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/OpusparcusPC.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/PawsX.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/PawsX.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/PawsX.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/PawsX.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/SICKFr.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SICKFr.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/SICKFr.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SICKFr.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/STS22.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/STS22.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/STS22.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/STS22.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/STSBenchmarkMultilingualSTS.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/SummEvalFr.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SummEvalFr.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/SummEvalFr.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SummEvalFr.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/SyntecReranking.json 
b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SyntecReranking.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/SyntecReranking.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SyntecReranking.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/SyntecRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SyntecRetrieval.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/SyntecRetrieval.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/SyntecRetrieval.json diff --git a/results/Cohere-embed-multilingual-light-v3.0/XPQARetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/XPQARetrieval.json similarity index 100% rename from results/Cohere-embed-multilingual-light-v3.0/XPQARetrieval.json rename to outputs/benchmark_results/Cohere-embed-multilingual-light-v3.0/XPQARetrieval.json diff --git a/results/Cohere-embed-multilingual-v3.0/AlloProfClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloProfClusteringP2P.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/AlloProfClusteringP2P.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloProfClusteringP2P.json diff --git a/results/Cohere-embed-multilingual-v3.0/AlloProfClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloProfClusteringS2S.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/AlloProfClusteringS2S.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloProfClusteringS2S.json diff --git a/results/Cohere-embed-multilingual-v3.0/AlloprofReranking.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloprofReranking.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/AlloprofReranking.json rename to 
outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloprofReranking.json diff --git a/results/Cohere-embed-multilingual-v3.0/AlloprofRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloprofRetrieval.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/AlloprofRetrieval.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AlloprofRetrieval.json diff --git a/results/Cohere-embed-multilingual-v3.0/AmazonReviewsClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AmazonReviewsClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/AmazonReviewsClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/AmazonReviewsClassification.json diff --git a/results/Cohere-embed-multilingual-v3.0/BSARDRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/BSARDRetrieval.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/BSARDRetrieval.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/BSARDRetrieval.json diff --git a/results/Cohere-embed-multilingual-v3.0/DiaBLaBitextMining.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/DiaBLaBitextMining.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/DiaBLaBitextMining.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/DiaBLaBitextMining.json diff --git a/results/Cohere-embed-multilingual-v3.0/FloresBitextMining.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/FloresBitextMining.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/FloresBitextMining.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/FloresBitextMining.json diff --git a/results/Cohere-embed-multilingual-v3.0/HALClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/HALClusteringS2S.json similarity 
index 100% rename from results/Cohere-embed-multilingual-v3.0/HALClusteringS2S.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/HALClusteringS2S.json diff --git a/results/Cohere-embed-multilingual-v3.0/MLSUMClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MLSUMClusteringP2P.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/MLSUMClusteringP2P.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MLSUMClusteringP2P.json diff --git a/results/Cohere-embed-multilingual-v3.0/MLSUMClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MLSUMClusteringS2S.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/MLSUMClusteringS2S.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MLSUMClusteringS2S.json diff --git a/results/Cohere-embed-multilingual-v3.0/MTOPDomainClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MTOPDomainClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/MTOPDomainClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MTOPDomainClassification.json diff --git a/results/Cohere-embed-multilingual-v3.0/MTOPIntentClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MTOPIntentClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/MTOPIntentClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MTOPIntentClassification.json diff --git a/results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClassification.json diff --git 
a/results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringP2P.json diff --git a/results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MasakhaNEWSClusteringS2S.json diff --git a/results/Cohere-embed-multilingual-v3.0/MassiveIntentClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MassiveIntentClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/MassiveIntentClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MassiveIntentClassification.json diff --git a/results/Cohere-embed-multilingual-v3.0/MassiveScenarioClassification.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MassiveScenarioClassification.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/MassiveScenarioClassification.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MassiveScenarioClassification.json diff --git a/results/Cohere-embed-multilingual-v3.0/MintakaRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MintakaRetrieval.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/MintakaRetrieval.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/MintakaRetrieval.json diff --git a/results/Cohere-embed-multilingual-v3.0/OpusparcusPC.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/OpusparcusPC.json 
similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/OpusparcusPC.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/OpusparcusPC.json diff --git a/results/Cohere-embed-multilingual-v3.0/PawsX.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/PawsX.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/PawsX.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/PawsX.json diff --git a/results/Cohere-embed-multilingual-v3.0/SICKFr.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SICKFr.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/SICKFr.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SICKFr.json diff --git a/results/Cohere-embed-multilingual-v3.0/STS22.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/STS22.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/STS22.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/STS22.json diff --git a/results/Cohere-embed-multilingual-v3.0/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/STSBenchmarkMultilingualSTS.json diff --git a/results/Cohere-embed-multilingual-v3.0/SummEvalFr.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SummEvalFr.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/SummEvalFr.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SummEvalFr.json diff --git a/results/Cohere-embed-multilingual-v3.0/SyntecReranking.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SyntecReranking.json similarity index 100% rename from 
results/Cohere-embed-multilingual-v3.0/SyntecReranking.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SyntecReranking.json diff --git a/results/Cohere-embed-multilingual-v3.0/SyntecRetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SyntecRetrieval.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/SyntecRetrieval.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/SyntecRetrieval.json diff --git a/results/Cohere-embed-multilingual-v3.0/XPQARetrieval.json b/outputs/benchmark_results/Cohere-embed-multilingual-v3.0/XPQARetrieval.json similarity index 100% rename from results/Cohere-embed-multilingual-v3.0/XPQARetrieval.json rename to outputs/benchmark_results/Cohere-embed-multilingual-v3.0/XPQARetrieval.json diff --git a/results/Geotrend/bert-base-10lang-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloProfClusteringP2P.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloProfClusteringP2P.json diff --git a/results/Geotrend/bert-base-10lang-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloProfClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloProfClusteringS2S.json diff --git a/results/Geotrend/bert-base-10lang-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloprofReranking.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/AlloprofReranking.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloprofReranking.json diff --git a/results/Geotrend/bert-base-10lang-cased/AlloprofRetrieval.json 
b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloprofRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/AlloprofRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AlloprofRetrieval.json diff --git a/results/Geotrend/bert-base-10lang-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AmazonReviewsClassification.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/AmazonReviewsClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/AmazonReviewsClassification.json diff --git a/results/Geotrend/bert-base-10lang-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/BSARDRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/BSARDRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/BSARDRetrieval.json diff --git a/results/Geotrend/bert-base-10lang-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/DiaBLaBitextMining.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/DiaBLaBitextMining.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/DiaBLaBitextMining.json diff --git a/results/Geotrend/bert-base-10lang-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/FloresBitextMining.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/FloresBitextMining.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/FloresBitextMining.json diff --git a/results/Geotrend/bert-base-10lang-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/HALClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/HALClusteringS2S.json rename to 
outputs/benchmark_results/Geotrend/bert-base-10lang-cased/HALClusteringS2S.json diff --git a/results/Geotrend/bert-base-10lang-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MLSUMClusteringP2P.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MLSUMClusteringP2P.json diff --git a/results/Geotrend/bert-base-10lang-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MLSUMClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MLSUMClusteringS2S.json diff --git a/results/Geotrend/bert-base-10lang-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MTOPDomainClassification.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/MTOPDomainClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MTOPDomainClassification.json diff --git a/results/Geotrend/bert-base-10lang-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MTOPIntentClassification.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/MTOPIntentClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MTOPIntentClassification.json diff --git a/results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClassification.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClassification.json diff --git a/results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringP2P.json 
b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringP2P.json diff --git a/results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MasakhaNEWSClusteringS2S.json diff --git a/results/Geotrend/bert-base-10lang-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MassiveIntentClassification.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/MassiveIntentClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MassiveIntentClassification.json diff --git a/results/Geotrend/bert-base-10lang-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MassiveScenarioClassification.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/MassiveScenarioClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MassiveScenarioClassification.json diff --git a/results/Geotrend/bert-base-10lang-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MintakaRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/MintakaRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/MintakaRetrieval.json diff --git a/results/Geotrend/bert-base-10lang-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/OpusparcusPC.json similarity index 100% rename from 
results/Geotrend/bert-base-10lang-cased/OpusparcusPC.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/OpusparcusPC.json diff --git a/results/Geotrend/bert-base-10lang-cased/PawsX.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/PawsX.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/PawsX.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/PawsX.json diff --git a/results/Geotrend/bert-base-10lang-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SICKFr.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/SICKFr.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SICKFr.json diff --git a/results/Geotrend/bert-base-10lang-cased/STS22.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/STS22.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/STS22.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/STS22.json diff --git a/results/Geotrend/bert-base-10lang-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/STSBenchmarkMultilingualSTS.json diff --git a/results/Geotrend/bert-base-10lang-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SummEvalFr.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/SummEvalFr.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SummEvalFr.json diff --git a/results/Geotrend/bert-base-10lang-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SyntecReranking.json similarity index 100% rename from 
results/Geotrend/bert-base-10lang-cased/SyntecReranking.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SyntecReranking.json diff --git a/results/Geotrend/bert-base-10lang-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SyntecRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/SyntecRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/SyntecRetrieval.json diff --git a/results/Geotrend/bert-base-10lang-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-10lang-cased/XPQARetrieval.json similarity index 100% rename from results/Geotrend/bert-base-10lang-cased/XPQARetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-10lang-cased/XPQARetrieval.json diff --git a/results/Geotrend/bert-base-15lang-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloProfClusteringP2P.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloProfClusteringP2P.json diff --git a/results/Geotrend/bert-base-15lang-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloProfClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloProfClusteringS2S.json diff --git a/results/Geotrend/bert-base-15lang-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloprofReranking.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/AlloprofReranking.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloprofReranking.json diff --git a/results/Geotrend/bert-base-15lang-cased/AlloprofRetrieval.json 
b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloprofRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/AlloprofRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AlloprofRetrieval.json diff --git a/results/Geotrend/bert-base-15lang-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AmazonReviewsClassification.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/AmazonReviewsClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/AmazonReviewsClassification.json diff --git a/results/Geotrend/bert-base-15lang-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/BSARDRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/BSARDRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/BSARDRetrieval.json diff --git a/results/Geotrend/bert-base-15lang-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/DiaBLaBitextMining.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/DiaBLaBitextMining.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/DiaBLaBitextMining.json diff --git a/results/Geotrend/bert-base-15lang-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/FloresBitextMining.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/FloresBitextMining.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/FloresBitextMining.json diff --git a/results/Geotrend/bert-base-15lang-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/HALClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/HALClusteringS2S.json rename to 
outputs/benchmark_results/Geotrend/bert-base-15lang-cased/HALClusteringS2S.json diff --git a/results/Geotrend/bert-base-15lang-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MLSUMClusteringP2P.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MLSUMClusteringP2P.json diff --git a/results/Geotrend/bert-base-15lang-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MLSUMClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MLSUMClusteringS2S.json diff --git a/results/Geotrend/bert-base-15lang-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MTOPDomainClassification.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/MTOPDomainClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MTOPDomainClassification.json diff --git a/results/Geotrend/bert-base-15lang-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MTOPIntentClassification.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/MTOPIntentClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MTOPIntentClassification.json diff --git a/results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClassification.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClassification.json diff --git a/results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringP2P.json 
b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringP2P.json diff --git a/results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MasakhaNEWSClusteringS2S.json diff --git a/results/Geotrend/bert-base-15lang-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MassiveIntentClassification.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/MassiveIntentClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MassiveIntentClassification.json diff --git a/results/Geotrend/bert-base-15lang-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MassiveScenarioClassification.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/MassiveScenarioClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MassiveScenarioClassification.json diff --git a/results/Geotrend/bert-base-15lang-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MintakaRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/MintakaRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/MintakaRetrieval.json diff --git a/results/Geotrend/bert-base-15lang-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/OpusparcusPC.json similarity index 100% rename from 
results/Geotrend/bert-base-15lang-cased/OpusparcusPC.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/OpusparcusPC.json diff --git a/results/Geotrend/bert-base-15lang-cased/PawsX.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/PawsX.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/PawsX.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/PawsX.json diff --git a/results/Geotrend/bert-base-15lang-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SICKFr.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/SICKFr.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SICKFr.json diff --git a/results/Geotrend/bert-base-15lang-cased/STS22.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/STS22.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/STS22.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/STS22.json diff --git a/results/Geotrend/bert-base-15lang-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/STSBenchmarkMultilingualSTS.json diff --git a/results/Geotrend/bert-base-15lang-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SummEvalFr.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/SummEvalFr.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SummEvalFr.json diff --git a/results/Geotrend/bert-base-15lang-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SyntecReranking.json similarity index 100% rename from 
results/Geotrend/bert-base-15lang-cased/SyntecReranking.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SyntecReranking.json diff --git a/results/Geotrend/bert-base-15lang-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SyntecRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/SyntecRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/SyntecRetrieval.json diff --git a/results/Geotrend/bert-base-15lang-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-15lang-cased/XPQARetrieval.json similarity index 100% rename from results/Geotrend/bert-base-15lang-cased/XPQARetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-15lang-cased/XPQARetrieval.json diff --git a/results/Geotrend/bert-base-25lang-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloProfClusteringP2P.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloProfClusteringP2P.json diff --git a/results/Geotrend/bert-base-25lang-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloProfClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloProfClusteringS2S.json diff --git a/results/Geotrend/bert-base-25lang-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloprofReranking.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/AlloprofReranking.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloprofReranking.json diff --git a/results/Geotrend/bert-base-25lang-cased/AlloprofRetrieval.json 
b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloprofRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/AlloprofRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AlloprofRetrieval.json diff --git a/results/Geotrend/bert-base-25lang-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AmazonReviewsClassification.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/AmazonReviewsClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/AmazonReviewsClassification.json diff --git a/results/Geotrend/bert-base-25lang-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/BSARDRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/BSARDRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/BSARDRetrieval.json diff --git a/results/Geotrend/bert-base-25lang-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/DiaBLaBitextMining.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/DiaBLaBitextMining.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/DiaBLaBitextMining.json diff --git a/results/Geotrend/bert-base-25lang-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/FloresBitextMining.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/FloresBitextMining.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/FloresBitextMining.json diff --git a/results/Geotrend/bert-base-25lang-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/HALClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/HALClusteringS2S.json rename to 
outputs/benchmark_results/Geotrend/bert-base-25lang-cased/HALClusteringS2S.json diff --git a/results/Geotrend/bert-base-25lang-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MLSUMClusteringP2P.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MLSUMClusteringP2P.json diff --git a/results/Geotrend/bert-base-25lang-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MLSUMClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MLSUMClusteringS2S.json diff --git a/results/Geotrend/bert-base-25lang-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MTOPDomainClassification.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/MTOPDomainClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MTOPDomainClassification.json diff --git a/results/Geotrend/bert-base-25lang-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MTOPIntentClassification.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/MTOPIntentClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MTOPIntentClassification.json diff --git a/results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClassification.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClassification.json diff --git a/results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringP2P.json 
b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringP2P.json diff --git a/results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MasakhaNEWSClusteringS2S.json diff --git a/results/Geotrend/bert-base-25lang-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MassiveIntentClassification.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/MassiveIntentClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MassiveIntentClassification.json diff --git a/results/Geotrend/bert-base-25lang-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MassiveScenarioClassification.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/MassiveScenarioClassification.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MassiveScenarioClassification.json diff --git a/results/Geotrend/bert-base-25lang-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MintakaRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/MintakaRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/MintakaRetrieval.json diff --git a/results/Geotrend/bert-base-25lang-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/OpusparcusPC.json similarity index 100% rename from 
results/Geotrend/bert-base-25lang-cased/OpusparcusPC.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/OpusparcusPC.json diff --git a/results/Geotrend/bert-base-25lang-cased/PawsX.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/PawsX.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/PawsX.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/PawsX.json diff --git a/results/Geotrend/bert-base-25lang-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SICKFr.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/SICKFr.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SICKFr.json diff --git a/results/Geotrend/bert-base-25lang-cased/STS22.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/STS22.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/STS22.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/STS22.json diff --git a/results/Geotrend/bert-base-25lang-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/STSBenchmarkMultilingualSTS.json diff --git a/results/Geotrend/bert-base-25lang-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SummEvalFr.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/SummEvalFr.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SummEvalFr.json diff --git a/results/Geotrend/bert-base-25lang-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SyntecReranking.json similarity index 100% rename from 
results/Geotrend/bert-base-25lang-cased/SyntecReranking.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SyntecReranking.json diff --git a/results/Geotrend/bert-base-25lang-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SyntecRetrieval.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/SyntecRetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/SyntecRetrieval.json diff --git a/results/Geotrend/bert-base-25lang-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/bert-base-25lang-cased/XPQARetrieval.json similarity index 100% rename from results/Geotrend/bert-base-25lang-cased/XPQARetrieval.json rename to outputs/benchmark_results/Geotrend/bert-base-25lang-cased/XPQARetrieval.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloProfClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloprofReranking.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/AlloprofReranking.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloprofReranking.json diff --git 
a/results/Geotrend/distilbert-base-25lang-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloprofRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/AlloprofRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AlloprofRetrieval.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AmazonReviewsClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/AmazonReviewsClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/AmazonReviewsClassification.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/BSARDRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/BSARDRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/BSARDRetrieval.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/DiaBLaBitextMining.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/DiaBLaBitextMining.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/DiaBLaBitextMining.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/FloresBitextMining.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/FloresBitextMining.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/FloresBitextMining.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/HALClusteringS2S.json 
b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/HALClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/HALClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/HALClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MLSUMClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MTOPDomainClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/MTOPDomainClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MTOPDomainClassification.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MTOPIntentClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/MTOPIntentClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MTOPIntentClassification.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClassification.json 
b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClassification.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MasakhaNEWSClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MassiveIntentClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/MassiveIntentClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MassiveIntentClassification.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MassiveScenarioClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/MassiveScenarioClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MassiveScenarioClassification.json diff --git 
a/results/Geotrend/distilbert-base-25lang-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MintakaRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/MintakaRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/MintakaRetrieval.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/OpusparcusPC.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/OpusparcusPC.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/OpusparcusPC.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/PawsX.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/PawsX.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/PawsX.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/PawsX.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SICKFr.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/SICKFr.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SICKFr.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/STS22.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/STS22.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/STS22.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/STS22.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/STSBenchmarkMultilingualSTS.json rename to 
outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/STSBenchmarkMultilingualSTS.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SummEvalFr.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/SummEvalFr.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SummEvalFr.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SyntecReranking.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/SyntecReranking.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SyntecReranking.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SyntecRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/SyntecRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/SyntecRetrieval.json diff --git a/results/Geotrend/distilbert-base-25lang-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/XPQARetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-25lang-cased/XPQARetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-25lang-cased/XPQARetrieval.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringS2S.json 
b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloProfClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloprofReranking.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/AlloprofReranking.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloprofReranking.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloprofRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/AlloprofRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AlloprofRetrieval.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AmazonReviewsClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/AmazonReviewsClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/AmazonReviewsClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/BSARDRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/BSARDRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/BSARDRetrieval.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/DiaBLaBitextMining.json similarity index 100% rename from 
results/Geotrend/distilbert-base-en-fr-cased/DiaBLaBitextMining.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/DiaBLaBitextMining.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/FloresBitextMining.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/FloresBitextMining.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/FloresBitextMining.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/HALClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/HALClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/HALClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MLSUMClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MTOPDomainClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/MTOPDomainClassification.json rename to 
outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MTOPDomainClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MTOPIntentClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/MTOPIntentClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MTOPIntentClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MasakhaNEWSClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MassiveIntentClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/MassiveIntentClassification.json rename to 
outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MassiveIntentClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MassiveScenarioClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/MassiveScenarioClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MassiveScenarioClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MintakaRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/MintakaRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/MintakaRetrieval.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/OpusparcusPC.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/OpusparcusPC.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/OpusparcusPC.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/PawsX.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/PawsX.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/PawsX.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/PawsX.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SICKFr.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/SICKFr.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SICKFr.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/STS22.json 
b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/STS22.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/STS22.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/STS22.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/STSBenchmarkMultilingualSTS.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SummEvalFr.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/SummEvalFr.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SummEvalFr.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SyntecReranking.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/SyntecReranking.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SyntecReranking.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SyntecRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/SyntecRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/SyntecRetrieval.json diff --git a/results/Geotrend/distilbert-base-en-fr-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/XPQARetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-cased/XPQARetrieval.json rename to 
outputs/benchmark_results/Geotrend/distilbert-base-en-fr-cased/XPQARetrieval.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloProfClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofReranking.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofReranking.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofReranking.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AlloprofRetrieval.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AmazonReviewsClassification.json similarity index 100% rename from 
results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AmazonReviewsClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/AmazonReviewsClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/BSARDRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/BSARDRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/BSARDRetrieval.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/DiaBLaBitextMining.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/DiaBLaBitextMining.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/DiaBLaBitextMining.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/FloresBitextMining.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/FloresBitextMining.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/FloresBitextMining.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/HALClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/HALClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/HALClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringP2P.json 
similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MLSUMClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPDomainClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPDomainClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPDomainClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPIntentClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPIntentClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MTOPIntentClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClassification.json diff --git 
a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MasakhaNEWSClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveIntentClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveIntentClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveIntentClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveScenarioClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveScenarioClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MassiveScenarioClassification.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MintakaRetrieval.json similarity index 100% rename from 
results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MintakaRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/MintakaRetrieval.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/OpusparcusPC.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/OpusparcusPC.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/OpusparcusPC.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/PawsX.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/PawsX.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/PawsX.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/PawsX.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SICKFr.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SICKFr.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SICKFr.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STS22.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STS22.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STS22.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STS22.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STSBenchmarkMultilingualSTS.json rename to 
outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/STSBenchmarkMultilingualSTS.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SummEvalFr.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SummEvalFr.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SummEvalFr.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecReranking.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecReranking.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecReranking.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/SyntecRetrieval.json diff --git a/results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/XPQARetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/XPQARetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-en-fr-es-pt-it-cased/XPQARetrieval.json diff --git a/results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringP2P.json rename to 
outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloProfClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-fr-cased/AlloprofReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloprofReranking.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/AlloprofReranking.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloprofReranking.json diff --git a/results/Geotrend/distilbert-base-fr-cased/AlloprofRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloprofRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/AlloprofRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AlloprofRetrieval.json diff --git a/results/Geotrend/distilbert-base-fr-cased/AmazonReviewsClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AmazonReviewsClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/AmazonReviewsClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/AmazonReviewsClassification.json diff --git a/results/Geotrend/distilbert-base-fr-cased/BSARDRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/BSARDRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/BSARDRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/BSARDRetrieval.json diff --git a/results/Geotrend/distilbert-base-fr-cased/DiaBLaBitextMining.json 
b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/DiaBLaBitextMining.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/DiaBLaBitextMining.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/DiaBLaBitextMining.json diff --git a/results/Geotrend/distilbert-base-fr-cased/FloresBitextMining.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/FloresBitextMining.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/FloresBitextMining.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/FloresBitextMining.json diff --git a/results/Geotrend/distilbert-base-fr-cased/HALClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/HALClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/HALClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/HALClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MLSUMClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-fr-cased/MTOPDomainClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MTOPDomainClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/MTOPDomainClassification.json rename 
to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MTOPDomainClassification.json diff --git a/results/Geotrend/distilbert-base-fr-cased/MTOPIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MTOPIntentClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/MTOPIntentClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MTOPIntentClassification.json diff --git a/results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClassification.json diff --git a/results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringP2P.json diff --git a/results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MasakhaNEWSClusteringS2S.json diff --git a/results/Geotrend/distilbert-base-fr-cased/MassiveIntentClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MassiveIntentClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/MassiveIntentClassification.json rename to 
outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MassiveIntentClassification.json diff --git a/results/Geotrend/distilbert-base-fr-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MassiveScenarioClassification.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/MassiveScenarioClassification.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MassiveScenarioClassification.json diff --git a/results/Geotrend/distilbert-base-fr-cased/MintakaRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MintakaRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/MintakaRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/MintakaRetrieval.json diff --git a/results/Geotrend/distilbert-base-fr-cased/OpusparcusPC.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/OpusparcusPC.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/OpusparcusPC.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/OpusparcusPC.json diff --git a/results/Geotrend/distilbert-base-fr-cased/PawsX.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/PawsX.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/PawsX.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/PawsX.json diff --git a/results/Geotrend/distilbert-base-fr-cased/SICKFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SICKFr.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/SICKFr.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SICKFr.json diff --git a/results/Geotrend/distilbert-base-fr-cased/STS22.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/STS22.json similarity index 100% rename from 
results/Geotrend/distilbert-base-fr-cased/STS22.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/STS22.json diff --git a/results/Geotrend/distilbert-base-fr-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/STSBenchmarkMultilingualSTS.json diff --git a/results/Geotrend/distilbert-base-fr-cased/SummEvalFr.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SummEvalFr.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/SummEvalFr.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SummEvalFr.json diff --git a/results/Geotrend/distilbert-base-fr-cased/SyntecReranking.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SyntecReranking.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/SyntecReranking.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SyntecReranking.json diff --git a/results/Geotrend/distilbert-base-fr-cased/SyntecRetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SyntecRetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/SyntecRetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/SyntecRetrieval.json diff --git a/results/Geotrend/distilbert-base-fr-cased/XPQARetrieval.json b/outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/XPQARetrieval.json similarity index 100% rename from results/Geotrend/distilbert-base-fr-cased/XPQARetrieval.json rename to outputs/benchmark_results/Geotrend/distilbert-base-fr-cased/XPQARetrieval.json diff --git a/results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringP2P.json 
b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringP2P.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringP2P.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringP2P.json diff --git a/results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringS2S.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringS2S.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloProfClusteringS2S.json diff --git a/results/Wissam42/sentence-croissant-llm-base/AlloprofReranking.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloprofReranking.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/AlloprofReranking.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloprofReranking.json diff --git a/results/Wissam42/sentence-croissant-llm-base/AlloprofRetrieval.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloprofRetrieval.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/AlloprofRetrieval.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AlloprofRetrieval.json diff --git a/results/Wissam42/sentence-croissant-llm-base/AmazonReviewsClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AmazonReviewsClassification.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/AmazonReviewsClassification.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/AmazonReviewsClassification.json diff --git a/results/Wissam42/sentence-croissant-llm-base/BSARDRetrieval.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/BSARDRetrieval.json similarity 
index 100% rename from results/Wissam42/sentence-croissant-llm-base/BSARDRetrieval.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/BSARDRetrieval.json diff --git a/results/Wissam42/sentence-croissant-llm-base/DiaBLaBitextMining.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/DiaBLaBitextMining.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/DiaBLaBitextMining.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/DiaBLaBitextMining.json diff --git a/results/Wissam42/sentence-croissant-llm-base/FloresBitextMining.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/FloresBitextMining.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/FloresBitextMining.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/FloresBitextMining.json diff --git a/results/Wissam42/sentence-croissant-llm-base/HALClusteringS2S.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/HALClusteringS2S.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/HALClusteringS2S.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/HALClusteringS2S.json diff --git a/results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringP2P.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringP2P.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringP2P.json diff --git a/results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringS2S.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringS2S.json rename to 
outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MLSUMClusteringS2S.json diff --git a/results/Wissam42/sentence-croissant-llm-base/MTOPDomainClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MTOPDomainClassification.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/MTOPDomainClassification.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MTOPDomainClassification.json diff --git a/results/Wissam42/sentence-croissant-llm-base/MTOPIntentClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MTOPIntentClassification.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/MTOPIntentClassification.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MTOPIntentClassification.json diff --git a/results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClassification.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClassification.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClassification.json diff --git a/results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringP2P.json diff --git a/results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringS2S.json rename to 
outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MasakhaNEWSClusteringS2S.json diff --git a/results/Wissam42/sentence-croissant-llm-base/MassiveIntentClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MassiveIntentClassification.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/MassiveIntentClassification.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MassiveIntentClassification.json diff --git a/results/Wissam42/sentence-croissant-llm-base/MassiveScenarioClassification.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MassiveScenarioClassification.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/MassiveScenarioClassification.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MassiveScenarioClassification.json diff --git a/results/Wissam42/sentence-croissant-llm-base/MintakaRetrieval.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MintakaRetrieval.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/MintakaRetrieval.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/MintakaRetrieval.json diff --git a/results/Wissam42/sentence-croissant-llm-base/OpusparcusPC.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/OpusparcusPC.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/OpusparcusPC.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/OpusparcusPC.json diff --git a/results/Wissam42/sentence-croissant-llm-base/PawsX.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/PawsX.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/PawsX.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/PawsX.json diff --git 
a/results/Wissam42/sentence-croissant-llm-base/SICKFr.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SICKFr.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/SICKFr.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SICKFr.json diff --git a/results/Wissam42/sentence-croissant-llm-base/STS22.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/STS22.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/STS22.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/STS22.json diff --git a/results/Wissam42/sentence-croissant-llm-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/STSBenchmarkMultilingualSTS.json diff --git a/results/Wissam42/sentence-croissant-llm-base/SummEvalFr.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SummEvalFr.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/SummEvalFr.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SummEvalFr.json diff --git a/results/Wissam42/sentence-croissant-llm-base/SyntecReranking.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SyntecReranking.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/SyntecReranking.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SyntecReranking.json diff --git a/results/Wissam42/sentence-croissant-llm-base/SyntecRetrieval.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SyntecRetrieval.json similarity index 100% rename from 
results/Wissam42/sentence-croissant-llm-base/SyntecRetrieval.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/SyntecRetrieval.json diff --git a/results/Wissam42/sentence-croissant-llm-base/XPQARetrieval.json b/outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/XPQARetrieval.json similarity index 100% rename from results/Wissam42/sentence-croissant-llm-base/XPQARetrieval.json rename to outputs/benchmark_results/Wissam42/sentence-croissant-llm-base/XPQARetrieval.json diff --git a/results/bert-base-multilingual-cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-cased/AlloProfClusteringP2P.json similarity index 100% rename from results/bert-base-multilingual-cased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/bert-base-multilingual-cased/AlloProfClusteringP2P.json diff --git a/results/bert-base-multilingual-cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-cased/AlloProfClusteringS2S.json similarity index 100% rename from results/bert-base-multilingual-cased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/bert-base-multilingual-cased/AlloProfClusteringS2S.json diff --git a/results/bert-base-multilingual-cased/AlloprofReranking.json b/outputs/benchmark_results/bert-base-multilingual-cased/AlloprofReranking.json similarity index 100% rename from results/bert-base-multilingual-cased/AlloprofReranking.json rename to outputs/benchmark_results/bert-base-multilingual-cased/AlloprofReranking.json diff --git a/results/bert-base-multilingual-cased/AlloprofRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-cased/AlloprofRetrieval.json similarity index 100% rename from results/bert-base-multilingual-cased/AlloprofRetrieval.json rename to outputs/benchmark_results/bert-base-multilingual-cased/AlloprofRetrieval.json diff --git a/results/bert-base-multilingual-cased/AmazonReviewsClassification.json 
b/outputs/benchmark_results/bert-base-multilingual-cased/AmazonReviewsClassification.json similarity index 100% rename from results/bert-base-multilingual-cased/AmazonReviewsClassification.json rename to outputs/benchmark_results/bert-base-multilingual-cased/AmazonReviewsClassification.json diff --git a/results/bert-base-multilingual-cased/BSARDRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-cased/BSARDRetrieval.json similarity index 100% rename from results/bert-base-multilingual-cased/BSARDRetrieval.json rename to outputs/benchmark_results/bert-base-multilingual-cased/BSARDRetrieval.json diff --git a/results/bert-base-multilingual-cased/DiaBLaBitextMining.json b/outputs/benchmark_results/bert-base-multilingual-cased/DiaBLaBitextMining.json similarity index 100% rename from results/bert-base-multilingual-cased/DiaBLaBitextMining.json rename to outputs/benchmark_results/bert-base-multilingual-cased/DiaBLaBitextMining.json diff --git a/results/bert-base-multilingual-cased/FloresBitextMining.json b/outputs/benchmark_results/bert-base-multilingual-cased/FloresBitextMining.json similarity index 100% rename from results/bert-base-multilingual-cased/FloresBitextMining.json rename to outputs/benchmark_results/bert-base-multilingual-cased/FloresBitextMining.json diff --git a/results/bert-base-multilingual-cased/HALClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-cased/HALClusteringS2S.json similarity index 100% rename from results/bert-base-multilingual-cased/HALClusteringS2S.json rename to outputs/benchmark_results/bert-base-multilingual-cased/HALClusteringS2S.json diff --git a/results/bert-base-multilingual-cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-cased/MLSUMClusteringP2P.json similarity index 100% rename from results/bert-base-multilingual-cased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/bert-base-multilingual-cased/MLSUMClusteringP2P.json diff --git 
a/results/bert-base-multilingual-cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-cased/MLSUMClusteringS2S.json similarity index 100% rename from results/bert-base-multilingual-cased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/bert-base-multilingual-cased/MLSUMClusteringS2S.json diff --git a/results/bert-base-multilingual-cased/MTOPDomainClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/MTOPDomainClassification.json similarity index 100% rename from results/bert-base-multilingual-cased/MTOPDomainClassification.json rename to outputs/benchmark_results/bert-base-multilingual-cased/MTOPDomainClassification.json diff --git a/results/bert-base-multilingual-cased/MTOPIntentClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/MTOPIntentClassification.json similarity index 100% rename from results/bert-base-multilingual-cased/MTOPIntentClassification.json rename to outputs/benchmark_results/bert-base-multilingual-cased/MTOPIntentClassification.json diff --git a/results/bert-base-multilingual-cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClassification.json similarity index 100% rename from results/bert-base-multilingual-cased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClassification.json diff --git a/results/bert-base-multilingual-cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/bert-base-multilingual-cased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClusteringP2P.json diff --git a/results/bert-base-multilingual-cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from 
results/bert-base-multilingual-cased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/bert-base-multilingual-cased/MasakhaNEWSClusteringS2S.json diff --git a/results/bert-base-multilingual-cased/MassiveIntentClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/MassiveIntentClassification.json similarity index 100% rename from results/bert-base-multilingual-cased/MassiveIntentClassification.json rename to outputs/benchmark_results/bert-base-multilingual-cased/MassiveIntentClassification.json diff --git a/results/bert-base-multilingual-cased/MassiveScenarioClassification.json b/outputs/benchmark_results/bert-base-multilingual-cased/MassiveScenarioClassification.json similarity index 100% rename from results/bert-base-multilingual-cased/MassiveScenarioClassification.json rename to outputs/benchmark_results/bert-base-multilingual-cased/MassiveScenarioClassification.json diff --git a/results/bert-base-multilingual-cased/MintakaRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-cased/MintakaRetrieval.json similarity index 100% rename from results/bert-base-multilingual-cased/MintakaRetrieval.json rename to outputs/benchmark_results/bert-base-multilingual-cased/MintakaRetrieval.json diff --git a/results/bert-base-multilingual-cased/OpusparcusPC.json b/outputs/benchmark_results/bert-base-multilingual-cased/OpusparcusPC.json similarity index 100% rename from results/bert-base-multilingual-cased/OpusparcusPC.json rename to outputs/benchmark_results/bert-base-multilingual-cased/OpusparcusPC.json diff --git a/results/bert-base-multilingual-cased/PawsX.json b/outputs/benchmark_results/bert-base-multilingual-cased/PawsX.json similarity index 100% rename from results/bert-base-multilingual-cased/PawsX.json rename to outputs/benchmark_results/bert-base-multilingual-cased/PawsX.json diff --git a/results/bert-base-multilingual-cased/SICKFr.json b/outputs/benchmark_results/bert-base-multilingual-cased/SICKFr.json similarity 
index 100% rename from results/bert-base-multilingual-cased/SICKFr.json rename to outputs/benchmark_results/bert-base-multilingual-cased/SICKFr.json diff --git a/results/bert-base-multilingual-cased/STS22.json b/outputs/benchmark_results/bert-base-multilingual-cased/STS22.json similarity index 100% rename from results/bert-base-multilingual-cased/STS22.json rename to outputs/benchmark_results/bert-base-multilingual-cased/STS22.json diff --git a/results/bert-base-multilingual-cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/bert-base-multilingual-cased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/bert-base-multilingual-cased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/bert-base-multilingual-cased/STSBenchmarkMultilingualSTS.json diff --git a/results/bert-base-multilingual-cased/SummEvalFr.json b/outputs/benchmark_results/bert-base-multilingual-cased/SummEvalFr.json similarity index 100% rename from results/bert-base-multilingual-cased/SummEvalFr.json rename to outputs/benchmark_results/bert-base-multilingual-cased/SummEvalFr.json diff --git a/results/bert-base-multilingual-cased/SyntecReranking.json b/outputs/benchmark_results/bert-base-multilingual-cased/SyntecReranking.json similarity index 100% rename from results/bert-base-multilingual-cased/SyntecReranking.json rename to outputs/benchmark_results/bert-base-multilingual-cased/SyntecReranking.json diff --git a/results/bert-base-multilingual-cased/SyntecRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-cased/SyntecRetrieval.json similarity index 100% rename from results/bert-base-multilingual-cased/SyntecRetrieval.json rename to outputs/benchmark_results/bert-base-multilingual-cased/SyntecRetrieval.json diff --git a/results/bert-base-multilingual-cased/XPQARetrieval.json b/outputs/benchmark_results/bert-base-multilingual-cased/XPQARetrieval.json similarity index 100% rename from 
results/bert-base-multilingual-cased/XPQARetrieval.json rename to outputs/benchmark_results/bert-base-multilingual-cased/XPQARetrieval.json diff --git a/results/bert-base-multilingual-uncased/AlloProfClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-uncased/AlloProfClusteringP2P.json similarity index 100% rename from results/bert-base-multilingual-uncased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/AlloProfClusteringP2P.json diff --git a/results/bert-base-multilingual-uncased/AlloProfClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-uncased/AlloProfClusteringS2S.json similarity index 100% rename from results/bert-base-multilingual-uncased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/AlloProfClusteringS2S.json diff --git a/results/bert-base-multilingual-uncased/AlloprofReranking.json b/outputs/benchmark_results/bert-base-multilingual-uncased/AlloprofReranking.json similarity index 100% rename from results/bert-base-multilingual-uncased/AlloprofReranking.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/AlloprofReranking.json diff --git a/results/bert-base-multilingual-uncased/AlloprofRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-uncased/AlloprofRetrieval.json similarity index 100% rename from results/bert-base-multilingual-uncased/AlloprofRetrieval.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/AlloprofRetrieval.json diff --git a/results/bert-base-multilingual-uncased/AmazonReviewsClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/AmazonReviewsClassification.json similarity index 100% rename from results/bert-base-multilingual-uncased/AmazonReviewsClassification.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/AmazonReviewsClassification.json diff --git a/results/bert-base-multilingual-uncased/BSARDRetrieval.json 
b/outputs/benchmark_results/bert-base-multilingual-uncased/BSARDRetrieval.json similarity index 100% rename from results/bert-base-multilingual-uncased/BSARDRetrieval.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/BSARDRetrieval.json diff --git a/results/bert-base-multilingual-uncased/DiaBLaBitextMining.json b/outputs/benchmark_results/bert-base-multilingual-uncased/DiaBLaBitextMining.json similarity index 100% rename from results/bert-base-multilingual-uncased/DiaBLaBitextMining.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/DiaBLaBitextMining.json diff --git a/results/bert-base-multilingual-uncased/FloresBitextMining.json b/outputs/benchmark_results/bert-base-multilingual-uncased/FloresBitextMining.json similarity index 100% rename from results/bert-base-multilingual-uncased/FloresBitextMining.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/FloresBitextMining.json diff --git a/results/bert-base-multilingual-uncased/HALClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-uncased/HALClusteringS2S.json similarity index 100% rename from results/bert-base-multilingual-uncased/HALClusteringS2S.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/HALClusteringS2S.json diff --git a/results/bert-base-multilingual-uncased/MLSUMClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MLSUMClusteringP2P.json similarity index 100% rename from results/bert-base-multilingual-uncased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/MLSUMClusteringP2P.json diff --git a/results/bert-base-multilingual-uncased/MLSUMClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MLSUMClusteringS2S.json similarity index 100% rename from results/bert-base-multilingual-uncased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/MLSUMClusteringS2S.json diff --git 
a/results/bert-base-multilingual-uncased/MTOPDomainClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MTOPDomainClassification.json similarity index 100% rename from results/bert-base-multilingual-uncased/MTOPDomainClassification.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/MTOPDomainClassification.json diff --git a/results/bert-base-multilingual-uncased/MTOPIntentClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MTOPIntentClassification.json similarity index 100% rename from results/bert-base-multilingual-uncased/MTOPIntentClassification.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/MTOPIntentClassification.json diff --git a/results/bert-base-multilingual-uncased/MasakhaNEWSClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClassification.json similarity index 100% rename from results/bert-base-multilingual-uncased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClassification.json diff --git a/results/bert-base-multilingual-uncased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/bert-base-multilingual-uncased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClusteringP2P.json diff --git a/results/bert-base-multilingual-uncased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/bert-base-multilingual-uncased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/MasakhaNEWSClusteringS2S.json diff --git a/results/bert-base-multilingual-uncased/MassiveIntentClassification.json 
b/outputs/benchmark_results/bert-base-multilingual-uncased/MassiveIntentClassification.json similarity index 100% rename from results/bert-base-multilingual-uncased/MassiveIntentClassification.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/MassiveIntentClassification.json diff --git a/results/bert-base-multilingual-uncased/MassiveScenarioClassification.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MassiveScenarioClassification.json similarity index 100% rename from results/bert-base-multilingual-uncased/MassiveScenarioClassification.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/MassiveScenarioClassification.json diff --git a/results/bert-base-multilingual-uncased/MintakaRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-uncased/MintakaRetrieval.json similarity index 100% rename from results/bert-base-multilingual-uncased/MintakaRetrieval.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/MintakaRetrieval.json diff --git a/results/bert-base-multilingual-uncased/OpusparcusPC.json b/outputs/benchmark_results/bert-base-multilingual-uncased/OpusparcusPC.json similarity index 100% rename from results/bert-base-multilingual-uncased/OpusparcusPC.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/OpusparcusPC.json diff --git a/results/bert-base-multilingual-uncased/PawsX.json b/outputs/benchmark_results/bert-base-multilingual-uncased/PawsX.json similarity index 100% rename from results/bert-base-multilingual-uncased/PawsX.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/PawsX.json diff --git a/results/bert-base-multilingual-uncased/SICKFr.json b/outputs/benchmark_results/bert-base-multilingual-uncased/SICKFr.json similarity index 100% rename from results/bert-base-multilingual-uncased/SICKFr.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/SICKFr.json diff --git 
a/results/bert-base-multilingual-uncased/STS22.json b/outputs/benchmark_results/bert-base-multilingual-uncased/STS22.json similarity index 100% rename from results/bert-base-multilingual-uncased/STS22.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/STS22.json diff --git a/results/bert-base-multilingual-uncased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/bert-base-multilingual-uncased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/bert-base-multilingual-uncased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/STSBenchmarkMultilingualSTS.json diff --git a/results/bert-base-multilingual-uncased/SummEvalFr.json b/outputs/benchmark_results/bert-base-multilingual-uncased/SummEvalFr.json similarity index 100% rename from results/bert-base-multilingual-uncased/SummEvalFr.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/SummEvalFr.json diff --git a/results/bert-base-multilingual-uncased/SyntecReranking.json b/outputs/benchmark_results/bert-base-multilingual-uncased/SyntecReranking.json similarity index 100% rename from results/bert-base-multilingual-uncased/SyntecReranking.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/SyntecReranking.json diff --git a/results/bert-base-multilingual-uncased/SyntecRetrieval.json b/outputs/benchmark_results/bert-base-multilingual-uncased/SyntecRetrieval.json similarity index 100% rename from results/bert-base-multilingual-uncased/SyntecRetrieval.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/SyntecRetrieval.json diff --git a/results/bert-base-multilingual-uncased/XPQARetrieval.json b/outputs/benchmark_results/bert-base-multilingual-uncased/XPQARetrieval.json similarity index 100% rename from results/bert-base-multilingual-uncased/XPQARetrieval.json rename to outputs/benchmark_results/bert-base-multilingual-uncased/XPQARetrieval.json diff --git 
a/results/camembert/camembert-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-base/AlloProfClusteringP2P.json similarity index 100% rename from results/camembert/camembert-base/AlloProfClusteringP2P.json rename to outputs/benchmark_results/camembert/camembert-base/AlloProfClusteringP2P.json diff --git a/results/camembert/camembert-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-base/AlloProfClusteringS2S.json similarity index 100% rename from results/camembert/camembert-base/AlloProfClusteringS2S.json rename to outputs/benchmark_results/camembert/camembert-base/AlloProfClusteringS2S.json diff --git a/results/camembert/camembert-base/AlloprofReranking.json b/outputs/benchmark_results/camembert/camembert-base/AlloprofReranking.json similarity index 100% rename from results/camembert/camembert-base/AlloprofReranking.json rename to outputs/benchmark_results/camembert/camembert-base/AlloprofReranking.json diff --git a/results/camembert/camembert-base/AlloprofRetrieval.json b/outputs/benchmark_results/camembert/camembert-base/AlloprofRetrieval.json similarity index 100% rename from results/camembert/camembert-base/AlloprofRetrieval.json rename to outputs/benchmark_results/camembert/camembert-base/AlloprofRetrieval.json diff --git a/results/camembert/camembert-base/AmazonReviewsClassification.json b/outputs/benchmark_results/camembert/camembert-base/AmazonReviewsClassification.json similarity index 100% rename from results/camembert/camembert-base/AmazonReviewsClassification.json rename to outputs/benchmark_results/camembert/camembert-base/AmazonReviewsClassification.json diff --git a/results/camembert/camembert-base/BSARDRetrieval.json b/outputs/benchmark_results/camembert/camembert-base/BSARDRetrieval.json similarity index 100% rename from results/camembert/camembert-base/BSARDRetrieval.json rename to outputs/benchmark_results/camembert/camembert-base/BSARDRetrieval.json diff --git 
a/results/camembert/camembert-base/DiaBLaBitextMining.json b/outputs/benchmark_results/camembert/camembert-base/DiaBLaBitextMining.json similarity index 100% rename from results/camembert/camembert-base/DiaBLaBitextMining.json rename to outputs/benchmark_results/camembert/camembert-base/DiaBLaBitextMining.json diff --git a/results/camembert/camembert-base/FloresBitextMining.json b/outputs/benchmark_results/camembert/camembert-base/FloresBitextMining.json similarity index 100% rename from results/camembert/camembert-base/FloresBitextMining.json rename to outputs/benchmark_results/camembert/camembert-base/FloresBitextMining.json diff --git a/results/camembert/camembert-base/HALClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-base/HALClusteringS2S.json similarity index 100% rename from results/camembert/camembert-base/HALClusteringS2S.json rename to outputs/benchmark_results/camembert/camembert-base/HALClusteringS2S.json diff --git a/results/camembert/camembert-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-base/MLSUMClusteringP2P.json similarity index 100% rename from results/camembert/camembert-base/MLSUMClusteringP2P.json rename to outputs/benchmark_results/camembert/camembert-base/MLSUMClusteringP2P.json diff --git a/results/camembert/camembert-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-base/MLSUMClusteringS2S.json similarity index 100% rename from results/camembert/camembert-base/MLSUMClusteringS2S.json rename to outputs/benchmark_results/camembert/camembert-base/MLSUMClusteringS2S.json diff --git a/results/camembert/camembert-base/MTOPDomainClassification.json b/outputs/benchmark_results/camembert/camembert-base/MTOPDomainClassification.json similarity index 100% rename from results/camembert/camembert-base/MTOPDomainClassification.json rename to outputs/benchmark_results/camembert/camembert-base/MTOPDomainClassification.json diff --git 
a/results/camembert/camembert-base/MTOPIntentClassification.json b/outputs/benchmark_results/camembert/camembert-base/MTOPIntentClassification.json similarity index 100% rename from results/camembert/camembert-base/MTOPIntentClassification.json rename to outputs/benchmark_results/camembert/camembert-base/MTOPIntentClassification.json diff --git a/results/camembert/camembert-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClassification.json similarity index 100% rename from results/camembert/camembert-base/MasakhaNEWSClassification.json rename to outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClassification.json diff --git a/results/camembert/camembert-base/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/camembert/camembert-base/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClusteringP2P.json diff --git a/results/camembert/camembert-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/camembert/camembert-base/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/camembert/camembert-base/MasakhaNEWSClusteringS2S.json diff --git a/results/camembert/camembert-base/MassiveIntentClassification.json b/outputs/benchmark_results/camembert/camembert-base/MassiveIntentClassification.json similarity index 100% rename from results/camembert/camembert-base/MassiveIntentClassification.json rename to outputs/benchmark_results/camembert/camembert-base/MassiveIntentClassification.json diff --git a/results/camembert/camembert-base/MassiveScenarioClassification.json b/outputs/benchmark_results/camembert/camembert-base/MassiveScenarioClassification.json similarity index 100% rename from 
results/camembert/camembert-base/MassiveScenarioClassification.json rename to outputs/benchmark_results/camembert/camembert-base/MassiveScenarioClassification.json diff --git a/results/camembert/camembert-base/MintakaRetrieval.json b/outputs/benchmark_results/camembert/camembert-base/MintakaRetrieval.json similarity index 100% rename from results/camembert/camembert-base/MintakaRetrieval.json rename to outputs/benchmark_results/camembert/camembert-base/MintakaRetrieval.json diff --git a/results/camembert/camembert-base/OpusparcusPC.json b/outputs/benchmark_results/camembert/camembert-base/OpusparcusPC.json similarity index 100% rename from results/camembert/camembert-base/OpusparcusPC.json rename to outputs/benchmark_results/camembert/camembert-base/OpusparcusPC.json diff --git a/results/camembert/camembert-base/PawsX.json b/outputs/benchmark_results/camembert/camembert-base/PawsX.json similarity index 100% rename from results/camembert/camembert-base/PawsX.json rename to outputs/benchmark_results/camembert/camembert-base/PawsX.json diff --git a/results/camembert/camembert-base/SICKFr.json b/outputs/benchmark_results/camembert/camembert-base/SICKFr.json similarity index 100% rename from results/camembert/camembert-base/SICKFr.json rename to outputs/benchmark_results/camembert/camembert-base/SICKFr.json diff --git a/results/camembert/camembert-base/STS22.json b/outputs/benchmark_results/camembert/camembert-base/STS22.json similarity index 100% rename from results/camembert/camembert-base/STS22.json rename to outputs/benchmark_results/camembert/camembert-base/STS22.json diff --git a/results/camembert/camembert-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/camembert/camembert-base/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/camembert/camembert-base/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/camembert/camembert-base/STSBenchmarkMultilingualSTS.json diff --git 
a/results/camembert/camembert-base/SummEvalFr.json b/outputs/benchmark_results/camembert/camembert-base/SummEvalFr.json similarity index 100% rename from results/camembert/camembert-base/SummEvalFr.json rename to outputs/benchmark_results/camembert/camembert-base/SummEvalFr.json diff --git a/results/camembert/camembert-base/SyntecReranking.json b/outputs/benchmark_results/camembert/camembert-base/SyntecReranking.json similarity index 100% rename from results/camembert/camembert-base/SyntecReranking.json rename to outputs/benchmark_results/camembert/camembert-base/SyntecReranking.json diff --git a/results/camembert/camembert-base/SyntecRetrieval.json b/outputs/benchmark_results/camembert/camembert-base/SyntecRetrieval.json similarity index 100% rename from results/camembert/camembert-base/SyntecRetrieval.json rename to outputs/benchmark_results/camembert/camembert-base/SyntecRetrieval.json diff --git a/results/camembert/camembert-base/XPQARetrieval.json b/outputs/benchmark_results/camembert/camembert-base/XPQARetrieval.json similarity index 100% rename from results/camembert/camembert-base/XPQARetrieval.json rename to outputs/benchmark_results/camembert/camembert-base/XPQARetrieval.json diff --git a/results/camembert/camembert-large/AlloProfClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-large/AlloProfClusteringP2P.json similarity index 100% rename from results/camembert/camembert-large/AlloProfClusteringP2P.json rename to outputs/benchmark_results/camembert/camembert-large/AlloProfClusteringP2P.json diff --git a/results/camembert/camembert-large/AlloProfClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-large/AlloProfClusteringS2S.json similarity index 100% rename from results/camembert/camembert-large/AlloProfClusteringS2S.json rename to outputs/benchmark_results/camembert/camembert-large/AlloProfClusteringS2S.json diff --git a/results/camembert/camembert-large/AlloprofReranking.json 
b/outputs/benchmark_results/camembert/camembert-large/AlloprofReranking.json similarity index 100% rename from results/camembert/camembert-large/AlloprofReranking.json rename to outputs/benchmark_results/camembert/camembert-large/AlloprofReranking.json diff --git a/results/camembert/camembert-large/AlloprofRetrieval.json b/outputs/benchmark_results/camembert/camembert-large/AlloprofRetrieval.json similarity index 100% rename from results/camembert/camembert-large/AlloprofRetrieval.json rename to outputs/benchmark_results/camembert/camembert-large/AlloprofRetrieval.json diff --git a/results/camembert/camembert-large/AmazonReviewsClassification.json b/outputs/benchmark_results/camembert/camembert-large/AmazonReviewsClassification.json similarity index 100% rename from results/camembert/camembert-large/AmazonReviewsClassification.json rename to outputs/benchmark_results/camembert/camembert-large/AmazonReviewsClassification.json diff --git a/results/camembert/camembert-large/BSARDRetrieval.json b/outputs/benchmark_results/camembert/camembert-large/BSARDRetrieval.json similarity index 100% rename from results/camembert/camembert-large/BSARDRetrieval.json rename to outputs/benchmark_results/camembert/camembert-large/BSARDRetrieval.json diff --git a/results/camembert/camembert-large/DiaBLaBitextMining.json b/outputs/benchmark_results/camembert/camembert-large/DiaBLaBitextMining.json similarity index 100% rename from results/camembert/camembert-large/DiaBLaBitextMining.json rename to outputs/benchmark_results/camembert/camembert-large/DiaBLaBitextMining.json diff --git a/results/camembert/camembert-large/FloresBitextMining.json b/outputs/benchmark_results/camembert/camembert-large/FloresBitextMining.json similarity index 100% rename from results/camembert/camembert-large/FloresBitextMining.json rename to outputs/benchmark_results/camembert/camembert-large/FloresBitextMining.json diff --git a/results/camembert/camembert-large/HALClusteringS2S.json 
b/outputs/benchmark_results/camembert/camembert-large/HALClusteringS2S.json similarity index 100% rename from results/camembert/camembert-large/HALClusteringS2S.json rename to outputs/benchmark_results/camembert/camembert-large/HALClusteringS2S.json diff --git a/results/camembert/camembert-large/MLSUMClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-large/MLSUMClusteringP2P.json similarity index 100% rename from results/camembert/camembert-large/MLSUMClusteringP2P.json rename to outputs/benchmark_results/camembert/camembert-large/MLSUMClusteringP2P.json diff --git a/results/camembert/camembert-large/MLSUMClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-large/MLSUMClusteringS2S.json similarity index 100% rename from results/camembert/camembert-large/MLSUMClusteringS2S.json rename to outputs/benchmark_results/camembert/camembert-large/MLSUMClusteringS2S.json diff --git a/results/camembert/camembert-large/MTOPDomainClassification.json b/outputs/benchmark_results/camembert/camembert-large/MTOPDomainClassification.json similarity index 100% rename from results/camembert/camembert-large/MTOPDomainClassification.json rename to outputs/benchmark_results/camembert/camembert-large/MTOPDomainClassification.json diff --git a/results/camembert/camembert-large/MTOPIntentClassification.json b/outputs/benchmark_results/camembert/camembert-large/MTOPIntentClassification.json similarity index 100% rename from results/camembert/camembert-large/MTOPIntentClassification.json rename to outputs/benchmark_results/camembert/camembert-large/MTOPIntentClassification.json diff --git a/results/camembert/camembert-large/MasakhaNEWSClassification.json b/outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClassification.json similarity index 100% rename from results/camembert/camembert-large/MasakhaNEWSClassification.json rename to outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClassification.json diff --git 
a/results/camembert/camembert-large/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/camembert/camembert-large/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClusteringP2P.json diff --git a/results/camembert/camembert-large/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/camembert/camembert-large/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/camembert/camembert-large/MasakhaNEWSClusteringS2S.json diff --git a/results/camembert/camembert-large/MassiveIntentClassification.json b/outputs/benchmark_results/camembert/camembert-large/MassiveIntentClassification.json similarity index 100% rename from results/camembert/camembert-large/MassiveIntentClassification.json rename to outputs/benchmark_results/camembert/camembert-large/MassiveIntentClassification.json diff --git a/results/camembert/camembert-large/MassiveScenarioClassification.json b/outputs/benchmark_results/camembert/camembert-large/MassiveScenarioClassification.json similarity index 100% rename from results/camembert/camembert-large/MassiveScenarioClassification.json rename to outputs/benchmark_results/camembert/camembert-large/MassiveScenarioClassification.json diff --git a/results/camembert/camembert-large/MintakaRetrieval.json b/outputs/benchmark_results/camembert/camembert-large/MintakaRetrieval.json similarity index 100% rename from results/camembert/camembert-large/MintakaRetrieval.json rename to outputs/benchmark_results/camembert/camembert-large/MintakaRetrieval.json diff --git a/results/camembert/camembert-large/OpusparcusPC.json b/outputs/benchmark_results/camembert/camembert-large/OpusparcusPC.json similarity index 100% rename from results/camembert/camembert-large/OpusparcusPC.json rename to 
outputs/benchmark_results/camembert/camembert-large/OpusparcusPC.json diff --git a/results/camembert/camembert-large/PawsX.json b/outputs/benchmark_results/camembert/camembert-large/PawsX.json similarity index 100% rename from results/camembert/camembert-large/PawsX.json rename to outputs/benchmark_results/camembert/camembert-large/PawsX.json diff --git a/results/camembert/camembert-large/SICKFr.json b/outputs/benchmark_results/camembert/camembert-large/SICKFr.json similarity index 100% rename from results/camembert/camembert-large/SICKFr.json rename to outputs/benchmark_results/camembert/camembert-large/SICKFr.json diff --git a/results/camembert/camembert-large/STS22.json b/outputs/benchmark_results/camembert/camembert-large/STS22.json similarity index 100% rename from results/camembert/camembert-large/STS22.json rename to outputs/benchmark_results/camembert/camembert-large/STS22.json diff --git a/results/camembert/camembert-large/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/camembert/camembert-large/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/camembert/camembert-large/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/camembert/camembert-large/STSBenchmarkMultilingualSTS.json diff --git a/results/camembert/camembert-large/SummEvalFr.json b/outputs/benchmark_results/camembert/camembert-large/SummEvalFr.json similarity index 100% rename from results/camembert/camembert-large/SummEvalFr.json rename to outputs/benchmark_results/camembert/camembert-large/SummEvalFr.json diff --git a/results/camembert/camembert-large/SyntecReranking.json b/outputs/benchmark_results/camembert/camembert-large/SyntecReranking.json similarity index 100% rename from results/camembert/camembert-large/SyntecReranking.json rename to outputs/benchmark_results/camembert/camembert-large/SyntecReranking.json diff --git a/results/camembert/camembert-large/SyntecRetrieval.json 
b/outputs/benchmark_results/camembert/camembert-large/SyntecRetrieval.json similarity index 100% rename from results/camembert/camembert-large/SyntecRetrieval.json rename to outputs/benchmark_results/camembert/camembert-large/SyntecRetrieval.json diff --git a/results/camembert/camembert-large/XPQARetrieval.json b/outputs/benchmark_results/camembert/camembert-large/XPQARetrieval.json similarity index 100% rename from results/camembert/camembert-large/XPQARetrieval.json rename to outputs/benchmark_results/camembert/camembert-large/XPQARetrieval.json diff --git a/results/dangvantuan/sentence-camembert-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloProfClusteringP2P.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/AlloProfClusteringP2P.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloProfClusteringP2P.json diff --git a/results/dangvantuan/sentence-camembert-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloProfClusteringS2S.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/AlloProfClusteringS2S.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloProfClusteringS2S.json diff --git a/results/dangvantuan/sentence-camembert-base/AlloprofReranking.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloprofReranking.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/AlloprofReranking.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloprofReranking.json diff --git a/results/dangvantuan/sentence-camembert-base/AlloprofRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloprofRetrieval.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/AlloprofRetrieval.json rename to 
outputs/benchmark_results/dangvantuan/sentence-camembert-base/AlloprofRetrieval.json diff --git a/results/dangvantuan/sentence-camembert-base/AmazonReviewsClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/AmazonReviewsClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/AmazonReviewsClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/AmazonReviewsClassification.json diff --git a/results/dangvantuan/sentence-camembert-base/BSARDRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/BSARDRetrieval.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/BSARDRetrieval.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/BSARDRetrieval.json diff --git a/results/dangvantuan/sentence-camembert-base/DiaBLaBitextMining.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/DiaBLaBitextMining.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/DiaBLaBitextMining.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/DiaBLaBitextMining.json diff --git a/results/dangvantuan/sentence-camembert-base/FloresBitextMining.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/FloresBitextMining.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/FloresBitextMining.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/FloresBitextMining.json diff --git a/results/dangvantuan/sentence-camembert-base/HALClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/HALClusteringS2S.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/HALClusteringS2S.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/HALClusteringS2S.json diff --git 
a/results/dangvantuan/sentence-camembert-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MLSUMClusteringP2P.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/MLSUMClusteringP2P.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MLSUMClusteringP2P.json diff --git a/results/dangvantuan/sentence-camembert-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MLSUMClusteringS2S.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/MLSUMClusteringS2S.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MLSUMClusteringS2S.json diff --git a/results/dangvantuan/sentence-camembert-base/MTOPDomainClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MTOPDomainClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/MTOPDomainClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MTOPDomainClassification.json diff --git a/results/dangvantuan/sentence-camembert-base/MTOPIntentClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MTOPIntentClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/MTOPIntentClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MTOPIntentClassification.json diff --git a/results/dangvantuan/sentence-camembert-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/MasakhaNEWSClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClassification.json diff --git a/results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringP2P.json 
b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringP2P.json diff --git a/results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MasakhaNEWSClusteringS2S.json diff --git a/results/dangvantuan/sentence-camembert-base/MassiveIntentClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MassiveIntentClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/MassiveIntentClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MassiveIntentClassification.json diff --git a/results/dangvantuan/sentence-camembert-base/MassiveScenarioClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MassiveScenarioClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/MassiveScenarioClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MassiveScenarioClassification.json diff --git a/results/dangvantuan/sentence-camembert-base/MintakaRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/MintakaRetrieval.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/MintakaRetrieval.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/MintakaRetrieval.json diff --git a/results/dangvantuan/sentence-camembert-base/OpusparcusPC.json 
b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/OpusparcusPC.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/OpusparcusPC.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/OpusparcusPC.json diff --git a/results/dangvantuan/sentence-camembert-base/PawsX.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/PawsX.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/PawsX.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/PawsX.json diff --git a/results/dangvantuan/sentence-camembert-base/SICKFr.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/SICKFr.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/SICKFr.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/SICKFr.json diff --git a/results/dangvantuan/sentence-camembert-base/STS22.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/STS22.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/STS22.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/STS22.json diff --git a/results/dangvantuan/sentence-camembert-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/STSBenchmarkMultilingualSTS.json diff --git a/results/dangvantuan/sentence-camembert-base/SummEvalFr.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/SummEvalFr.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/SummEvalFr.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/SummEvalFr.json diff --git 
a/results/dangvantuan/sentence-camembert-base/SyntecReranking.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/SyntecReranking.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/SyntecReranking.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/SyntecReranking.json diff --git a/results/dangvantuan/sentence-camembert-base/SyntecRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/SyntecRetrieval.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/SyntecRetrieval.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/SyntecRetrieval.json diff --git a/results/dangvantuan/sentence-camembert-base/XPQARetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-base/XPQARetrieval.json similarity index 100% rename from results/dangvantuan/sentence-camembert-base/XPQARetrieval.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-base/XPQARetrieval.json diff --git a/results/dangvantuan/sentence-camembert-large/AlloProfClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloProfClusteringP2P.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/AlloProfClusteringP2P.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloProfClusteringP2P.json diff --git a/results/dangvantuan/sentence-camembert-large/AlloProfClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloProfClusteringS2S.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/AlloProfClusteringS2S.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloProfClusteringS2S.json diff --git a/results/dangvantuan/sentence-camembert-large/AlloprofReranking.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloprofReranking.json similarity index 
100% rename from results/dangvantuan/sentence-camembert-large/AlloprofReranking.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloprofReranking.json diff --git a/results/dangvantuan/sentence-camembert-large/AlloprofRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloprofRetrieval.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/AlloprofRetrieval.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/AlloprofRetrieval.json diff --git a/results/dangvantuan/sentence-camembert-large/AmazonReviewsClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/AmazonReviewsClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/AmazonReviewsClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/AmazonReviewsClassification.json diff --git a/results/dangvantuan/sentence-camembert-large/BSARDRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/BSARDRetrieval.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/BSARDRetrieval.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/BSARDRetrieval.json diff --git a/results/dangvantuan/sentence-camembert-large/DiaBLaBitextMining.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/DiaBLaBitextMining.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/DiaBLaBitextMining.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/DiaBLaBitextMining.json diff --git a/results/dangvantuan/sentence-camembert-large/FloresBitextMining.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/FloresBitextMining.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/FloresBitextMining.json rename to 
outputs/benchmark_results/dangvantuan/sentence-camembert-large/FloresBitextMining.json diff --git a/results/dangvantuan/sentence-camembert-large/HALClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/HALClusteringS2S.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/HALClusteringS2S.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/HALClusteringS2S.json diff --git a/results/dangvantuan/sentence-camembert-large/MLSUMClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MLSUMClusteringP2P.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/MLSUMClusteringP2P.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MLSUMClusteringP2P.json diff --git a/results/dangvantuan/sentence-camembert-large/MLSUMClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MLSUMClusteringS2S.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/MLSUMClusteringS2S.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MLSUMClusteringS2S.json diff --git a/results/dangvantuan/sentence-camembert-large/MTOPDomainClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MTOPDomainClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/MTOPDomainClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MTOPDomainClassification.json diff --git a/results/dangvantuan/sentence-camembert-large/MTOPIntentClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MTOPIntentClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/MTOPIntentClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MTOPIntentClassification.json diff --git 
a/results/dangvantuan/sentence-camembert-large/MasakhaNEWSClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/MasakhaNEWSClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClassification.json diff --git a/results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringP2P.json diff --git a/results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MasakhaNEWSClusteringS2S.json diff --git a/results/dangvantuan/sentence-camembert-large/MassiveIntentClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MassiveIntentClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/MassiveIntentClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MassiveIntentClassification.json diff --git a/results/dangvantuan/sentence-camembert-large/MassiveScenarioClassification.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MassiveScenarioClassification.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/MassiveScenarioClassification.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MassiveScenarioClassification.json 
diff --git a/results/dangvantuan/sentence-camembert-large/MintakaRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/MintakaRetrieval.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/MintakaRetrieval.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/MintakaRetrieval.json diff --git a/results/dangvantuan/sentence-camembert-large/OpusparcusPC.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/OpusparcusPC.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/OpusparcusPC.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/OpusparcusPC.json diff --git a/results/dangvantuan/sentence-camembert-large/PawsX.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/PawsX.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/PawsX.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/PawsX.json diff --git a/results/dangvantuan/sentence-camembert-large/SICKFr.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/SICKFr.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/SICKFr.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/SICKFr.json diff --git a/results/dangvantuan/sentence-camembert-large/STS22.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/STS22.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/STS22.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/STS22.json diff --git a/results/dangvantuan/sentence-camembert-large/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/STSBenchmarkMultilingualSTS.json rename to 
outputs/benchmark_results/dangvantuan/sentence-camembert-large/STSBenchmarkMultilingualSTS.json diff --git a/results/dangvantuan/sentence-camembert-large/SummEvalFr.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/SummEvalFr.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/SummEvalFr.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/SummEvalFr.json diff --git a/results/dangvantuan/sentence-camembert-large/SyntecReranking.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/SyntecReranking.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/SyntecReranking.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/SyntecReranking.json diff --git a/results/dangvantuan/sentence-camembert-large/SyntecRetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/SyntecRetrieval.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/SyntecRetrieval.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/SyntecRetrieval.json diff --git a/results/dangvantuan/sentence-camembert-large/XPQARetrieval.json b/outputs/benchmark_results/dangvantuan/sentence-camembert-large/XPQARetrieval.json similarity index 100% rename from results/dangvantuan/sentence-camembert-large/XPQARetrieval.json rename to outputs/benchmark_results/dangvantuan/sentence-camembert-large/XPQARetrieval.json diff --git a/results/distilbert-base-uncased/AlloProfClusteringP2P.json b/outputs/benchmark_results/distilbert-base-uncased/AlloProfClusteringP2P.json similarity index 100% rename from results/distilbert-base-uncased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/distilbert-base-uncased/AlloProfClusteringP2P.json diff --git a/results/distilbert-base-uncased/AlloProfClusteringS2S.json b/outputs/benchmark_results/distilbert-base-uncased/AlloProfClusteringS2S.json similarity 
index 100% rename from results/distilbert-base-uncased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/distilbert-base-uncased/AlloProfClusteringS2S.json diff --git a/results/distilbert-base-uncased/AlloprofReranking.json b/outputs/benchmark_results/distilbert-base-uncased/AlloprofReranking.json similarity index 100% rename from results/distilbert-base-uncased/AlloprofReranking.json rename to outputs/benchmark_results/distilbert-base-uncased/AlloprofReranking.json diff --git a/results/distilbert-base-uncased/AlloprofRetrieval.json b/outputs/benchmark_results/distilbert-base-uncased/AlloprofRetrieval.json similarity index 100% rename from results/distilbert-base-uncased/AlloprofRetrieval.json rename to outputs/benchmark_results/distilbert-base-uncased/AlloprofRetrieval.json diff --git a/results/distilbert-base-uncased/AmazonReviewsClassification.json b/outputs/benchmark_results/distilbert-base-uncased/AmazonReviewsClassification.json similarity index 100% rename from results/distilbert-base-uncased/AmazonReviewsClassification.json rename to outputs/benchmark_results/distilbert-base-uncased/AmazonReviewsClassification.json diff --git a/results/distilbert-base-uncased/BSARDRetrieval.json b/outputs/benchmark_results/distilbert-base-uncased/BSARDRetrieval.json similarity index 100% rename from results/distilbert-base-uncased/BSARDRetrieval.json rename to outputs/benchmark_results/distilbert-base-uncased/BSARDRetrieval.json diff --git a/results/distilbert-base-uncased/DiaBLaBitextMining.json b/outputs/benchmark_results/distilbert-base-uncased/DiaBLaBitextMining.json similarity index 100% rename from results/distilbert-base-uncased/DiaBLaBitextMining.json rename to outputs/benchmark_results/distilbert-base-uncased/DiaBLaBitextMining.json diff --git a/results/distilbert-base-uncased/FloresBitextMining.json b/outputs/benchmark_results/distilbert-base-uncased/FloresBitextMining.json similarity index 100% rename from 
results/distilbert-base-uncased/FloresBitextMining.json rename to outputs/benchmark_results/distilbert-base-uncased/FloresBitextMining.json diff --git a/results/distilbert-base-uncased/HALClusteringS2S.json b/outputs/benchmark_results/distilbert-base-uncased/HALClusteringS2S.json similarity index 100% rename from results/distilbert-base-uncased/HALClusteringS2S.json rename to outputs/benchmark_results/distilbert-base-uncased/HALClusteringS2S.json diff --git a/results/distilbert-base-uncased/MLSUMClusteringP2P.json b/outputs/benchmark_results/distilbert-base-uncased/MLSUMClusteringP2P.json similarity index 100% rename from results/distilbert-base-uncased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/distilbert-base-uncased/MLSUMClusteringP2P.json diff --git a/results/distilbert-base-uncased/MLSUMClusteringS2S.json b/outputs/benchmark_results/distilbert-base-uncased/MLSUMClusteringS2S.json similarity index 100% rename from results/distilbert-base-uncased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/distilbert-base-uncased/MLSUMClusteringS2S.json diff --git a/results/distilbert-base-uncased/MTOPDomainClassification.json b/outputs/benchmark_results/distilbert-base-uncased/MTOPDomainClassification.json similarity index 100% rename from results/distilbert-base-uncased/MTOPDomainClassification.json rename to outputs/benchmark_results/distilbert-base-uncased/MTOPDomainClassification.json diff --git a/results/distilbert-base-uncased/MTOPIntentClassification.json b/outputs/benchmark_results/distilbert-base-uncased/MTOPIntentClassification.json similarity index 100% rename from results/distilbert-base-uncased/MTOPIntentClassification.json rename to outputs/benchmark_results/distilbert-base-uncased/MTOPIntentClassification.json diff --git a/results/distilbert-base-uncased/MasakhaNEWSClassification.json b/outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClassification.json similarity index 100% rename from 
results/distilbert-base-uncased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClassification.json diff --git a/results/distilbert-base-uncased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/distilbert-base-uncased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClusteringP2P.json diff --git a/results/distilbert-base-uncased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/distilbert-base-uncased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/distilbert-base-uncased/MasakhaNEWSClusteringS2S.json diff --git a/results/distilbert-base-uncased/MassiveIntentClassification.json b/outputs/benchmark_results/distilbert-base-uncased/MassiveIntentClassification.json similarity index 100% rename from results/distilbert-base-uncased/MassiveIntentClassification.json rename to outputs/benchmark_results/distilbert-base-uncased/MassiveIntentClassification.json diff --git a/results/distilbert-base-uncased/MassiveScenarioClassification.json b/outputs/benchmark_results/distilbert-base-uncased/MassiveScenarioClassification.json similarity index 100% rename from results/distilbert-base-uncased/MassiveScenarioClassification.json rename to outputs/benchmark_results/distilbert-base-uncased/MassiveScenarioClassification.json diff --git a/results/distilbert-base-uncased/MintakaRetrieval.json b/outputs/benchmark_results/distilbert-base-uncased/MintakaRetrieval.json similarity index 100% rename from results/distilbert-base-uncased/MintakaRetrieval.json rename to outputs/benchmark_results/distilbert-base-uncased/MintakaRetrieval.json diff --git a/results/distilbert-base-uncased/OpusparcusPC.json 
b/outputs/benchmark_results/distilbert-base-uncased/OpusparcusPC.json similarity index 100% rename from results/distilbert-base-uncased/OpusparcusPC.json rename to outputs/benchmark_results/distilbert-base-uncased/OpusparcusPC.json diff --git a/results/distilbert-base-uncased/PawsX.json b/outputs/benchmark_results/distilbert-base-uncased/PawsX.json similarity index 100% rename from results/distilbert-base-uncased/PawsX.json rename to outputs/benchmark_results/distilbert-base-uncased/PawsX.json diff --git a/results/distilbert-base-uncased/SICKFr.json b/outputs/benchmark_results/distilbert-base-uncased/SICKFr.json similarity index 100% rename from results/distilbert-base-uncased/SICKFr.json rename to outputs/benchmark_results/distilbert-base-uncased/SICKFr.json diff --git a/results/distilbert-base-uncased/STS22.json b/outputs/benchmark_results/distilbert-base-uncased/STS22.json similarity index 100% rename from results/distilbert-base-uncased/STS22.json rename to outputs/benchmark_results/distilbert-base-uncased/STS22.json diff --git a/results/distilbert-base-uncased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/distilbert-base-uncased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/distilbert-base-uncased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/distilbert-base-uncased/STSBenchmarkMultilingualSTS.json diff --git a/results/distilbert-base-uncased/SummEvalFr.json b/outputs/benchmark_results/distilbert-base-uncased/SummEvalFr.json similarity index 100% rename from results/distilbert-base-uncased/SummEvalFr.json rename to outputs/benchmark_results/distilbert-base-uncased/SummEvalFr.json diff --git a/results/distilbert-base-uncased/SyntecReranking.json b/outputs/benchmark_results/distilbert-base-uncased/SyntecReranking.json similarity index 100% rename from results/distilbert-base-uncased/SyntecReranking.json rename to outputs/benchmark_results/distilbert-base-uncased/SyntecReranking.json diff --git 
a/results/distilbert-base-uncased/SyntecRetrieval.json b/outputs/benchmark_results/distilbert-base-uncased/SyntecRetrieval.json similarity index 100% rename from results/distilbert-base-uncased/SyntecRetrieval.json rename to outputs/benchmark_results/distilbert-base-uncased/SyntecRetrieval.json diff --git a/results/distilbert-base-uncased/XPQARetrieval.json b/outputs/benchmark_results/distilbert-base-uncased/XPQARetrieval.json similarity index 100% rename from results/distilbert-base-uncased/XPQARetrieval.json rename to outputs/benchmark_results/distilbert-base-uncased/XPQARetrieval.json diff --git a/results/flaubert/flaubert_base_cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/AlloProfClusteringP2P.json similarity index 100% rename from results/flaubert/flaubert_base_cased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/AlloProfClusteringP2P.json diff --git a/results/flaubert/flaubert_base_cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/AlloProfClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_base_cased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/AlloProfClusteringS2S.json diff --git a/results/flaubert/flaubert_base_cased/AlloprofReranking.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/AlloprofReranking.json similarity index 100% rename from results/flaubert/flaubert_base_cased/AlloprofReranking.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/AlloprofReranking.json diff --git a/results/flaubert/flaubert_base_cased/AlloprofRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/AlloprofRetrieval.json similarity index 100% rename from results/flaubert/flaubert_base_cased/AlloprofRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/AlloprofRetrieval.json diff --git 
a/results/flaubert/flaubert_base_cased/AmazonReviewsClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/AmazonReviewsClassification.json similarity index 100% rename from results/flaubert/flaubert_base_cased/AmazonReviewsClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/AmazonReviewsClassification.json diff --git a/results/flaubert/flaubert_base_cased/BSARDRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/BSARDRetrieval.json similarity index 100% rename from results/flaubert/flaubert_base_cased/BSARDRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/BSARDRetrieval.json diff --git a/results/flaubert/flaubert_base_cased/DiaBLaBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/DiaBLaBitextMining.json similarity index 100% rename from results/flaubert/flaubert_base_cased/DiaBLaBitextMining.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/DiaBLaBitextMining.json diff --git a/results/flaubert/flaubert_base_cased/FloresBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/FloresBitextMining.json similarity index 100% rename from results/flaubert/flaubert_base_cased/FloresBitextMining.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/FloresBitextMining.json diff --git a/results/flaubert/flaubert_base_cased/HALClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/HALClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_base_cased/HALClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/HALClusteringS2S.json diff --git a/results/flaubert/flaubert_base_cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MLSUMClusteringP2P.json similarity index 100% rename from results/flaubert/flaubert_base_cased/MLSUMClusteringP2P.json rename to 
outputs/benchmark_results/flaubert/flaubert_base_cased/MLSUMClusteringP2P.json diff --git a/results/flaubert/flaubert_base_cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MLSUMClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_base_cased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MLSUMClusteringS2S.json diff --git a/results/flaubert/flaubert_base_cased/MTOPDomainClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MTOPDomainClassification.json similarity index 100% rename from results/flaubert/flaubert_base_cased/MTOPDomainClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MTOPDomainClassification.json diff --git a/results/flaubert/flaubert_base_cased/MTOPIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MTOPIntentClassification.json similarity index 100% rename from results/flaubert/flaubert_base_cased/MTOPIntentClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MTOPIntentClassification.json diff --git a/results/flaubert/flaubert_base_cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClassification.json similarity index 100% rename from results/flaubert/flaubert_base_cased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClassification.json diff --git a/results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringP2P.json diff --git a/results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringS2S.json 
b/outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MasakhaNEWSClusteringS2S.json diff --git a/results/flaubert/flaubert_base_cased/MassiveIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MassiveIntentClassification.json similarity index 100% rename from results/flaubert/flaubert_base_cased/MassiveIntentClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MassiveIntentClassification.json diff --git a/results/flaubert/flaubert_base_cased/MassiveScenarioClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MassiveScenarioClassification.json similarity index 100% rename from results/flaubert/flaubert_base_cased/MassiveScenarioClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MassiveScenarioClassification.json diff --git a/results/flaubert/flaubert_base_cased/MintakaRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/MintakaRetrieval.json similarity index 100% rename from results/flaubert/flaubert_base_cased/MintakaRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/MintakaRetrieval.json diff --git a/results/flaubert/flaubert_base_cased/OpusparcusPC.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/OpusparcusPC.json similarity index 100% rename from results/flaubert/flaubert_base_cased/OpusparcusPC.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/OpusparcusPC.json diff --git a/results/flaubert/flaubert_base_cased/PawsX.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/PawsX.json similarity index 100% rename from results/flaubert/flaubert_base_cased/PawsX.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/PawsX.json diff --git 
a/results/flaubert/flaubert_base_cased/SICKFr.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/SICKFr.json similarity index 100% rename from results/flaubert/flaubert_base_cased/SICKFr.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/SICKFr.json diff --git a/results/flaubert/flaubert_base_cased/STS22.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/STS22.json similarity index 100% rename from results/flaubert/flaubert_base_cased/STS22.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/STS22.json diff --git a/results/flaubert/flaubert_base_cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/flaubert/flaubert_base_cased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/STSBenchmarkMultilingualSTS.json diff --git a/results/flaubert/flaubert_base_cased/SummEvalFr.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/SummEvalFr.json similarity index 100% rename from results/flaubert/flaubert_base_cased/SummEvalFr.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/SummEvalFr.json diff --git a/results/flaubert/flaubert_base_cased/SyntecReranking.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/SyntecReranking.json similarity index 100% rename from results/flaubert/flaubert_base_cased/SyntecReranking.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/SyntecReranking.json diff --git a/results/flaubert/flaubert_base_cased/SyntecRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_cased/SyntecRetrieval.json similarity index 100% rename from results/flaubert/flaubert_base_cased/SyntecRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/SyntecRetrieval.json diff --git a/results/flaubert/flaubert_base_cased/XPQARetrieval.json 
b/outputs/benchmark_results/flaubert/flaubert_base_cased/XPQARetrieval.json similarity index 100% rename from results/flaubert/flaubert_base_cased/XPQARetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_base_cased/XPQARetrieval.json diff --git a/results/flaubert/flaubert_base_uncased/AlloProfClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloProfClusteringP2P.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloProfClusteringP2P.json diff --git a/results/flaubert/flaubert_base_uncased/AlloProfClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloProfClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloProfClusteringS2S.json diff --git a/results/flaubert/flaubert_base_uncased/AlloprofReranking.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloprofReranking.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/AlloprofReranking.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloprofReranking.json diff --git a/results/flaubert/flaubert_base_uncased/AlloprofRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloprofRetrieval.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/AlloprofRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/AlloprofRetrieval.json diff --git a/results/flaubert/flaubert_base_uncased/AmazonReviewsClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/AmazonReviewsClassification.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/AmazonReviewsClassification.json rename to 
outputs/benchmark_results/flaubert/flaubert_base_uncased/AmazonReviewsClassification.json diff --git a/results/flaubert/flaubert_base_uncased/BSARDRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/BSARDRetrieval.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/BSARDRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/BSARDRetrieval.json diff --git a/results/flaubert/flaubert_base_uncased/DiaBLaBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/DiaBLaBitextMining.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/DiaBLaBitextMining.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/DiaBLaBitextMining.json diff --git a/results/flaubert/flaubert_base_uncased/FloresBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/FloresBitextMining.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/FloresBitextMining.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/FloresBitextMining.json diff --git a/results/flaubert/flaubert_base_uncased/HALClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/HALClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/HALClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/HALClusteringS2S.json diff --git a/results/flaubert/flaubert_base_uncased/MLSUMClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MLSUMClusteringP2P.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MLSUMClusteringP2P.json diff --git a/results/flaubert/flaubert_base_uncased/MLSUMClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MLSUMClusteringS2S.json similarity index 100% rename from 
results/flaubert/flaubert_base_uncased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MLSUMClusteringS2S.json diff --git a/results/flaubert/flaubert_base_uncased/MTOPDomainClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MTOPDomainClassification.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/MTOPDomainClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MTOPDomainClassification.json diff --git a/results/flaubert/flaubert_base_uncased/MTOPIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MTOPIntentClassification.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/MTOPIntentClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MTOPIntentClassification.json diff --git a/results/flaubert/flaubert_base_uncased/MasakhaNEWSClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClassification.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClassification.json diff --git a/results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringP2P.json diff --git a/results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringS2S.json rename to 
outputs/benchmark_results/flaubert/flaubert_base_uncased/MasakhaNEWSClusteringS2S.json diff --git a/results/flaubert/flaubert_base_uncased/MassiveIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MassiveIntentClassification.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/MassiveIntentClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MassiveIntentClassification.json diff --git a/results/flaubert/flaubert_base_uncased/MassiveScenarioClassification.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MassiveScenarioClassification.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/MassiveScenarioClassification.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MassiveScenarioClassification.json diff --git a/results/flaubert/flaubert_base_uncased/MintakaRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/MintakaRetrieval.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/MintakaRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/MintakaRetrieval.json diff --git a/results/flaubert/flaubert_base_uncased/OpusparcusPC.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/OpusparcusPC.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/OpusparcusPC.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/OpusparcusPC.json diff --git a/results/flaubert/flaubert_base_uncased/PawsX.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/PawsX.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/PawsX.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/PawsX.json diff --git a/results/flaubert/flaubert_base_uncased/SICKFr.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/SICKFr.json similarity index 100% rename from 
results/flaubert/flaubert_base_uncased/SICKFr.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/SICKFr.json diff --git a/results/flaubert/flaubert_base_uncased/STS22.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/STS22.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/STS22.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/STS22.json diff --git a/results/flaubert/flaubert_base_uncased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/STSBenchmarkMultilingualSTS.json diff --git a/results/flaubert/flaubert_base_uncased/SummEvalFr.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/SummEvalFr.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/SummEvalFr.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/SummEvalFr.json diff --git a/results/flaubert/flaubert_base_uncased/SyntecReranking.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/SyntecReranking.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/SyntecReranking.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/SyntecReranking.json diff --git a/results/flaubert/flaubert_base_uncased/SyntecRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/SyntecRetrieval.json similarity index 100% rename from results/flaubert/flaubert_base_uncased/SyntecRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/SyntecRetrieval.json diff --git a/results/flaubert/flaubert_base_uncased/XPQARetrieval.json b/outputs/benchmark_results/flaubert/flaubert_base_uncased/XPQARetrieval.json similarity index 100% rename from 
results/flaubert/flaubert_base_uncased/XPQARetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_base_uncased/XPQARetrieval.json diff --git a/results/flaubert/flaubert_large_cased/AlloProfClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/AlloProfClusteringP2P.json similarity index 100% rename from results/flaubert/flaubert_large_cased/AlloProfClusteringP2P.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/AlloProfClusteringP2P.json diff --git a/results/flaubert/flaubert_large_cased/AlloProfClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/AlloProfClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_large_cased/AlloProfClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/AlloProfClusteringS2S.json diff --git a/results/flaubert/flaubert_large_cased/AlloprofReranking.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/AlloprofReranking.json similarity index 100% rename from results/flaubert/flaubert_large_cased/AlloprofReranking.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/AlloprofReranking.json diff --git a/results/flaubert/flaubert_large_cased/AlloprofRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/AlloprofRetrieval.json similarity index 100% rename from results/flaubert/flaubert_large_cased/AlloprofRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/AlloprofRetrieval.json diff --git a/results/flaubert/flaubert_large_cased/AmazonReviewsClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/AmazonReviewsClassification.json similarity index 100% rename from results/flaubert/flaubert_large_cased/AmazonReviewsClassification.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/AmazonReviewsClassification.json diff --git a/results/flaubert/flaubert_large_cased/BSARDRetrieval.json 
b/outputs/benchmark_results/flaubert/flaubert_large_cased/BSARDRetrieval.json similarity index 100% rename from results/flaubert/flaubert_large_cased/BSARDRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/BSARDRetrieval.json diff --git a/results/flaubert/flaubert_large_cased/DiaBLaBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/DiaBLaBitextMining.json similarity index 100% rename from results/flaubert/flaubert_large_cased/DiaBLaBitextMining.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/DiaBLaBitextMining.json diff --git a/results/flaubert/flaubert_large_cased/FloresBitextMining.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/FloresBitextMining.json similarity index 100% rename from results/flaubert/flaubert_large_cased/FloresBitextMining.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/FloresBitextMining.json diff --git a/results/flaubert/flaubert_large_cased/HALClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/HALClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_large_cased/HALClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/HALClusteringS2S.json diff --git a/results/flaubert/flaubert_large_cased/MLSUMClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MLSUMClusteringP2P.json similarity index 100% rename from results/flaubert/flaubert_large_cased/MLSUMClusteringP2P.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MLSUMClusteringP2P.json diff --git a/results/flaubert/flaubert_large_cased/MLSUMClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MLSUMClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_large_cased/MLSUMClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MLSUMClusteringS2S.json diff --git 
a/results/flaubert/flaubert_large_cased/MTOPDomainClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MTOPDomainClassification.json similarity index 100% rename from results/flaubert/flaubert_large_cased/MTOPDomainClassification.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MTOPDomainClassification.json diff --git a/results/flaubert/flaubert_large_cased/MTOPIntentClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MTOPIntentClassification.json similarity index 100% rename from results/flaubert/flaubert_large_cased/MTOPIntentClassification.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MTOPIntentClassification.json diff --git a/results/flaubert/flaubert_large_cased/MasakhaNEWSClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClassification.json similarity index 100% rename from results/flaubert/flaubert_large_cased/MasakhaNEWSClassification.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClassification.json diff --git a/results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringP2P.json diff --git a/results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MasakhaNEWSClusteringS2S.json diff --git a/results/flaubert/flaubert_large_cased/MassiveIntentClassification.json 
b/outputs/benchmark_results/flaubert/flaubert_large_cased/MassiveIntentClassification.json similarity index 100% rename from results/flaubert/flaubert_large_cased/MassiveIntentClassification.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MassiveIntentClassification.json diff --git a/results/flaubert/flaubert_large_cased/MassiveScenarioClassification.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MassiveScenarioClassification.json similarity index 100% rename from results/flaubert/flaubert_large_cased/MassiveScenarioClassification.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MassiveScenarioClassification.json diff --git a/results/flaubert/flaubert_large_cased/MintakaRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/MintakaRetrieval.json similarity index 100% rename from results/flaubert/flaubert_large_cased/MintakaRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/MintakaRetrieval.json diff --git a/results/flaubert/flaubert_large_cased/OpusparcusPC.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/OpusparcusPC.json similarity index 100% rename from results/flaubert/flaubert_large_cased/OpusparcusPC.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/OpusparcusPC.json diff --git a/results/flaubert/flaubert_large_cased/PawsX.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/PawsX.json similarity index 100% rename from results/flaubert/flaubert_large_cased/PawsX.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/PawsX.json diff --git a/results/flaubert/flaubert_large_cased/SICKFr.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/SICKFr.json similarity index 100% rename from results/flaubert/flaubert_large_cased/SICKFr.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/SICKFr.json diff --git a/results/flaubert/flaubert_large_cased/STS22.json 
b/outputs/benchmark_results/flaubert/flaubert_large_cased/STS22.json similarity index 100% rename from results/flaubert/flaubert_large_cased/STS22.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/STS22.json diff --git a/results/flaubert/flaubert_large_cased/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/flaubert/flaubert_large_cased/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/STSBenchmarkMultilingualSTS.json diff --git a/results/flaubert/flaubert_large_cased/SummEvalFr.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/SummEvalFr.json similarity index 100% rename from results/flaubert/flaubert_large_cased/SummEvalFr.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/SummEvalFr.json diff --git a/results/flaubert/flaubert_large_cased/SyntecReranking.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/SyntecReranking.json similarity index 100% rename from results/flaubert/flaubert_large_cased/SyntecReranking.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/SyntecReranking.json diff --git a/results/flaubert/flaubert_large_cased/SyntecRetrieval.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/SyntecRetrieval.json similarity index 100% rename from results/flaubert/flaubert_large_cased/SyntecRetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/SyntecRetrieval.json diff --git a/results/flaubert/flaubert_large_cased/XPQARetrieval.json b/outputs/benchmark_results/flaubert/flaubert_large_cased/XPQARetrieval.json similarity index 100% rename from results/flaubert/flaubert_large_cased/XPQARetrieval.json rename to outputs/benchmark_results/flaubert/flaubert_large_cased/XPQARetrieval.json diff --git a/results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringP2P.json 
b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringP2P.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringP2P.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringP2P.json diff --git a/results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringS2S.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringS2S.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringS2S.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloProfClusteringS2S.json diff --git a/results/intfloat/e5-mistral-7b-instruct/AlloprofReranking.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloprofReranking.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/AlloprofReranking.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloprofReranking.json diff --git a/results/intfloat/e5-mistral-7b-instruct/AlloprofRetrieval.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloprofRetrieval.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/AlloprofRetrieval.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AlloprofRetrieval.json diff --git a/results/intfloat/e5-mistral-7b-instruct/AmazonReviewsClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AmazonReviewsClassification.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/AmazonReviewsClassification.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/AmazonReviewsClassification.json diff --git a/results/intfloat/e5-mistral-7b-instruct/BSARDRetrieval.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/BSARDRetrieval.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/BSARDRetrieval.json rename to 
outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/BSARDRetrieval.json diff --git a/results/intfloat/e5-mistral-7b-instruct/DiaBLaBitextMining.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/DiaBLaBitextMining.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/DiaBLaBitextMining.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/DiaBLaBitextMining.json diff --git a/results/intfloat/e5-mistral-7b-instruct/FloresBitextMining.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/FloresBitextMining.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/FloresBitextMining.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/FloresBitextMining.json diff --git a/results/intfloat/e5-mistral-7b-instruct/HALClusteringS2S.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/HALClusteringS2S.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/HALClusteringS2S.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/HALClusteringS2S.json diff --git a/results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringP2P.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringP2P.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringP2P.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringP2P.json diff --git a/results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringS2S.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringS2S.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringS2S.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MLSUMClusteringS2S.json diff --git a/results/intfloat/e5-mistral-7b-instruct/MTOPDomainClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MTOPDomainClassification.json 
similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/MTOPDomainClassification.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MTOPDomainClassification.json diff --git a/results/intfloat/e5-mistral-7b-instruct/MTOPIntentClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MTOPIntentClassification.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/MTOPIntentClassification.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MTOPIntentClassification.json diff --git a/results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClassification.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClassification.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClassification.json diff --git a/results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringP2P.json diff --git a/results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MasakhaNEWSClusteringS2S.json diff --git a/results/intfloat/e5-mistral-7b-instruct/MassiveIntentClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MassiveIntentClassification.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/MassiveIntentClassification.json rename 
to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MassiveIntentClassification.json diff --git a/results/intfloat/e5-mistral-7b-instruct/MassiveScenarioClassification.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MassiveScenarioClassification.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/MassiveScenarioClassification.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MassiveScenarioClassification.json diff --git a/results/intfloat/e5-mistral-7b-instruct/MintakaRetrieval.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MintakaRetrieval.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/MintakaRetrieval.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/MintakaRetrieval.json diff --git a/results/intfloat/e5-mistral-7b-instruct/OpusparcusPC.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/OpusparcusPC.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/OpusparcusPC.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/OpusparcusPC.json diff --git a/results/intfloat/e5-mistral-7b-instruct/PawsX.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/PawsX.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/PawsX.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/PawsX.json diff --git a/results/intfloat/e5-mistral-7b-instruct/SICKFr.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SICKFr.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/SICKFr.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SICKFr.json diff --git a/results/intfloat/e5-mistral-7b-instruct/STS22.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/STS22.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/STS22.json rename to 
outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/STS22.json diff --git a/results/intfloat/e5-mistral-7b-instruct/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/STSBenchmarkMultilingualSTS.json diff --git a/results/intfloat/e5-mistral-7b-instruct/SummEvalFr.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SummEvalFr.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/SummEvalFr.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SummEvalFr.json diff --git a/results/intfloat/e5-mistral-7b-instruct/SyntecReranking.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SyntecReranking.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/SyntecReranking.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SyntecReranking.json diff --git a/results/intfloat/e5-mistral-7b-instruct/SyntecRetrieval.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SyntecRetrieval.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/SyntecRetrieval.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/SyntecRetrieval.json diff --git a/results/intfloat/e5-mistral-7b-instruct/XPQARetrieval.json b/outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/XPQARetrieval.json similarity index 100% rename from results/intfloat/e5-mistral-7b-instruct/XPQARetrieval.json rename to outputs/benchmark_results/intfloat/e5-mistral-7b-instruct/XPQARetrieval.json diff --git a/results/intfloat/multilingual-e5-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/AlloProfClusteringP2P.json similarity index 100% rename from 
results/intfloat/multilingual-e5-base/AlloProfClusteringP2P.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/AlloProfClusteringP2P.json diff --git a/results/intfloat/multilingual-e5-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/AlloProfClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-base/AlloProfClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/AlloProfClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-base/AlloprofReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/AlloprofReranking.json similarity index 100% rename from results/intfloat/multilingual-e5-base/AlloprofReranking.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/AlloprofReranking.json diff --git a/results/intfloat/multilingual-e5-base/AlloprofRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/AlloprofRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-base/AlloprofRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/AlloprofRetrieval.json diff --git a/results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json diff --git a/results/intfloat/multilingual-e5-base/BSARDRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/BSARDRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-base/BSARDRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/BSARDRetrieval.json diff --git a/results/intfloat/multilingual-e5-base/DiaBLaBitextMining.json 
b/outputs/benchmark_results/intfloat/multilingual-e5-base/DiaBLaBitextMining.json similarity index 100% rename from results/intfloat/multilingual-e5-base/DiaBLaBitextMining.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/DiaBLaBitextMining.json diff --git a/results/intfloat/multilingual-e5-base/FloresBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/FloresBitextMining.json similarity index 100% rename from results/intfloat/multilingual-e5-base/FloresBitextMining.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/FloresBitextMining.json diff --git a/results/intfloat/multilingual-e5-base/HALClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/HALClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-base/HALClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/HALClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MLSUMClusteringP2P.json similarity index 100% rename from results/intfloat/multilingual-e5-base/MLSUMClusteringP2P.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MLSUMClusteringP2P.json diff --git a/results/intfloat/multilingual-e5-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MLSUMClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-base/MLSUMClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MLSUMClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-base/MTOPDomainClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MTOPDomainClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-base/MTOPDomainClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MTOPDomainClassification.json 
diff --git a/results/intfloat/multilingual-e5-base/MTOPIntentClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MTOPIntentClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-base/MTOPIntentClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MTOPIntentClassification.json diff --git a/results/intfloat/multilingual-e5-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-base/MasakhaNEWSClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClassification.json diff --git a/results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringP2P.json diff --git a/results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MasakhaNEWSClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-base/MassiveIntentClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MassiveIntentClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-base/MassiveIntentClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MassiveIntentClassification.json diff --git a/results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json 
b/outputs/benchmark_results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json diff --git a/results/intfloat/multilingual-e5-base/MintakaRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/MintakaRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-base/MintakaRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/MintakaRetrieval.json diff --git a/results/intfloat/multilingual-e5-base/OpusparcusPC.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/OpusparcusPC.json similarity index 100% rename from results/intfloat/multilingual-e5-base/OpusparcusPC.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/OpusparcusPC.json diff --git a/results/intfloat/multilingual-e5-base/PawsX.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/PawsX.json similarity index 100% rename from results/intfloat/multilingual-e5-base/PawsX.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/PawsX.json diff --git a/results/intfloat/multilingual-e5-base/SICKFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/SICKFr.json similarity index 100% rename from results/intfloat/multilingual-e5-base/SICKFr.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/SICKFr.json diff --git a/results/intfloat/multilingual-e5-base/STS22.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/STS22.json similarity index 100% rename from results/intfloat/multilingual-e5-base/STS22.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/STS22.json diff --git a/results/intfloat/multilingual-e5-base/STSBenchmarkMultilingualSTS.json 
b/outputs/benchmark_results/intfloat/multilingual-e5-base/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/intfloat/multilingual-e5-base/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/STSBenchmarkMultilingualSTS.json diff --git a/results/intfloat/multilingual-e5-base/SummEvalFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/SummEvalFr.json similarity index 100% rename from results/intfloat/multilingual-e5-base/SummEvalFr.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/SummEvalFr.json diff --git a/results/intfloat/multilingual-e5-base/SyntecReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/SyntecReranking.json similarity index 100% rename from results/intfloat/multilingual-e5-base/SyntecReranking.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/SyntecReranking.json diff --git a/results/intfloat/multilingual-e5-base/SyntecRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/SyntecRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-base/SyntecRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/SyntecRetrieval.json diff --git a/results/intfloat/multilingual-e5-base/XPQARetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-base/XPQARetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-base/XPQARetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-base/XPQARetrieval.json diff --git a/results/intfloat/multilingual-e5-large/AlloProfClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/AlloProfClusteringP2P.json similarity index 100% rename from results/intfloat/multilingual-e5-large/AlloProfClusteringP2P.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/AlloProfClusteringP2P.json diff --git 
a/results/intfloat/multilingual-e5-large/AlloProfClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/AlloProfClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-large/AlloProfClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/AlloProfClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-large/AlloprofReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/AlloprofReranking.json similarity index 100% rename from results/intfloat/multilingual-e5-large/AlloprofReranking.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/AlloprofReranking.json diff --git a/results/intfloat/multilingual-e5-large/AlloprofRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/AlloprofRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-large/AlloprofRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/AlloprofRetrieval.json diff --git a/results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json diff --git a/results/intfloat/multilingual-e5-large/BSARDRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/BSARDRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-large/BSARDRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/BSARDRetrieval.json diff --git a/results/intfloat/multilingual-e5-large/DiaBLaBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/DiaBLaBitextMining.json similarity index 100% rename from 
results/intfloat/multilingual-e5-large/DiaBLaBitextMining.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/DiaBLaBitextMining.json diff --git a/results/intfloat/multilingual-e5-large/FloresBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/FloresBitextMining.json similarity index 100% rename from results/intfloat/multilingual-e5-large/FloresBitextMining.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/FloresBitextMining.json diff --git a/results/intfloat/multilingual-e5-large/HALClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/HALClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-large/HALClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/HALClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-large/MLSUMClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MLSUMClusteringP2P.json similarity index 100% rename from results/intfloat/multilingual-e5-large/MLSUMClusteringP2P.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MLSUMClusteringP2P.json diff --git a/results/intfloat/multilingual-e5-large/MLSUMClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MLSUMClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-large/MLSUMClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MLSUMClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-large/MTOPDomainClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MTOPDomainClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-large/MTOPDomainClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MTOPDomainClassification.json diff --git a/results/intfloat/multilingual-e5-large/MTOPIntentClassification.json 
b/outputs/benchmark_results/intfloat/multilingual-e5-large/MTOPIntentClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-large/MTOPIntentClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MTOPIntentClassification.json diff --git a/results/intfloat/multilingual-e5-large/MasakhaNEWSClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-large/MasakhaNEWSClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClassification.json diff --git a/results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringP2P.json diff --git a/results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MasakhaNEWSClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-large/MassiveIntentClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MassiveIntentClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-large/MassiveIntentClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MassiveIntentClassification.json diff --git a/results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json similarity index 100% rename 
from results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json diff --git a/results/intfloat/multilingual-e5-large/MintakaRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/MintakaRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-large/MintakaRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/MintakaRetrieval.json diff --git a/results/intfloat/multilingual-e5-large/OpusparcusPC.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/OpusparcusPC.json similarity index 100% rename from results/intfloat/multilingual-e5-large/OpusparcusPC.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/OpusparcusPC.json diff --git a/results/intfloat/multilingual-e5-large/PawsX.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/PawsX.json similarity index 100% rename from results/intfloat/multilingual-e5-large/PawsX.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/PawsX.json diff --git a/results/intfloat/multilingual-e5-large/SICKFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/SICKFr.json similarity index 100% rename from results/intfloat/multilingual-e5-large/SICKFr.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/SICKFr.json diff --git a/results/intfloat/multilingual-e5-large/STS22.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/STS22.json similarity index 100% rename from results/intfloat/multilingual-e5-large/STS22.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/STS22.json diff --git a/results/intfloat/multilingual-e5-large/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/STSBenchmarkMultilingualSTS.json similarity index 100% rename from 
results/intfloat/multilingual-e5-large/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/STSBenchmarkMultilingualSTS.json diff --git a/results/intfloat/multilingual-e5-large/SummEvalFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/SummEvalFr.json similarity index 100% rename from results/intfloat/multilingual-e5-large/SummEvalFr.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/SummEvalFr.json diff --git a/results/intfloat/multilingual-e5-large/SyntecReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/SyntecReranking.json similarity index 100% rename from results/intfloat/multilingual-e5-large/SyntecReranking.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/SyntecReranking.json diff --git a/results/intfloat/multilingual-e5-large/SyntecRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/SyntecRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-large/SyntecRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/SyntecRetrieval.json diff --git a/results/intfloat/multilingual-e5-large/XPQARetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-large/XPQARetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-large/XPQARetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-large/XPQARetrieval.json diff --git a/results/intfloat/multilingual-e5-small/AlloProfClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/AlloProfClusteringP2P.json similarity index 100% rename from results/intfloat/multilingual-e5-small/AlloProfClusteringP2P.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/AlloProfClusteringP2P.json diff --git a/results/intfloat/multilingual-e5-small/AlloProfClusteringS2S.json 
b/outputs/benchmark_results/intfloat/multilingual-e5-small/AlloProfClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-small/AlloProfClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/AlloProfClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-small/AlloprofReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/AlloprofReranking.json similarity index 100% rename from results/intfloat/multilingual-e5-small/AlloprofReranking.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/AlloprofReranking.json diff --git a/results/intfloat/multilingual-e5-small/AlloprofRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/AlloprofRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-small/AlloprofRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/AlloprofRetrieval.json diff --git a/results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json diff --git a/results/intfloat/multilingual-e5-small/BSARDRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/BSARDRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-small/BSARDRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/BSARDRetrieval.json diff --git a/results/intfloat/multilingual-e5-small/DiaBLaBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/DiaBLaBitextMining.json similarity index 100% rename from results/intfloat/multilingual-e5-small/DiaBLaBitextMining.json rename to 
outputs/benchmark_results/intfloat/multilingual-e5-small/DiaBLaBitextMining.json diff --git a/results/intfloat/multilingual-e5-small/FloresBitextMining.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/FloresBitextMining.json similarity index 100% rename from results/intfloat/multilingual-e5-small/FloresBitextMining.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/FloresBitextMining.json diff --git a/results/intfloat/multilingual-e5-small/HALClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/HALClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-small/HALClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/HALClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-small/MLSUMClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MLSUMClusteringP2P.json similarity index 100% rename from results/intfloat/multilingual-e5-small/MLSUMClusteringP2P.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MLSUMClusteringP2P.json diff --git a/results/intfloat/multilingual-e5-small/MLSUMClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MLSUMClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-small/MLSUMClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MLSUMClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-small/MTOPDomainClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MTOPDomainClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-small/MTOPDomainClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MTOPDomainClassification.json diff --git a/results/intfloat/multilingual-e5-small/MTOPIntentClassification.json 
b/outputs/benchmark_results/intfloat/multilingual-e5-small/MTOPIntentClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-small/MTOPIntentClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MTOPIntentClassification.json diff --git a/results/intfloat/multilingual-e5-small/MasakhaNEWSClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-small/MasakhaNEWSClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClassification.json diff --git a/results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringP2P.json diff --git a/results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MasakhaNEWSClusteringS2S.json diff --git a/results/intfloat/multilingual-e5-small/MassiveIntentClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MassiveIntentClassification.json similarity index 100% rename from results/intfloat/multilingual-e5-small/MassiveIntentClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MassiveIntentClassification.json diff --git a/results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json similarity index 100% rename 
from results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json diff --git a/results/intfloat/multilingual-e5-small/MintakaRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/MintakaRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-small/MintakaRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/MintakaRetrieval.json diff --git a/results/intfloat/multilingual-e5-small/OpusparcusPC.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/OpusparcusPC.json similarity index 100% rename from results/intfloat/multilingual-e5-small/OpusparcusPC.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/OpusparcusPC.json diff --git a/results/intfloat/multilingual-e5-small/PawsX.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/PawsX.json similarity index 100% rename from results/intfloat/multilingual-e5-small/PawsX.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/PawsX.json diff --git a/results/intfloat/multilingual-e5-small/SICKFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/SICKFr.json similarity index 100% rename from results/intfloat/multilingual-e5-small/SICKFr.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/SICKFr.json diff --git a/results/intfloat/multilingual-e5-small/STS22.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/STS22.json similarity index 100% rename from results/intfloat/multilingual-e5-small/STS22.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/STS22.json diff --git a/results/intfloat/multilingual-e5-small/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/STSBenchmarkMultilingualSTS.json similarity index 100% rename from 
results/intfloat/multilingual-e5-small/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/STSBenchmarkMultilingualSTS.json diff --git a/results/intfloat/multilingual-e5-small/SummEvalFr.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/SummEvalFr.json similarity index 100% rename from results/intfloat/multilingual-e5-small/SummEvalFr.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/SummEvalFr.json diff --git a/results/intfloat/multilingual-e5-small/SyntecReranking.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/SyntecReranking.json similarity index 100% rename from results/intfloat/multilingual-e5-small/SyntecReranking.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/SyntecReranking.json diff --git a/results/intfloat/multilingual-e5-small/SyntecRetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/SyntecRetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-small/SyntecRetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/SyntecRetrieval.json diff --git a/results/intfloat/multilingual-e5-small/XPQARetrieval.json b/outputs/benchmark_results/intfloat/multilingual-e5-small/XPQARetrieval.json similarity index 100% rename from results/intfloat/multilingual-e5-small/XPQARetrieval.json rename to outputs/benchmark_results/intfloat/multilingual-e5-small/XPQARetrieval.json diff --git a/results/izhx/udever-bloom-1b1/AlloProfClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/AlloProfClusteringP2P.json similarity index 100% rename from results/izhx/udever-bloom-1b1/AlloProfClusteringP2P.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/AlloProfClusteringP2P.json diff --git a/results/izhx/udever-bloom-1b1/AlloProfClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/AlloProfClusteringS2S.json similarity index 100% rename from 
results/izhx/udever-bloom-1b1/AlloProfClusteringS2S.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/AlloProfClusteringS2S.json diff --git a/results/izhx/udever-bloom-1b1/AlloprofReranking.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/AlloprofReranking.json similarity index 100% rename from results/izhx/udever-bloom-1b1/AlloprofReranking.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/AlloprofReranking.json diff --git a/results/izhx/udever-bloom-1b1/AlloprofRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/AlloprofRetrieval.json similarity index 100% rename from results/izhx/udever-bloom-1b1/AlloprofRetrieval.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/AlloprofRetrieval.json diff --git a/results/izhx/udever-bloom-1b1/AmazonReviewsClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/AmazonReviewsClassification.json similarity index 100% rename from results/izhx/udever-bloom-1b1/AmazonReviewsClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/AmazonReviewsClassification.json diff --git a/results/izhx/udever-bloom-1b1/BSARDRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/BSARDRetrieval.json similarity index 100% rename from results/izhx/udever-bloom-1b1/BSARDRetrieval.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/BSARDRetrieval.json diff --git a/results/izhx/udever-bloom-1b1/DiaBLaBitextMining.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/DiaBLaBitextMining.json similarity index 100% rename from results/izhx/udever-bloom-1b1/DiaBLaBitextMining.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/DiaBLaBitextMining.json diff --git a/results/izhx/udever-bloom-1b1/FloresBitextMining.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/FloresBitextMining.json similarity index 100% rename from results/izhx/udever-bloom-1b1/FloresBitextMining.json rename to 
outputs/benchmark_results/izhx/udever-bloom-1b1/FloresBitextMining.json diff --git a/results/izhx/udever-bloom-1b1/HALClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/HALClusteringS2S.json similarity index 100% rename from results/izhx/udever-bloom-1b1/HALClusteringS2S.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/HALClusteringS2S.json diff --git a/results/izhx/udever-bloom-1b1/MLSUMClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MLSUMClusteringP2P.json similarity index 100% rename from results/izhx/udever-bloom-1b1/MLSUMClusteringP2P.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MLSUMClusteringP2P.json diff --git a/results/izhx/udever-bloom-1b1/MLSUMClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MLSUMClusteringS2S.json similarity index 100% rename from results/izhx/udever-bloom-1b1/MLSUMClusteringS2S.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MLSUMClusteringS2S.json diff --git a/results/izhx/udever-bloom-1b1/MTOPDomainClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MTOPDomainClassification.json similarity index 100% rename from results/izhx/udever-bloom-1b1/MTOPDomainClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MTOPDomainClassification.json diff --git a/results/izhx/udever-bloom-1b1/MTOPIntentClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MTOPIntentClassification.json similarity index 100% rename from results/izhx/udever-bloom-1b1/MTOPIntentClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MTOPIntentClassification.json diff --git a/results/izhx/udever-bloom-1b1/MasakhaNEWSClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClassification.json similarity index 100% rename from results/izhx/udever-bloom-1b1/MasakhaNEWSClassification.json rename to 
outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClassification.json diff --git a/results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringP2P.json diff --git a/results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MasakhaNEWSClusteringS2S.json diff --git a/results/izhx/udever-bloom-1b1/MassiveIntentClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MassiveIntentClassification.json similarity index 100% rename from results/izhx/udever-bloom-1b1/MassiveIntentClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MassiveIntentClassification.json diff --git a/results/izhx/udever-bloom-1b1/MassiveScenarioClassification.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MassiveScenarioClassification.json similarity index 100% rename from results/izhx/udever-bloom-1b1/MassiveScenarioClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MassiveScenarioClassification.json diff --git a/results/izhx/udever-bloom-1b1/MintakaRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/MintakaRetrieval.json similarity index 100% rename from results/izhx/udever-bloom-1b1/MintakaRetrieval.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/MintakaRetrieval.json diff --git a/results/izhx/udever-bloom-1b1/OpusparcusPC.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/OpusparcusPC.json similarity index 100% rename from results/izhx/udever-bloom-1b1/OpusparcusPC.json rename to 
outputs/benchmark_results/izhx/udever-bloom-1b1/OpusparcusPC.json diff --git a/results/izhx/udever-bloom-1b1/PawsX.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/PawsX.json similarity index 100% rename from results/izhx/udever-bloom-1b1/PawsX.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/PawsX.json diff --git a/results/izhx/udever-bloom-1b1/SICKFr.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/SICKFr.json similarity index 100% rename from results/izhx/udever-bloom-1b1/SICKFr.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/SICKFr.json diff --git a/results/izhx/udever-bloom-1b1/STS22.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/STS22.json similarity index 100% rename from results/izhx/udever-bloom-1b1/STS22.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/STS22.json diff --git a/results/izhx/udever-bloom-1b1/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/izhx/udever-bloom-1b1/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/STSBenchmarkMultilingualSTS.json diff --git a/results/izhx/udever-bloom-1b1/SummEvalFr.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/SummEvalFr.json similarity index 100% rename from results/izhx/udever-bloom-1b1/SummEvalFr.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/SummEvalFr.json diff --git a/results/izhx/udever-bloom-1b1/SyntecReranking.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/SyntecReranking.json similarity index 100% rename from results/izhx/udever-bloom-1b1/SyntecReranking.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/SyntecReranking.json diff --git a/results/izhx/udever-bloom-1b1/SyntecRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/SyntecRetrieval.json similarity index 100% rename from results/izhx/udever-bloom-1b1/SyntecRetrieval.json rename 
to outputs/benchmark_results/izhx/udever-bloom-1b1/SyntecRetrieval.json diff --git a/results/izhx/udever-bloom-1b1/XPQARetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-1b1/XPQARetrieval.json similarity index 100% rename from results/izhx/udever-bloom-1b1/XPQARetrieval.json rename to outputs/benchmark_results/izhx/udever-bloom-1b1/XPQARetrieval.json diff --git a/results/izhx/udever-bloom-560m/AlloProfClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-560m/AlloProfClusteringP2P.json similarity index 100% rename from results/izhx/udever-bloom-560m/AlloProfClusteringP2P.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/AlloProfClusteringP2P.json diff --git a/results/izhx/udever-bloom-560m/AlloProfClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-560m/AlloProfClusteringS2S.json similarity index 100% rename from results/izhx/udever-bloom-560m/AlloProfClusteringS2S.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/AlloProfClusteringS2S.json diff --git a/results/izhx/udever-bloom-560m/AlloprofReranking.json b/outputs/benchmark_results/izhx/udever-bloom-560m/AlloprofReranking.json similarity index 100% rename from results/izhx/udever-bloom-560m/AlloprofReranking.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/AlloprofReranking.json diff --git a/results/izhx/udever-bloom-560m/AlloprofRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-560m/AlloprofRetrieval.json similarity index 100% rename from results/izhx/udever-bloom-560m/AlloprofRetrieval.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/AlloprofRetrieval.json diff --git a/results/izhx/udever-bloom-560m/AmazonReviewsClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/AmazonReviewsClassification.json similarity index 100% rename from results/izhx/udever-bloom-560m/AmazonReviewsClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/AmazonReviewsClassification.json diff 
--git a/results/izhx/udever-bloom-560m/BSARDRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-560m/BSARDRetrieval.json similarity index 100% rename from results/izhx/udever-bloom-560m/BSARDRetrieval.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/BSARDRetrieval.json diff --git a/results/izhx/udever-bloom-560m/DiaBLaBitextMining.json b/outputs/benchmark_results/izhx/udever-bloom-560m/DiaBLaBitextMining.json similarity index 100% rename from results/izhx/udever-bloom-560m/DiaBLaBitextMining.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/DiaBLaBitextMining.json diff --git a/results/izhx/udever-bloom-560m/FloresBitextMining.json b/outputs/benchmark_results/izhx/udever-bloom-560m/FloresBitextMining.json similarity index 100% rename from results/izhx/udever-bloom-560m/FloresBitextMining.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/FloresBitextMining.json diff --git a/results/izhx/udever-bloom-560m/HALClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-560m/HALClusteringS2S.json similarity index 100% rename from results/izhx/udever-bloom-560m/HALClusteringS2S.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/HALClusteringS2S.json diff --git a/results/izhx/udever-bloom-560m/MLSUMClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MLSUMClusteringP2P.json similarity index 100% rename from results/izhx/udever-bloom-560m/MLSUMClusteringP2P.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/MLSUMClusteringP2P.json diff --git a/results/izhx/udever-bloom-560m/MLSUMClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MLSUMClusteringS2S.json similarity index 100% rename from results/izhx/udever-bloom-560m/MLSUMClusteringS2S.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/MLSUMClusteringS2S.json diff --git a/results/izhx/udever-bloom-560m/MTOPDomainClassification.json 
b/outputs/benchmark_results/izhx/udever-bloom-560m/MTOPDomainClassification.json similarity index 100% rename from results/izhx/udever-bloom-560m/MTOPDomainClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/MTOPDomainClassification.json diff --git a/results/izhx/udever-bloom-560m/MTOPIntentClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MTOPIntentClassification.json similarity index 100% rename from results/izhx/udever-bloom-560m/MTOPIntentClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/MTOPIntentClassification.json diff --git a/results/izhx/udever-bloom-560m/MasakhaNEWSClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClassification.json similarity index 100% rename from results/izhx/udever-bloom-560m/MasakhaNEWSClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClassification.json diff --git a/results/izhx/udever-bloom-560m/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/izhx/udever-bloom-560m/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClusteringP2P.json diff --git a/results/izhx/udever-bloom-560m/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/izhx/udever-bloom-560m/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/MasakhaNEWSClusteringS2S.json diff --git a/results/izhx/udever-bloom-560m/MassiveIntentClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MassiveIntentClassification.json similarity index 100% rename from results/izhx/udever-bloom-560m/MassiveIntentClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/MassiveIntentClassification.json diff --git 
a/results/izhx/udever-bloom-560m/MassiveScenarioClassification.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MassiveScenarioClassification.json similarity index 100% rename from results/izhx/udever-bloom-560m/MassiveScenarioClassification.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/MassiveScenarioClassification.json diff --git a/results/izhx/udever-bloom-560m/MintakaRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-560m/MintakaRetrieval.json similarity index 100% rename from results/izhx/udever-bloom-560m/MintakaRetrieval.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/MintakaRetrieval.json diff --git a/results/izhx/udever-bloom-560m/OpusparcusPC.json b/outputs/benchmark_results/izhx/udever-bloom-560m/OpusparcusPC.json similarity index 100% rename from results/izhx/udever-bloom-560m/OpusparcusPC.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/OpusparcusPC.json diff --git a/results/izhx/udever-bloom-560m/PawsX.json b/outputs/benchmark_results/izhx/udever-bloom-560m/PawsX.json similarity index 100% rename from results/izhx/udever-bloom-560m/PawsX.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/PawsX.json diff --git a/results/izhx/udever-bloom-560m/SICKFr.json b/outputs/benchmark_results/izhx/udever-bloom-560m/SICKFr.json similarity index 100% rename from results/izhx/udever-bloom-560m/SICKFr.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/SICKFr.json diff --git a/results/izhx/udever-bloom-560m/STS22.json b/outputs/benchmark_results/izhx/udever-bloom-560m/STS22.json similarity index 100% rename from results/izhx/udever-bloom-560m/STS22.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/STS22.json diff --git a/results/izhx/udever-bloom-560m/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/izhx/udever-bloom-560m/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/izhx/udever-bloom-560m/STSBenchmarkMultilingualSTS.json 
rename to outputs/benchmark_results/izhx/udever-bloom-560m/STSBenchmarkMultilingualSTS.json diff --git a/results/izhx/udever-bloom-560m/SummEvalFr.json b/outputs/benchmark_results/izhx/udever-bloom-560m/SummEvalFr.json similarity index 100% rename from results/izhx/udever-bloom-560m/SummEvalFr.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/SummEvalFr.json diff --git a/results/izhx/udever-bloom-560m/SyntecReranking.json b/outputs/benchmark_results/izhx/udever-bloom-560m/SyntecReranking.json similarity index 100% rename from results/izhx/udever-bloom-560m/SyntecReranking.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/SyntecReranking.json diff --git a/results/izhx/udever-bloom-560m/SyntecRetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-560m/SyntecRetrieval.json similarity index 100% rename from results/izhx/udever-bloom-560m/SyntecRetrieval.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/SyntecRetrieval.json diff --git a/results/izhx/udever-bloom-560m/XPQARetrieval.json b/outputs/benchmark_results/izhx/udever-bloom-560m/XPQARetrieval.json similarity index 100% rename from results/izhx/udever-bloom-560m/XPQARetrieval.json rename to outputs/benchmark_results/izhx/udever-bloom-560m/XPQARetrieval.json diff --git a/results/laser2/AlloProfClusteringP2P.json b/outputs/benchmark_results/laser2/AlloProfClusteringP2P.json similarity index 100% rename from results/laser2/AlloProfClusteringP2P.json rename to outputs/benchmark_results/laser2/AlloProfClusteringP2P.json diff --git a/results/laser2/AlloProfClusteringS2S.json b/outputs/benchmark_results/laser2/AlloProfClusteringS2S.json similarity index 100% rename from results/laser2/AlloProfClusteringS2S.json rename to outputs/benchmark_results/laser2/AlloProfClusteringS2S.json diff --git a/results/laser2/AlloprofReranking.json b/outputs/benchmark_results/laser2/AlloprofReranking.json similarity index 100% rename from results/laser2/AlloprofReranking.json rename to 
outputs/benchmark_results/laser2/AlloprofReranking.json diff --git a/results/laser2/AlloprofRetrieval.json b/outputs/benchmark_results/laser2/AlloprofRetrieval.json similarity index 100% rename from results/laser2/AlloprofRetrieval.json rename to outputs/benchmark_results/laser2/AlloprofRetrieval.json diff --git a/results/laser2/AmazonReviewsClassification.json b/outputs/benchmark_results/laser2/AmazonReviewsClassification.json similarity index 100% rename from results/laser2/AmazonReviewsClassification.json rename to outputs/benchmark_results/laser2/AmazonReviewsClassification.json diff --git a/results/laser2/BSARDRetrieval.json b/outputs/benchmark_results/laser2/BSARDRetrieval.json similarity index 100% rename from results/laser2/BSARDRetrieval.json rename to outputs/benchmark_results/laser2/BSARDRetrieval.json diff --git a/results/laser2/DiaBLaBitextMining.json b/outputs/benchmark_results/laser2/DiaBLaBitextMining.json similarity index 100% rename from results/laser2/DiaBLaBitextMining.json rename to outputs/benchmark_results/laser2/DiaBLaBitextMining.json diff --git a/results/laser2/FloresBitextMining.json b/outputs/benchmark_results/laser2/FloresBitextMining.json similarity index 100% rename from results/laser2/FloresBitextMining.json rename to outputs/benchmark_results/laser2/FloresBitextMining.json diff --git a/results/laser2/HALClusteringS2S.json b/outputs/benchmark_results/laser2/HALClusteringS2S.json similarity index 100% rename from results/laser2/HALClusteringS2S.json rename to outputs/benchmark_results/laser2/HALClusteringS2S.json diff --git a/results/laser2/MLSUMClusteringP2P.json b/outputs/benchmark_results/laser2/MLSUMClusteringP2P.json similarity index 100% rename from results/laser2/MLSUMClusteringP2P.json rename to outputs/benchmark_results/laser2/MLSUMClusteringP2P.json diff --git a/results/laser2/MLSUMClusteringS2S.json b/outputs/benchmark_results/laser2/MLSUMClusteringS2S.json similarity index 100% rename from 
results/laser2/MLSUMClusteringS2S.json rename to outputs/benchmark_results/laser2/MLSUMClusteringS2S.json diff --git a/results/laser2/MTOPDomainClassification.json b/outputs/benchmark_results/laser2/MTOPDomainClassification.json similarity index 100% rename from results/laser2/MTOPDomainClassification.json rename to outputs/benchmark_results/laser2/MTOPDomainClassification.json diff --git a/results/laser2/MTOPIntentClassification.json b/outputs/benchmark_results/laser2/MTOPIntentClassification.json similarity index 100% rename from results/laser2/MTOPIntentClassification.json rename to outputs/benchmark_results/laser2/MTOPIntentClassification.json diff --git a/results/laser2/MasakhaNEWSClassification.json b/outputs/benchmark_results/laser2/MasakhaNEWSClassification.json similarity index 100% rename from results/laser2/MasakhaNEWSClassification.json rename to outputs/benchmark_results/laser2/MasakhaNEWSClassification.json diff --git a/results/laser2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/laser2/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/laser2/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/laser2/MasakhaNEWSClusteringP2P.json diff --git a/results/laser2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/laser2/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/laser2/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/laser2/MasakhaNEWSClusteringS2S.json diff --git a/results/laser2/MassiveIntentClassification.json b/outputs/benchmark_results/laser2/MassiveIntentClassification.json similarity index 100% rename from results/laser2/MassiveIntentClassification.json rename to outputs/benchmark_results/laser2/MassiveIntentClassification.json diff --git a/results/laser2/MassiveScenarioClassification.json b/outputs/benchmark_results/laser2/MassiveScenarioClassification.json similarity index 100% rename from results/laser2/MassiveScenarioClassification.json rename to 
outputs/benchmark_results/laser2/MassiveScenarioClassification.json diff --git a/results/laser2/MintakaRetrieval.json b/outputs/benchmark_results/laser2/MintakaRetrieval.json similarity index 100% rename from results/laser2/MintakaRetrieval.json rename to outputs/benchmark_results/laser2/MintakaRetrieval.json diff --git a/results/laser2/OpusparcusPC.json b/outputs/benchmark_results/laser2/OpusparcusPC.json similarity index 100% rename from results/laser2/OpusparcusPC.json rename to outputs/benchmark_results/laser2/OpusparcusPC.json diff --git a/results/laser2/PawsX.json b/outputs/benchmark_results/laser2/PawsX.json similarity index 100% rename from results/laser2/PawsX.json rename to outputs/benchmark_results/laser2/PawsX.json diff --git a/results/laser2/SICKFr.json b/outputs/benchmark_results/laser2/SICKFr.json similarity index 100% rename from results/laser2/SICKFr.json rename to outputs/benchmark_results/laser2/SICKFr.json diff --git a/results/laser2/STS22.json b/outputs/benchmark_results/laser2/STS22.json similarity index 100% rename from results/laser2/STS22.json rename to outputs/benchmark_results/laser2/STS22.json diff --git a/results/laser2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/laser2/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/laser2/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/laser2/STSBenchmarkMultilingualSTS.json diff --git a/results/laser2/SummEvalFr.json b/outputs/benchmark_results/laser2/SummEvalFr.json similarity index 100% rename from results/laser2/SummEvalFr.json rename to outputs/benchmark_results/laser2/SummEvalFr.json diff --git a/results/laser2/SyntecReranking.json b/outputs/benchmark_results/laser2/SyntecReranking.json similarity index 100% rename from results/laser2/SyntecReranking.json rename to outputs/benchmark_results/laser2/SyntecReranking.json diff --git a/results/laser2/SyntecRetrieval.json b/outputs/benchmark_results/laser2/SyntecRetrieval.json similarity 
index 100% rename from results/laser2/SyntecRetrieval.json rename to outputs/benchmark_results/laser2/SyntecRetrieval.json diff --git a/results/laser2/XPQARetrieval.json b/outputs/benchmark_results/laser2/XPQARetrieval.json similarity index 100% rename from results/laser2/XPQARetrieval.json rename to outputs/benchmark_results/laser2/XPQARetrieval.json diff --git a/results/mistral-embed/AlloProfClusteringP2P.json b/outputs/benchmark_results/mistral-embed/AlloProfClusteringP2P.json similarity index 100% rename from results/mistral-embed/AlloProfClusteringP2P.json rename to outputs/benchmark_results/mistral-embed/AlloProfClusteringP2P.json diff --git a/results/mistral-embed/AlloProfClusteringS2S.json b/outputs/benchmark_results/mistral-embed/AlloProfClusteringS2S.json similarity index 100% rename from results/mistral-embed/AlloProfClusteringS2S.json rename to outputs/benchmark_results/mistral-embed/AlloProfClusteringS2S.json diff --git a/results/mistral-embed/AlloprofReranking.json b/outputs/benchmark_results/mistral-embed/AlloprofReranking.json similarity index 100% rename from results/mistral-embed/AlloprofReranking.json rename to outputs/benchmark_results/mistral-embed/AlloprofReranking.json diff --git a/results/mistral-embed/AlloprofRetrieval.json b/outputs/benchmark_results/mistral-embed/AlloprofRetrieval.json similarity index 100% rename from results/mistral-embed/AlloprofRetrieval.json rename to outputs/benchmark_results/mistral-embed/AlloprofRetrieval.json diff --git a/results/mistral-embed/AmazonReviewsClassification.json b/outputs/benchmark_results/mistral-embed/AmazonReviewsClassification.json similarity index 100% rename from results/mistral-embed/AmazonReviewsClassification.json rename to outputs/benchmark_results/mistral-embed/AmazonReviewsClassification.json diff --git a/results/mistral-embed/BSARDRetrieval.json b/outputs/benchmark_results/mistral-embed/BSARDRetrieval.json similarity index 100% rename from results/mistral-embed/BSARDRetrieval.json 
rename to outputs/benchmark_results/mistral-embed/BSARDRetrieval.json diff --git a/results/mistral-embed/HALClusteringS2S.json b/outputs/benchmark_results/mistral-embed/HALClusteringS2S.json similarity index 100% rename from results/mistral-embed/HALClusteringS2S.json rename to outputs/benchmark_results/mistral-embed/HALClusteringS2S.json diff --git a/results/mistral-embed/MLSUMClusteringP2P.json b/outputs/benchmark_results/mistral-embed/MLSUMClusteringP2P.json similarity index 100% rename from results/mistral-embed/MLSUMClusteringP2P.json rename to outputs/benchmark_results/mistral-embed/MLSUMClusteringP2P.json diff --git a/results/mistral-embed/MLSUMClusteringS2S.json b/outputs/benchmark_results/mistral-embed/MLSUMClusteringS2S.json similarity index 100% rename from results/mistral-embed/MLSUMClusteringS2S.json rename to outputs/benchmark_results/mistral-embed/MLSUMClusteringS2S.json diff --git a/results/mistral-embed/MTOPDomainClassification.json b/outputs/benchmark_results/mistral-embed/MTOPDomainClassification.json similarity index 100% rename from results/mistral-embed/MTOPDomainClassification.json rename to outputs/benchmark_results/mistral-embed/MTOPDomainClassification.json diff --git a/results/mistral-embed/MTOPIntentClassification.json b/outputs/benchmark_results/mistral-embed/MTOPIntentClassification.json similarity index 100% rename from results/mistral-embed/MTOPIntentClassification.json rename to outputs/benchmark_results/mistral-embed/MTOPIntentClassification.json diff --git a/results/mistral-embed/MasakhaNEWSClassification.json b/outputs/benchmark_results/mistral-embed/MasakhaNEWSClassification.json similarity index 100% rename from results/mistral-embed/MasakhaNEWSClassification.json rename to outputs/benchmark_results/mistral-embed/MasakhaNEWSClassification.json diff --git a/results/mistral-embed/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/mistral-embed/MasakhaNEWSClusteringP2P.json similarity index 100% rename from 
results/mistral-embed/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/mistral-embed/MasakhaNEWSClusteringP2P.json diff --git a/results/mistral-embed/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/mistral-embed/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/mistral-embed/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/mistral-embed/MasakhaNEWSClusteringS2S.json diff --git a/results/mistral-embed/MassiveIntentClassification.json b/outputs/benchmark_results/mistral-embed/MassiveIntentClassification.json similarity index 100% rename from results/mistral-embed/MassiveIntentClassification.json rename to outputs/benchmark_results/mistral-embed/MassiveIntentClassification.json diff --git a/results/mistral-embed/MassiveScenarioClassification.json b/outputs/benchmark_results/mistral-embed/MassiveScenarioClassification.json similarity index 100% rename from results/mistral-embed/MassiveScenarioClassification.json rename to outputs/benchmark_results/mistral-embed/MassiveScenarioClassification.json diff --git a/results/mistral-embed/MintakaRetrieval.json b/outputs/benchmark_results/mistral-embed/MintakaRetrieval.json similarity index 100% rename from results/mistral-embed/MintakaRetrieval.json rename to outputs/benchmark_results/mistral-embed/MintakaRetrieval.json diff --git a/results/mistral-embed/OpusparcusPC.json b/outputs/benchmark_results/mistral-embed/OpusparcusPC.json similarity index 100% rename from results/mistral-embed/OpusparcusPC.json rename to outputs/benchmark_results/mistral-embed/OpusparcusPC.json diff --git a/results/mistral-embed/PawsX.json b/outputs/benchmark_results/mistral-embed/PawsX.json similarity index 100% rename from results/mistral-embed/PawsX.json rename to outputs/benchmark_results/mistral-embed/PawsX.json diff --git a/results/mistral-embed/SICKFr.json b/outputs/benchmark_results/mistral-embed/SICKFr.json similarity index 100% rename from results/mistral-embed/SICKFr.json 
rename to outputs/benchmark_results/mistral-embed/SICKFr.json diff --git a/results/mistral-embed/STS22.json b/outputs/benchmark_results/mistral-embed/STS22.json similarity index 100% rename from results/mistral-embed/STS22.json rename to outputs/benchmark_results/mistral-embed/STS22.json diff --git a/results/mistral-embed/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/mistral-embed/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/mistral-embed/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/mistral-embed/STSBenchmarkMultilingualSTS.json diff --git a/results/mistral-embed/SummEvalFr.json b/outputs/benchmark_results/mistral-embed/SummEvalFr.json similarity index 100% rename from results/mistral-embed/SummEvalFr.json rename to outputs/benchmark_results/mistral-embed/SummEvalFr.json diff --git a/results/mistral-embed/SyntecReranking.json b/outputs/benchmark_results/mistral-embed/SyntecReranking.json similarity index 100% rename from results/mistral-embed/SyntecReranking.json rename to outputs/benchmark_results/mistral-embed/SyntecReranking.json diff --git a/results/mistral-embed/SyntecRetrieval.json b/outputs/benchmark_results/mistral-embed/SyntecRetrieval.json similarity index 100% rename from results/mistral-embed/SyntecRetrieval.json rename to outputs/benchmark_results/mistral-embed/SyntecRetrieval.json diff --git a/results/mistral-embed/XPQARetrieval.json b/outputs/benchmark_results/mistral-embed/XPQARetrieval.json similarity index 100% rename from results/mistral-embed/XPQARetrieval.json rename to outputs/benchmark_results/mistral-embed/XPQARetrieval.json diff --git a/results/sentence-transformers/LaBSE/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/LaBSE/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/LaBSE/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/AlloProfClusteringP2P.json diff 
--git a/results/sentence-transformers/LaBSE/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/LaBSE/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/LaBSE/AlloProfClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/AlloProfClusteringS2S.json diff --git a/results/sentence-transformers/LaBSE/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/LaBSE/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/LaBSE/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/AlloprofReranking.json diff --git a/results/sentence-transformers/LaBSE/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/LaBSE/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/LaBSE/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/AlloprofRetrieval.json diff --git a/results/sentence-transformers/LaBSE/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/AmazonReviewsClassification.json similarity index 100% rename from results/sentence-transformers/LaBSE/AmazonReviewsClassification.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/LaBSE/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/LaBSE/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/LaBSE/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/BSARDRetrieval.json diff --git a/results/sentence-transformers/LaBSE/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/LaBSE/DiaBLaBitextMining.json similarity index 100% rename from results/sentence-transformers/LaBSE/DiaBLaBitextMining.json rename to 
outputs/benchmark_results/sentence-transformers/LaBSE/DiaBLaBitextMining.json diff --git a/results/sentence-transformers/LaBSE/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/LaBSE/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/LaBSE/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/FloresBitextMining.json diff --git a/results/sentence-transformers/LaBSE/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/LaBSE/HALClusteringS2S.json similarity index 100% rename from results/sentence-transformers/LaBSE/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/HALClusteringS2S.json diff --git a/results/sentence-transformers/LaBSE/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/LaBSE/MLSUMClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/MLSUMClusteringP2P.json diff --git a/results/sentence-transformers/LaBSE/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/LaBSE/MLSUMClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/LaBSE/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/LaBSE/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/MTOPDomainClassification.json diff --git a/results/sentence-transformers/LaBSE/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MTOPIntentClassification.json similarity index 100% rename from 
results/sentence-transformers/LaBSE/MTOPIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/MTOPIntentClassification.json diff --git a/results/sentence-transformers/LaBSE/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClassification.json similarity index 100% rename from results/sentence-transformers/LaBSE/MasakhaNEWSClassification.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/LaBSE/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/LaBSE/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClusteringP2P.json diff --git a/results/sentence-transformers/LaBSE/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/LaBSE/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/LaBSE/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/LaBSE/MassiveIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/MassiveIntentClassification.json diff --git a/results/sentence-transformers/LaBSE/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/LaBSE/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/MassiveScenarioClassification.json diff 
--git a/results/sentence-transformers/LaBSE/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/LaBSE/MintakaRetrieval.json similarity index 100% rename from results/sentence-transformers/LaBSE/MintakaRetrieval.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/MintakaRetrieval.json diff --git a/results/sentence-transformers/LaBSE/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/LaBSE/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/LaBSE/OpusparcusPC.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/OpusparcusPC.json diff --git a/results/sentence-transformers/LaBSE/PawsX.json b/outputs/benchmark_results/sentence-transformers/LaBSE/PawsX.json similarity index 100% rename from results/sentence-transformers/LaBSE/PawsX.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/PawsX.json diff --git a/results/sentence-transformers/LaBSE/SICKFr.json b/outputs/benchmark_results/sentence-transformers/LaBSE/SICKFr.json similarity index 100% rename from results/sentence-transformers/LaBSE/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/SICKFr.json diff --git a/results/sentence-transformers/LaBSE/STS22.json b/outputs/benchmark_results/sentence-transformers/LaBSE/STS22.json similarity index 100% rename from results/sentence-transformers/LaBSE/STS22.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/STS22.json diff --git a/results/sentence-transformers/LaBSE/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/LaBSE/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/sentence-transformers/LaBSE/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/LaBSE/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/LaBSE/SummEvalFr.json 
similarity index 100% rename from results/sentence-transformers/LaBSE/SummEvalFr.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/SummEvalFr.json diff --git a/results/sentence-transformers/LaBSE/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/LaBSE/SyntecReranking.json similarity index 100% rename from results/sentence-transformers/LaBSE/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/SyntecReranking.json diff --git a/results/sentence-transformers/LaBSE/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/LaBSE/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/LaBSE/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/SyntecRetrieval.json diff --git a/results/sentence-transformers/LaBSE/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/LaBSE/XPQARetrieval.json similarity index 100% rename from results/sentence-transformers/LaBSE/XPQARetrieval.json rename to outputs/benchmark_results/sentence-transformers/LaBSE/XPQARetrieval.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringP2P.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloProfClusteringS2S.json diff --git 
a/results/sentence-transformers/all-MiniLM-L12-v2/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloprofReranking.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AlloprofRetrieval.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AmazonReviewsClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/AmazonReviewsClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/BSARDRetrieval.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/DiaBLaBitextMining.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/DiaBLaBitextMining.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/DiaBLaBitextMining.json diff --git 
a/results/sentence-transformers/all-MiniLM-L12-v2/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/FloresBitextMining.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/HALClusteringS2S.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/HALClusteringS2S.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringP2P.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MTOPDomainClassification.json diff --git 
a/results/sentence-transformers/all-MiniLM-L12-v2/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MTOPIntentClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/MTOPIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MTOPIntentClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/MassiveIntentClassification.json rename to 
outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MassiveIntentClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MassiveScenarioClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MintakaRetrieval.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/MintakaRetrieval.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/MintakaRetrieval.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/OpusparcusPC.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/OpusparcusPC.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/PawsX.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/PawsX.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/PawsX.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/PawsX.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/SICKFr.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SICKFr.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SICKFr.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/STS22.json 
b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/STS22.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/STS22.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/STS22.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SummEvalFr.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/SummEvalFr.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SummEvalFr.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SyntecReranking.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SyntecReranking.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L12-v2/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/SyntecRetrieval.json diff --git a/results/sentence-transformers/all-MiniLM-L12-v2/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/XPQARetrieval.json similarity index 100% rename from 
results/sentence-transformers/all-MiniLM-L12-v2/XPQARetrieval.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L12-v2/XPQARetrieval.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringP2P.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloProfClusteringS2S.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloprofReranking.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AlloprofRetrieval.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AmazonReviewsClassification.json similarity index 100% rename from 
results/sentence-transformers/all-MiniLM-L6-v2/AmazonReviewsClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/BSARDRetrieval.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/DiaBLaBitextMining.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/DiaBLaBitextMining.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/DiaBLaBitextMining.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/FloresBitextMining.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/HALClusteringS2S.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/HALClusteringS2S.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringP2P.json rename to 
outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringP2P.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MTOPDomainClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MTOPIntentClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/MTOPIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MTOPIntentClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringP2P.json rename to 
outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringP2P.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/MassiveIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MassiveIntentClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MassiveScenarioClassification.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MintakaRetrieval.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/MintakaRetrieval.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/MintakaRetrieval.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/OpusparcusPC.json rename to 
outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/OpusparcusPC.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/PawsX.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/PawsX.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/PawsX.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/PawsX.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/SICKFr.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SICKFr.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SICKFr.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/STS22.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/STS22.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/STS22.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/STS22.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SummEvalFr.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/SummEvalFr.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SummEvalFr.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SyntecReranking.json 
similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SyntecReranking.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/SyntecRetrieval.json diff --git a/results/sentence-transformers/all-MiniLM-L6-v2/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/XPQARetrieval.json similarity index 100% rename from results/sentence-transformers/all-MiniLM-L6-v2/XPQARetrieval.json rename to outputs/benchmark_results/sentence-transformers/all-MiniLM-L6-v2/XPQARetrieval.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringP2P.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloProfClusteringS2S.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofReranking.json 
b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofReranking.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AlloprofRetrieval.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AmazonReviewsClassification.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/AmazonReviewsClassification.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/BSARDRetrieval.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/DiaBLaBitextMining.json similarity index 100% rename from 
results/sentence-transformers/distiluse-base-multilingual-cased-v2/DiaBLaBitextMining.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/DiaBLaBitextMining.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/FloresBitextMining.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/HALClusteringS2S.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/HALClusteringS2S.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringP2P.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringS2S.json rename to 
outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPDomainClassification.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPIntentClassification.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MTOPIntentClassification.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClassification.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClassification.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringP2P.json rename to 
outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringP2P.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveIntentClassification.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MassiveScenarioClassification.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MintakaRetrieval.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/MintakaRetrieval.json rename to 
outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/MintakaRetrieval.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/OpusparcusPC.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/OpusparcusPC.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/PawsX.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/PawsX.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/PawsX.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/PawsX.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/SICKFr.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SICKFr.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SICKFr.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/STS22.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/STS22.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/STS22.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/STS22.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/STSBenchmarkMultilingualSTS.json 
similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SummEvalFr.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/SummEvalFr.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SummEvalFr.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecReranking.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecReranking.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/SyntecRetrieval.json diff --git a/results/sentence-transformers/distiluse-base-multilingual-cased-v2/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/XPQARetrieval.json similarity index 100% rename from results/sentence-transformers/distiluse-base-multilingual-cased-v2/XPQARetrieval.json rename to 
outputs/benchmark_results/sentence-transformers/distiluse-base-multilingual-cased-v2/XPQARetrieval.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringP2P.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloProfClusteringS2S.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofReranking.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AlloprofRetrieval.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AmazonReviewsClassification.json 
similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AmazonReviewsClassification.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/BSARDRetrieval.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/DiaBLaBitextMining.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/DiaBLaBitextMining.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/DiaBLaBitextMining.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/FloresBitextMining.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/HALClusteringS2S.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/HALClusteringS2S.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringP2P.json 
b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringP2P.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPDomainClassification.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPIntentClassification.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MTOPIntentClassification.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClassification.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClassification.json rename to 
outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringP2P.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveIntentClassification.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MassiveScenarioClassification.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MintakaRetrieval.json 
b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MintakaRetrieval.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MintakaRetrieval.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/MintakaRetrieval.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/OpusparcusPC.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/OpusparcusPC.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/PawsX.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/PawsX.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/PawsX.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/PawsX.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SICKFr.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SICKFr.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SICKFr.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STS22.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STS22.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STS22.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STS22.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STSBenchmarkMultilingualSTS.json 
b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SummEvalFr.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SummEvalFr.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SummEvalFr.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecReranking.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecReranking.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/SyntecRetrieval.json diff --git a/results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/XPQARetrieval.json similarity index 100% rename from results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/XPQARetrieval.json rename to outputs/benchmark_results/sentence-transformers/multi-qa-MiniLM-L6-cos-v1/XPQARetrieval.json diff --git 
a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringP2P.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloProfClusteringS2S.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofReranking.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AlloprofRetrieval.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AmazonReviewsClassification.json 
b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AmazonReviewsClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AmazonReviewsClassification.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/BSARDRetrieval.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/DiaBLaBitextMining.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/DiaBLaBitextMining.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/DiaBLaBitextMining.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/FloresBitextMining.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/HALClusteringS2S.json similarity index 100% rename 
from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/HALClusteringS2S.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringP2P.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPDomainClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPIntentClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPIntentClassification.json rename to 
outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MTOPIntentClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClassification.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringP2P.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveIntentClassification.json rename to 
outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveIntentClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MassiveScenarioClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MintakaRetrieval.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MintakaRetrieval.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/MintakaRetrieval.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/OpusparcusPC.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/OpusparcusPC.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/PawsX.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/PawsX.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/PawsX.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/PawsX.json diff --git 
a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SICKFr.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SICKFr.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SICKFr.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STS22.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STS22.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STS22.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STS22.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SummEvalFr.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SummEvalFr.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SummEvalFr.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecReranking.json similarity index 100% rename from 
results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecReranking.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/SyntecRetrieval.json diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/XPQARetrieval.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/XPQARetrieval.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/XPQARetrieval.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringP2P.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringS2S.json rename to 
outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloProfClusteringS2S.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofReranking.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AlloprofRetrieval.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/BSARDRetrieval.json diff --git 
a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/DiaBLaBitextMining.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/DiaBLaBitextMining.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/DiaBLaBitextMining.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/FloresBitextMining.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/HALClusteringS2S.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/HALClusteringS2S.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringP2P.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringS2S.json 
b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClassification.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringP2P.json 
b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringP2P.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MintakaRetrieval.json 
b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MintakaRetrieval.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MintakaRetrieval.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MintakaRetrieval.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/OpusparcusPC.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/OpusparcusPC.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/PawsX.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/PawsX.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/PawsX.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/PawsX.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SICKFr.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SICKFr.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SICKFr.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json rename to 
outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SummEvalFr.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SummEvalFr.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SummEvalFr.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecReranking.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecReranking.json diff --git a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/SyntecRetrieval.json diff --git 
a/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/XPQARetrieval.json similarity index 100% rename from results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/XPQARetrieval.json rename to outputs/benchmark_results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/XPQARetrieval.json diff --git a/results/sentence-transformers/sentence-t5-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloProfClusteringP2P.json diff --git a/results/sentence-transformers/sentence-t5-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/AlloProfClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloProfClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-base/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloprofReranking.json diff --git a/results/sentence-transformers/sentence-t5-base/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/AlloprofRetrieval.json diff --git 
a/results/sentence-transformers/sentence-t5-base/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/AmazonReviewsClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/AmazonReviewsClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/sentence-t5-base/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/BSARDRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-base/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/DiaBLaBitextMining.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/DiaBLaBitextMining.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/DiaBLaBitextMining.json diff --git a/results/sentence-transformers/sentence-t5-base/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/FloresBitextMining.json diff --git a/results/sentence-transformers/sentence-t5-base/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/HALClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/HALClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-base/MLSUMClusteringP2P.json 
b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/MLSUMClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MLSUMClusteringP2P.json diff --git a/results/sentence-transformers/sentence-t5-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/MLSUMClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-base/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MTOPDomainClassification.json diff --git a/results/sentence-transformers/sentence-t5-base/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MTOPIntentClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/MTOPIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MTOPIntentClassification.json diff --git a/results/sentence-transformers/sentence-t5-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/MasakhaNEWSClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringP2P.json 
b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringP2P.json diff --git a/results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-base/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/MassiveIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MassiveIntentClassification.json diff --git a/results/sentence-transformers/sentence-t5-base/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MassiveScenarioClassification.json diff --git a/results/sentence-transformers/sentence-t5-base/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/MintakaRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/MintakaRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/MintakaRetrieval.json diff --git 
a/results/sentence-transformers/sentence-t5-base/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/OpusparcusPC.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/OpusparcusPC.json diff --git a/results/sentence-transformers/sentence-t5-base/PawsX.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/PawsX.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/PawsX.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/PawsX.json diff --git a/results/sentence-transformers/sentence-t5-base/SICKFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/SICKFr.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/SICKFr.json diff --git a/results/sentence-transformers/sentence-t5-base/STS22.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/STS22.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/STS22.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/STS22.json diff --git a/results/sentence-transformers/sentence-t5-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/sentence-t5-base/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/SummEvalFr.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/SummEvalFr.json 
rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/SummEvalFr.json diff --git a/results/sentence-transformers/sentence-t5-base/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/SyntecReranking.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/SyntecReranking.json diff --git a/results/sentence-transformers/sentence-t5-base/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/SyntecRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-base/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-base/XPQARetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-base/XPQARetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-base/XPQARetrieval.json diff --git a/results/sentence-transformers/sentence-t5-large/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloProfClusteringP2P.json diff --git a/results/sentence-transformers/sentence-t5-large/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/AlloProfClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloProfClusteringS2S.json diff --git 
a/results/sentence-transformers/sentence-t5-large/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloprofReranking.json diff --git a/results/sentence-transformers/sentence-t5-large/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/AlloprofRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-large/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/AmazonReviewsClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/AmazonReviewsClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/sentence-t5-large/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/BSARDRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-large/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/DiaBLaBitextMining.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/DiaBLaBitextMining.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/DiaBLaBitextMining.json diff --git 
a/results/sentence-transformers/sentence-t5-large/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/FloresBitextMining.json diff --git a/results/sentence-transformers/sentence-t5-large/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/HALClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/HALClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-large/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/MLSUMClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MLSUMClusteringP2P.json diff --git a/results/sentence-transformers/sentence-t5-large/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/MLSUMClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-large/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MTOPDomainClassification.json diff --git 
a/results/sentence-transformers/sentence-t5-large/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MTOPIntentClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/MTOPIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MTOPIntentClassification.json diff --git a/results/sentence-transformers/sentence-t5-large/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/MasakhaNEWSClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringP2P.json diff --git a/results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-large/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/MassiveIntentClassification.json rename to 
outputs/benchmark_results/sentence-transformers/sentence-t5-large/MassiveIntentClassification.json diff --git a/results/sentence-transformers/sentence-t5-large/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MassiveScenarioClassification.json diff --git a/results/sentence-transformers/sentence-t5-large/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/MintakaRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/MintakaRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/MintakaRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-large/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/OpusparcusPC.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/OpusparcusPC.json diff --git a/results/sentence-transformers/sentence-t5-large/PawsX.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/PawsX.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/PawsX.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/PawsX.json diff --git a/results/sentence-transformers/sentence-t5-large/SICKFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/SICKFr.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/SICKFr.json diff --git a/results/sentence-transformers/sentence-t5-large/STS22.json 
b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/STS22.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/STS22.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/STS22.json diff --git a/results/sentence-transformers/sentence-t5-large/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/sentence-t5-large/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/SummEvalFr.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/SummEvalFr.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/SummEvalFr.json diff --git a/results/sentence-transformers/sentence-t5-large/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/SyntecReranking.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/SyntecReranking.json diff --git a/results/sentence-transformers/sentence-t5-large/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-large/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/SyntecRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-large/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-large/XPQARetrieval.json similarity index 100% rename from 
results/sentence-transformers/sentence-t5-large/XPQARetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-large/XPQARetrieval.json diff --git a/results/sentence-transformers/sentence-t5-xl/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloProfClusteringP2P.json diff --git a/results/sentence-transformers/sentence-t5-xl/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/AlloProfClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloProfClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-xl/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloprofReranking.json diff --git a/results/sentence-transformers/sentence-t5-xl/AlloprofRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AlloprofRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-xl/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AmazonReviewsClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/AmazonReviewsClassification.json rename to 
outputs/benchmark_results/sentence-transformers/sentence-t5-xl/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/sentence-t5-xl/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/BSARDRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-xl/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/DiaBLaBitextMining.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/DiaBLaBitextMining.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/DiaBLaBitextMining.json diff --git a/results/sentence-transformers/sentence-t5-xl/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/FloresBitextMining.json diff --git a/results/sentence-transformers/sentence-t5-xl/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/HALClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/HALClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-xl/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/MLSUMClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MLSUMClusteringP2P.json diff --git 
a/results/sentence-transformers/sentence-t5-xl/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/MLSUMClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-xl/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MTOPDomainClassification.json diff --git a/results/sentence-transformers/sentence-t5-xl/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MTOPIntentClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/MTOPIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MTOPIntentClassification.json diff --git a/results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringP2P.json diff --git 
a/results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-xl/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/MassiveIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MassiveIntentClassification.json diff --git a/results/sentence-transformers/sentence-t5-xl/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MassiveScenarioClassification.json diff --git a/results/sentence-transformers/sentence-t5-xl/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MintakaRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/MintakaRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/MintakaRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-xl/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/OpusparcusPC.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/OpusparcusPC.json diff --git a/results/sentence-transformers/sentence-t5-xl/PawsX.json 
b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/PawsX.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/PawsX.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/PawsX.json diff --git a/results/sentence-transformers/sentence-t5-xl/SICKFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SICKFr.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SICKFr.json diff --git a/results/sentence-transformers/sentence-t5-xl/STS22.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/STS22.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/STS22.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/STS22.json diff --git a/results/sentence-transformers/sentence-t5-xl/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/sentence-t5-xl/SummEvalFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SummEvalFr.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/SummEvalFr.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SummEvalFr.json diff --git a/results/sentence-transformers/sentence-t5-xl/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SyntecReranking.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SyntecReranking.json 
diff --git a/results/sentence-transformers/sentence-t5-xl/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/SyntecRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-xl/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xl/XPQARetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xl/XPQARetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xl/XPQARetrieval.json diff --git a/results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringP2P.json diff --git a/results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloProfClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-xxl/AlloprofReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloprofReranking.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/AlloprofReranking.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloprofReranking.json diff --git a/results/sentence-transformers/sentence-t5-xxl/AlloprofRetrieval.json 
b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloprofRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/AlloprofRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AlloprofRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-xxl/AmazonReviewsClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AmazonReviewsClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/AmazonReviewsClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/AmazonReviewsClassification.json diff --git a/results/sentence-transformers/sentence-t5-xxl/BSARDRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/BSARDRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/BSARDRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/BSARDRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-xxl/DiaBLaBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/DiaBLaBitextMining.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/DiaBLaBitextMining.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/DiaBLaBitextMining.json diff --git a/results/sentence-transformers/sentence-t5-xxl/FloresBitextMining.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/FloresBitextMining.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/FloresBitextMining.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/FloresBitextMining.json diff --git a/results/sentence-transformers/sentence-t5-xxl/HALClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/HALClusteringS2S.json similarity index 100% 
rename from results/sentence-transformers/sentence-t5-xxl/HALClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/HALClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringP2P.json diff --git a/results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MLSUMClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-xxl/MTOPDomainClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MTOPDomainClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/MTOPDomainClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MTOPDomainClassification.json diff --git a/results/sentence-transformers/sentence-t5-xxl/MTOPIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MTOPIntentClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/MTOPIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MTOPIntentClassification.json diff --git a/results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClassification.json similarity index 100% rename from 
results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClassification.json diff --git a/results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringP2P.json diff --git a/results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MasakhaNEWSClusteringS2S.json diff --git a/results/sentence-transformers/sentence-t5-xxl/MassiveIntentClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MassiveIntentClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/MassiveIntentClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MassiveIntentClassification.json diff --git a/results/sentence-transformers/sentence-t5-xxl/MassiveScenarioClassification.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MassiveScenarioClassification.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/MassiveScenarioClassification.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MassiveScenarioClassification.json diff --git a/results/sentence-transformers/sentence-t5-xxl/MintakaRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MintakaRetrieval.json similarity index 
100% rename from results/sentence-transformers/sentence-t5-xxl/MintakaRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/MintakaRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-xxl/OpusparcusPC.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/OpusparcusPC.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/OpusparcusPC.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/OpusparcusPC.json diff --git a/results/sentence-transformers/sentence-t5-xxl/PawsX.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/PawsX.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/PawsX.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/PawsX.json diff --git a/results/sentence-transformers/sentence-t5-xxl/SICKFr.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SICKFr.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/SICKFr.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SICKFr.json diff --git a/results/sentence-transformers/sentence-t5-xxl/STS22.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/STS22.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/STS22.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/STS22.json diff --git a/results/sentence-transformers/sentence-t5-xxl/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/STSBenchmarkMultilingualSTS.json diff --git a/results/sentence-transformers/sentence-t5-xxl/SummEvalFr.json 
b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SummEvalFr.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/SummEvalFr.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SummEvalFr.json diff --git a/results/sentence-transformers/sentence-t5-xxl/SyntecReranking.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SyntecReranking.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/SyntecReranking.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SyntecReranking.json diff --git a/results/sentence-transformers/sentence-t5-xxl/SyntecRetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SyntecRetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/SyntecRetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/SyntecRetrieval.json diff --git a/results/sentence-transformers/sentence-t5-xxl/XPQARetrieval.json b/outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/XPQARetrieval.json similarity index 100% rename from results/sentence-transformers/sentence-t5-xxl/XPQARetrieval.json rename to outputs/benchmark_results/sentence-transformers/sentence-t5-xxl/XPQARetrieval.json diff --git a/results/shibing624/text2vec-base-multilingual/AlloProfClusteringP2P.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloProfClusteringP2P.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/AlloProfClusteringP2P.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloProfClusteringP2P.json diff --git a/results/shibing624/text2vec-base-multilingual/AlloProfClusteringS2S.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloProfClusteringS2S.json similarity index 100% rename from 
results/shibing624/text2vec-base-multilingual/AlloProfClusteringS2S.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloProfClusteringS2S.json diff --git a/results/shibing624/text2vec-base-multilingual/AlloprofReranking.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloprofReranking.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/AlloprofReranking.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloprofReranking.json diff --git a/results/shibing624/text2vec-base-multilingual/AlloprofRetrieval.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloprofRetrieval.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/AlloprofRetrieval.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/AlloprofRetrieval.json diff --git a/results/shibing624/text2vec-base-multilingual/AmazonReviewsClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/AmazonReviewsClassification.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/AmazonReviewsClassification.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/AmazonReviewsClassification.json diff --git a/results/shibing624/text2vec-base-multilingual/BSARDRetrieval.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/BSARDRetrieval.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/BSARDRetrieval.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/BSARDRetrieval.json diff --git a/results/shibing624/text2vec-base-multilingual/DiaBLaBitextMining.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/DiaBLaBitextMining.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/DiaBLaBitextMining.json rename to 
outputs/benchmark_results/shibing624/text2vec-base-multilingual/DiaBLaBitextMining.json diff --git a/results/shibing624/text2vec-base-multilingual/FloresBitextMining.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/FloresBitextMining.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/FloresBitextMining.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/FloresBitextMining.json diff --git a/results/shibing624/text2vec-base-multilingual/HALClusteringS2S.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/HALClusteringS2S.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/HALClusteringS2S.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/HALClusteringS2S.json diff --git a/results/shibing624/text2vec-base-multilingual/MLSUMClusteringP2P.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MLSUMClusteringP2P.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/MLSUMClusteringP2P.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MLSUMClusteringP2P.json diff --git a/results/shibing624/text2vec-base-multilingual/MLSUMClusteringS2S.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MLSUMClusteringS2S.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/MLSUMClusteringS2S.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MLSUMClusteringS2S.json diff --git a/results/shibing624/text2vec-base-multilingual/MTOPDomainClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MTOPDomainClassification.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/MTOPDomainClassification.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MTOPDomainClassification.json diff --git 
a/results/shibing624/text2vec-base-multilingual/MTOPIntentClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MTOPIntentClassification.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/MTOPIntentClassification.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MTOPIntentClassification.json diff --git a/results/shibing624/text2vec-base-multilingual/MasakhaNEWSClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClassification.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/MasakhaNEWSClassification.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClassification.json diff --git a/results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringP2P.json diff --git a/results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MasakhaNEWSClusteringS2S.json diff --git a/results/shibing624/text2vec-base-multilingual/MassiveIntentClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MassiveIntentClassification.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/MassiveIntentClassification.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MassiveIntentClassification.json 
diff --git a/results/shibing624/text2vec-base-multilingual/MassiveScenarioClassification.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MassiveScenarioClassification.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/MassiveScenarioClassification.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MassiveScenarioClassification.json diff --git a/results/shibing624/text2vec-base-multilingual/MintakaRetrieval.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/MintakaRetrieval.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/MintakaRetrieval.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/MintakaRetrieval.json diff --git a/results/shibing624/text2vec-base-multilingual/OpusparcusPC.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/OpusparcusPC.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/OpusparcusPC.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/OpusparcusPC.json diff --git a/results/shibing624/text2vec-base-multilingual/PawsX.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/PawsX.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/PawsX.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/PawsX.json diff --git a/results/shibing624/text2vec-base-multilingual/SICKFr.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/SICKFr.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/SICKFr.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/SICKFr.json diff --git a/results/shibing624/text2vec-base-multilingual/STS22.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/STS22.json similarity index 100% rename from 
results/shibing624/text2vec-base-multilingual/STS22.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/STS22.json diff --git a/results/shibing624/text2vec-base-multilingual/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/STSBenchmarkMultilingualSTS.json diff --git a/results/shibing624/text2vec-base-multilingual/SummEvalFr.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/SummEvalFr.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/SummEvalFr.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/SummEvalFr.json diff --git a/results/shibing624/text2vec-base-multilingual/SyntecReranking.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/SyntecReranking.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/SyntecReranking.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/SyntecReranking.json diff --git a/results/shibing624/text2vec-base-multilingual/SyntecRetrieval.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/SyntecRetrieval.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/SyntecRetrieval.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/SyntecRetrieval.json diff --git a/results/shibing624/text2vec-base-multilingual/XPQARetrieval.json b/outputs/benchmark_results/shibing624/text2vec-base-multilingual/XPQARetrieval.json similarity index 100% rename from results/shibing624/text2vec-base-multilingual/XPQARetrieval.json rename to outputs/benchmark_results/shibing624/text2vec-base-multilingual/XPQARetrieval.json diff --git 
a/results/text-embedding-3-large/AlloprofRetrieval.json b/outputs/benchmark_results/text-embedding-3-large/AlloprofRetrieval.json similarity index 100% rename from results/text-embedding-3-large/AlloprofRetrieval.json rename to outputs/benchmark_results/text-embedding-3-large/AlloprofRetrieval.json diff --git a/results/text-embedding-3-large/MintakaRetrieval.json b/outputs/benchmark_results/text-embedding-3-large/MintakaRetrieval.json similarity index 100% rename from results/text-embedding-3-large/MintakaRetrieval.json rename to outputs/benchmark_results/text-embedding-3-large/MintakaRetrieval.json diff --git a/results/text-embedding-3-large/SyntecRetrieval.json b/outputs/benchmark_results/text-embedding-3-large/SyntecRetrieval.json similarity index 100% rename from results/text-embedding-3-large/SyntecRetrieval.json rename to outputs/benchmark_results/text-embedding-3-large/SyntecRetrieval.json diff --git a/results/text-embedding-3-large/XPQARetrieval.json b/outputs/benchmark_results/text-embedding-3-large/XPQARetrieval.json similarity index 100% rename from results/text-embedding-3-large/XPQARetrieval.json rename to outputs/benchmark_results/text-embedding-3-large/XPQARetrieval.json diff --git a/results/text-embedding-3-small/AlloprofRetrieval.json b/outputs/benchmark_results/text-embedding-3-small/AlloprofRetrieval.json similarity index 100% rename from results/text-embedding-3-small/AlloprofRetrieval.json rename to outputs/benchmark_results/text-embedding-3-small/AlloprofRetrieval.json diff --git a/results/text-embedding-3-small/MintakaRetrieval.json b/outputs/benchmark_results/text-embedding-3-small/MintakaRetrieval.json similarity index 100% rename from results/text-embedding-3-small/MintakaRetrieval.json rename to outputs/benchmark_results/text-embedding-3-small/MintakaRetrieval.json diff --git a/results/text-embedding-3-small/SyntecRetrieval.json b/outputs/benchmark_results/text-embedding-3-small/SyntecRetrieval.json similarity index 100% rename from 
results/text-embedding-3-small/SyntecRetrieval.json rename to outputs/benchmark_results/text-embedding-3-small/SyntecRetrieval.json diff --git a/results/text-embedding-3-small/XPQARetrieval.json b/outputs/benchmark_results/text-embedding-3-small/XPQARetrieval.json similarity index 100% rename from results/text-embedding-3-small/XPQARetrieval.json rename to outputs/benchmark_results/text-embedding-3-small/XPQARetrieval.json diff --git a/results/text-embedding-ada-002/AlloProfClusteringP2P.json b/outputs/benchmark_results/text-embedding-ada-002/AlloProfClusteringP2P.json similarity index 100% rename from results/text-embedding-ada-002/AlloProfClusteringP2P.json rename to outputs/benchmark_results/text-embedding-ada-002/AlloProfClusteringP2P.json diff --git a/results/text-embedding-ada-002/AlloProfClusteringS2S.json b/outputs/benchmark_results/text-embedding-ada-002/AlloProfClusteringS2S.json similarity index 100% rename from results/text-embedding-ada-002/AlloProfClusteringS2S.json rename to outputs/benchmark_results/text-embedding-ada-002/AlloProfClusteringS2S.json diff --git a/results/text-embedding-ada-002/AlloprofRetrieval.json b/outputs/benchmark_results/text-embedding-ada-002/AlloprofRetrieval.json similarity index 100% rename from results/text-embedding-ada-002/AlloprofRetrieval.json rename to outputs/benchmark_results/text-embedding-ada-002/AlloprofRetrieval.json diff --git a/results/text-embedding-ada-002/AmazonReviewsClassification.json b/outputs/benchmark_results/text-embedding-ada-002/AmazonReviewsClassification.json similarity index 100% rename from results/text-embedding-ada-002/AmazonReviewsClassification.json rename to outputs/benchmark_results/text-embedding-ada-002/AmazonReviewsClassification.json diff --git a/results/text-embedding-ada-002/BSARDRetrieval.json b/outputs/benchmark_results/text-embedding-ada-002/BSARDRetrieval.json similarity index 100% rename from results/text-embedding-ada-002/BSARDRetrieval.json rename to 
outputs/benchmark_results/text-embedding-ada-002/BSARDRetrieval.json diff --git a/results/text-embedding-ada-002/DiaBLaBitextMining.json b/outputs/benchmark_results/text-embedding-ada-002/DiaBLaBitextMining.json similarity index 100% rename from results/text-embedding-ada-002/DiaBLaBitextMining.json rename to outputs/benchmark_results/text-embedding-ada-002/DiaBLaBitextMining.json diff --git a/results/text-embedding-ada-002/FloresBitextMining.json b/outputs/benchmark_results/text-embedding-ada-002/FloresBitextMining.json similarity index 100% rename from results/text-embedding-ada-002/FloresBitextMining.json rename to outputs/benchmark_results/text-embedding-ada-002/FloresBitextMining.json diff --git a/results/text-embedding-ada-002/HALClusteringS2S.json b/outputs/benchmark_results/text-embedding-ada-002/HALClusteringS2S.json similarity index 100% rename from results/text-embedding-ada-002/HALClusteringS2S.json rename to outputs/benchmark_results/text-embedding-ada-002/HALClusteringS2S.json diff --git a/results/text-embedding-ada-002/MLSUMClusteringP2P.json b/outputs/benchmark_results/text-embedding-ada-002/MLSUMClusteringP2P.json similarity index 100% rename from results/text-embedding-ada-002/MLSUMClusteringP2P.json rename to outputs/benchmark_results/text-embedding-ada-002/MLSUMClusteringP2P.json diff --git a/results/text-embedding-ada-002/MLSUMClusteringS2S.json b/outputs/benchmark_results/text-embedding-ada-002/MLSUMClusteringS2S.json similarity index 100% rename from results/text-embedding-ada-002/MLSUMClusteringS2S.json rename to outputs/benchmark_results/text-embedding-ada-002/MLSUMClusteringS2S.json diff --git a/results/text-embedding-ada-002/MTOPDomainClassification.json b/outputs/benchmark_results/text-embedding-ada-002/MTOPDomainClassification.json similarity index 100% rename from results/text-embedding-ada-002/MTOPDomainClassification.json rename to outputs/benchmark_results/text-embedding-ada-002/MTOPDomainClassification.json diff --git 
a/results/text-embedding-ada-002/MTOPIntentClassification.json b/outputs/benchmark_results/text-embedding-ada-002/MTOPIntentClassification.json similarity index 100% rename from results/text-embedding-ada-002/MTOPIntentClassification.json rename to outputs/benchmark_results/text-embedding-ada-002/MTOPIntentClassification.json diff --git a/results/text-embedding-ada-002/MasakhaNEWSClassification.json b/outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClassification.json similarity index 100% rename from results/text-embedding-ada-002/MasakhaNEWSClassification.json rename to outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClassification.json diff --git a/results/text-embedding-ada-002/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/text-embedding-ada-002/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClusteringP2P.json diff --git a/results/text-embedding-ada-002/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/text-embedding-ada-002/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/text-embedding-ada-002/MasakhaNEWSClusteringS2S.json diff --git a/results/text-embedding-ada-002/MassiveIntentClassification.json b/outputs/benchmark_results/text-embedding-ada-002/MassiveIntentClassification.json similarity index 100% rename from results/text-embedding-ada-002/MassiveIntentClassification.json rename to outputs/benchmark_results/text-embedding-ada-002/MassiveIntentClassification.json diff --git a/results/text-embedding-ada-002/MassiveScenarioClassification.json b/outputs/benchmark_results/text-embedding-ada-002/MassiveScenarioClassification.json similarity index 100% rename from results/text-embedding-ada-002/MassiveScenarioClassification.json rename to 
outputs/benchmark_results/text-embedding-ada-002/MassiveScenarioClassification.json diff --git a/results/text-embedding-ada-002/MintakaRetrieval.json b/outputs/benchmark_results/text-embedding-ada-002/MintakaRetrieval.json similarity index 100% rename from results/text-embedding-ada-002/MintakaRetrieval.json rename to outputs/benchmark_results/text-embedding-ada-002/MintakaRetrieval.json diff --git a/results/text-embedding-ada-002/OpusparcusPC.json b/outputs/benchmark_results/text-embedding-ada-002/OpusparcusPC.json similarity index 100% rename from results/text-embedding-ada-002/OpusparcusPC.json rename to outputs/benchmark_results/text-embedding-ada-002/OpusparcusPC.json diff --git a/results/text-embedding-ada-002/PawsX.json b/outputs/benchmark_results/text-embedding-ada-002/PawsX.json similarity index 100% rename from results/text-embedding-ada-002/PawsX.json rename to outputs/benchmark_results/text-embedding-ada-002/PawsX.json diff --git a/results/text-embedding-ada-002/SICKFr.json b/outputs/benchmark_results/text-embedding-ada-002/SICKFr.json similarity index 100% rename from results/text-embedding-ada-002/SICKFr.json rename to outputs/benchmark_results/text-embedding-ada-002/SICKFr.json diff --git a/results/text-embedding-ada-002/STS22.json b/outputs/benchmark_results/text-embedding-ada-002/STS22.json similarity index 100% rename from results/text-embedding-ada-002/STS22.json rename to outputs/benchmark_results/text-embedding-ada-002/STS22.json diff --git a/results/text-embedding-ada-002/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/text-embedding-ada-002/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/text-embedding-ada-002/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/text-embedding-ada-002/STSBenchmarkMultilingualSTS.json diff --git a/results/text-embedding-ada-002/SummEvalFr.json b/outputs/benchmark_results/text-embedding-ada-002/SummEvalFr.json similarity index 100% rename from 
results/text-embedding-ada-002/SummEvalFr.json rename to outputs/benchmark_results/text-embedding-ada-002/SummEvalFr.json diff --git a/results/text-embedding-ada-002/SyntecReranking.json b/outputs/benchmark_results/text-embedding-ada-002/SyntecReranking.json similarity index 100% rename from results/text-embedding-ada-002/SyntecReranking.json rename to outputs/benchmark_results/text-embedding-ada-002/SyntecReranking.json diff --git a/results/text-embedding-ada-002/SyntecRetrieval.json b/outputs/benchmark_results/text-embedding-ada-002/SyntecRetrieval.json similarity index 100% rename from results/text-embedding-ada-002/SyntecRetrieval.json rename to outputs/benchmark_results/text-embedding-ada-002/SyntecRetrieval.json diff --git a/results/text-embedding-ada-002/XPQARetrieval.json b/outputs/benchmark_results/text-embedding-ada-002/XPQARetrieval.json similarity index 100% rename from results/text-embedding-ada-002/XPQARetrieval.json rename to outputs/benchmark_results/text-embedding-ada-002/XPQARetrieval.json diff --git a/results/voyage-2/AlloProfClusteringP2P.json b/outputs/benchmark_results/voyage-2/AlloProfClusteringP2P.json similarity index 100% rename from results/voyage-2/AlloProfClusteringP2P.json rename to outputs/benchmark_results/voyage-2/AlloProfClusteringP2P.json diff --git a/results/voyage-2/AlloProfClusteringS2S.json b/outputs/benchmark_results/voyage-2/AlloProfClusteringS2S.json similarity index 100% rename from results/voyage-2/AlloProfClusteringS2S.json rename to outputs/benchmark_results/voyage-2/AlloProfClusteringS2S.json diff --git a/results/voyage-2/AlloprofReranking.json b/outputs/benchmark_results/voyage-2/AlloprofReranking.json similarity index 100% rename from results/voyage-2/AlloprofReranking.json rename to outputs/benchmark_results/voyage-2/AlloprofReranking.json diff --git a/results/voyage-2/AlloprofRetrieval.json b/outputs/benchmark_results/voyage-2/AlloprofRetrieval.json similarity index 100% rename from 
results/voyage-2/AlloprofRetrieval.json rename to outputs/benchmark_results/voyage-2/AlloprofRetrieval.json diff --git a/results/voyage-2/AmazonReviewsClassification.json b/outputs/benchmark_results/voyage-2/AmazonReviewsClassification.json similarity index 100% rename from results/voyage-2/AmazonReviewsClassification.json rename to outputs/benchmark_results/voyage-2/AmazonReviewsClassification.json diff --git a/results/voyage-2/BSARDRetrieval.json b/outputs/benchmark_results/voyage-2/BSARDRetrieval.json similarity index 100% rename from results/voyage-2/BSARDRetrieval.json rename to outputs/benchmark_results/voyage-2/BSARDRetrieval.json diff --git a/results/voyage-2/DiaBLaBitextMining.json b/outputs/benchmark_results/voyage-2/DiaBLaBitextMining.json similarity index 100% rename from results/voyage-2/DiaBLaBitextMining.json rename to outputs/benchmark_results/voyage-2/DiaBLaBitextMining.json diff --git a/results/voyage-2/FloresBitextMining.json b/outputs/benchmark_results/voyage-2/FloresBitextMining.json similarity index 100% rename from results/voyage-2/FloresBitextMining.json rename to outputs/benchmark_results/voyage-2/FloresBitextMining.json diff --git a/results/voyage-2/HALClusteringS2S.json b/outputs/benchmark_results/voyage-2/HALClusteringS2S.json similarity index 100% rename from results/voyage-2/HALClusteringS2S.json rename to outputs/benchmark_results/voyage-2/HALClusteringS2S.json diff --git a/results/voyage-2/MLSUMClusteringP2P.json b/outputs/benchmark_results/voyage-2/MLSUMClusteringP2P.json similarity index 100% rename from results/voyage-2/MLSUMClusteringP2P.json rename to outputs/benchmark_results/voyage-2/MLSUMClusteringP2P.json diff --git a/results/voyage-2/MLSUMClusteringS2S.json b/outputs/benchmark_results/voyage-2/MLSUMClusteringS2S.json similarity index 100% rename from results/voyage-2/MLSUMClusteringS2S.json rename to outputs/benchmark_results/voyage-2/MLSUMClusteringS2S.json diff --git a/results/voyage-2/MTOPDomainClassification.json 
b/outputs/benchmark_results/voyage-2/MTOPDomainClassification.json similarity index 100% rename from results/voyage-2/MTOPDomainClassification.json rename to outputs/benchmark_results/voyage-2/MTOPDomainClassification.json diff --git a/results/voyage-2/MTOPIntentClassification.json b/outputs/benchmark_results/voyage-2/MTOPIntentClassification.json similarity index 100% rename from results/voyage-2/MTOPIntentClassification.json rename to outputs/benchmark_results/voyage-2/MTOPIntentClassification.json diff --git a/results/voyage-2/MasakhaNEWSClassification.json b/outputs/benchmark_results/voyage-2/MasakhaNEWSClassification.json similarity index 100% rename from results/voyage-2/MasakhaNEWSClassification.json rename to outputs/benchmark_results/voyage-2/MasakhaNEWSClassification.json diff --git a/results/voyage-2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/voyage-2/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/voyage-2/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/voyage-2/MasakhaNEWSClusteringP2P.json diff --git a/results/voyage-2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/voyage-2/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/voyage-2/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/voyage-2/MasakhaNEWSClusteringS2S.json diff --git a/results/voyage-2/MassiveIntentClassification.json b/outputs/benchmark_results/voyage-2/MassiveIntentClassification.json similarity index 100% rename from results/voyage-2/MassiveIntentClassification.json rename to outputs/benchmark_results/voyage-2/MassiveIntentClassification.json diff --git a/results/voyage-2/MassiveScenarioClassification.json b/outputs/benchmark_results/voyage-2/MassiveScenarioClassification.json similarity index 100% rename from results/voyage-2/MassiveScenarioClassification.json rename to outputs/benchmark_results/voyage-2/MassiveScenarioClassification.json diff --git 
a/results/voyage-2/MintakaRetrieval.json b/outputs/benchmark_results/voyage-2/MintakaRetrieval.json similarity index 100% rename from results/voyage-2/MintakaRetrieval.json rename to outputs/benchmark_results/voyage-2/MintakaRetrieval.json diff --git a/results/voyage-2/OpusparcusPC.json b/outputs/benchmark_results/voyage-2/OpusparcusPC.json similarity index 100% rename from results/voyage-2/OpusparcusPC.json rename to outputs/benchmark_results/voyage-2/OpusparcusPC.json diff --git a/results/voyage-2/PawsX.json b/outputs/benchmark_results/voyage-2/PawsX.json similarity index 100% rename from results/voyage-2/PawsX.json rename to outputs/benchmark_results/voyage-2/PawsX.json diff --git a/results/voyage-2/SICKFr.json b/outputs/benchmark_results/voyage-2/SICKFr.json similarity index 100% rename from results/voyage-2/SICKFr.json rename to outputs/benchmark_results/voyage-2/SICKFr.json diff --git a/results/voyage-2/STS22.json b/outputs/benchmark_results/voyage-2/STS22.json similarity index 100% rename from results/voyage-2/STS22.json rename to outputs/benchmark_results/voyage-2/STS22.json diff --git a/results/voyage-2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/voyage-2/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/voyage-2/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/voyage-2/STSBenchmarkMultilingualSTS.json diff --git a/results/voyage-2/SummEvalFr.json b/outputs/benchmark_results/voyage-2/SummEvalFr.json similarity index 100% rename from results/voyage-2/SummEvalFr.json rename to outputs/benchmark_results/voyage-2/SummEvalFr.json diff --git a/results/voyage-2/SyntecReranking.json b/outputs/benchmark_results/voyage-2/SyntecReranking.json similarity index 100% rename from results/voyage-2/SyntecReranking.json rename to outputs/benchmark_results/voyage-2/SyntecReranking.json diff --git a/results/voyage-2/SyntecRetrieval.json b/outputs/benchmark_results/voyage-2/SyntecRetrieval.json similarity index 100% 
rename from results/voyage-2/SyntecRetrieval.json rename to outputs/benchmark_results/voyage-2/SyntecRetrieval.json diff --git a/results/voyage-2/XPQARetrieval.json b/outputs/benchmark_results/voyage-2/XPQARetrieval.json similarity index 100% rename from results/voyage-2/XPQARetrieval.json rename to outputs/benchmark_results/voyage-2/XPQARetrieval.json diff --git a/results/voyage-code-2/AlloProfClusteringP2P.json b/outputs/benchmark_results/voyage-code-2/AlloProfClusteringP2P.json similarity index 100% rename from results/voyage-code-2/AlloProfClusteringP2P.json rename to outputs/benchmark_results/voyage-code-2/AlloProfClusteringP2P.json diff --git a/results/voyage-code-2/AlloProfClusteringS2S.json b/outputs/benchmark_results/voyage-code-2/AlloProfClusteringS2S.json similarity index 100% rename from results/voyage-code-2/AlloProfClusteringS2S.json rename to outputs/benchmark_results/voyage-code-2/AlloProfClusteringS2S.json diff --git a/results/voyage-code-2/AlloprofReranking.json b/outputs/benchmark_results/voyage-code-2/AlloprofReranking.json similarity index 100% rename from results/voyage-code-2/AlloprofReranking.json rename to outputs/benchmark_results/voyage-code-2/AlloprofReranking.json diff --git a/results/voyage-code-2/AlloprofRetrieval.json b/outputs/benchmark_results/voyage-code-2/AlloprofRetrieval.json similarity index 100% rename from results/voyage-code-2/AlloprofRetrieval.json rename to outputs/benchmark_results/voyage-code-2/AlloprofRetrieval.json diff --git a/results/voyage-code-2/AmazonReviewsClassification.json b/outputs/benchmark_results/voyage-code-2/AmazonReviewsClassification.json similarity index 100% rename from results/voyage-code-2/AmazonReviewsClassification.json rename to outputs/benchmark_results/voyage-code-2/AmazonReviewsClassification.json diff --git a/results/voyage-code-2/BSARDRetrieval.json b/outputs/benchmark_results/voyage-code-2/BSARDRetrieval.json similarity index 100% rename from results/voyage-code-2/BSARDRetrieval.json 
rename to outputs/benchmark_results/voyage-code-2/BSARDRetrieval.json diff --git a/results/voyage-code-2/DiaBLaBitextMining.json b/outputs/benchmark_results/voyage-code-2/DiaBLaBitextMining.json similarity index 100% rename from results/voyage-code-2/DiaBLaBitextMining.json rename to outputs/benchmark_results/voyage-code-2/DiaBLaBitextMining.json diff --git a/results/voyage-code-2/FloresBitextMining.json b/outputs/benchmark_results/voyage-code-2/FloresBitextMining.json similarity index 100% rename from results/voyage-code-2/FloresBitextMining.json rename to outputs/benchmark_results/voyage-code-2/FloresBitextMining.json diff --git a/results/voyage-code-2/HALClusteringS2S.json b/outputs/benchmark_results/voyage-code-2/HALClusteringS2S.json similarity index 100% rename from results/voyage-code-2/HALClusteringS2S.json rename to outputs/benchmark_results/voyage-code-2/HALClusteringS2S.json diff --git a/results/voyage-code-2/MLSUMClusteringP2P.json b/outputs/benchmark_results/voyage-code-2/MLSUMClusteringP2P.json similarity index 100% rename from results/voyage-code-2/MLSUMClusteringP2P.json rename to outputs/benchmark_results/voyage-code-2/MLSUMClusteringP2P.json diff --git a/results/voyage-code-2/MLSUMClusteringS2S.json b/outputs/benchmark_results/voyage-code-2/MLSUMClusteringS2S.json similarity index 100% rename from results/voyage-code-2/MLSUMClusteringS2S.json rename to outputs/benchmark_results/voyage-code-2/MLSUMClusteringS2S.json diff --git a/results/voyage-code-2/MTOPDomainClassification.json b/outputs/benchmark_results/voyage-code-2/MTOPDomainClassification.json similarity index 100% rename from results/voyage-code-2/MTOPDomainClassification.json rename to outputs/benchmark_results/voyage-code-2/MTOPDomainClassification.json diff --git a/results/voyage-code-2/MTOPIntentClassification.json b/outputs/benchmark_results/voyage-code-2/MTOPIntentClassification.json similarity index 100% rename from results/voyage-code-2/MTOPIntentClassification.json rename to 
outputs/benchmark_results/voyage-code-2/MTOPIntentClassification.json diff --git a/results/voyage-code-2/MasakhaNEWSClassification.json b/outputs/benchmark_results/voyage-code-2/MasakhaNEWSClassification.json similarity index 100% rename from results/voyage-code-2/MasakhaNEWSClassification.json rename to outputs/benchmark_results/voyage-code-2/MasakhaNEWSClassification.json diff --git a/results/voyage-code-2/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/voyage-code-2/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/voyage-code-2/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/voyage-code-2/MasakhaNEWSClusteringP2P.json diff --git a/results/voyage-code-2/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/voyage-code-2/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/voyage-code-2/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/voyage-code-2/MasakhaNEWSClusteringS2S.json diff --git a/results/voyage-code-2/MassiveIntentClassification.json b/outputs/benchmark_results/voyage-code-2/MassiveIntentClassification.json similarity index 100% rename from results/voyage-code-2/MassiveIntentClassification.json rename to outputs/benchmark_results/voyage-code-2/MassiveIntentClassification.json diff --git a/results/voyage-code-2/MassiveScenarioClassification.json b/outputs/benchmark_results/voyage-code-2/MassiveScenarioClassification.json similarity index 100% rename from results/voyage-code-2/MassiveScenarioClassification.json rename to outputs/benchmark_results/voyage-code-2/MassiveScenarioClassification.json diff --git a/results/voyage-code-2/MintakaRetrieval.json b/outputs/benchmark_results/voyage-code-2/MintakaRetrieval.json similarity index 100% rename from results/voyage-code-2/MintakaRetrieval.json rename to outputs/benchmark_results/voyage-code-2/MintakaRetrieval.json diff --git a/results/voyage-code-2/OpusparcusPC.json 
b/outputs/benchmark_results/voyage-code-2/OpusparcusPC.json similarity index 100% rename from results/voyage-code-2/OpusparcusPC.json rename to outputs/benchmark_results/voyage-code-2/OpusparcusPC.json diff --git a/results/voyage-code-2/PawsX.json b/outputs/benchmark_results/voyage-code-2/PawsX.json similarity index 100% rename from results/voyage-code-2/PawsX.json rename to outputs/benchmark_results/voyage-code-2/PawsX.json diff --git a/results/voyage-code-2/SICKFr.json b/outputs/benchmark_results/voyage-code-2/SICKFr.json similarity index 100% rename from results/voyage-code-2/SICKFr.json rename to outputs/benchmark_results/voyage-code-2/SICKFr.json diff --git a/results/voyage-code-2/STS22.json b/outputs/benchmark_results/voyage-code-2/STS22.json similarity index 100% rename from results/voyage-code-2/STS22.json rename to outputs/benchmark_results/voyage-code-2/STS22.json diff --git a/results/voyage-code-2/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/voyage-code-2/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/voyage-code-2/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/voyage-code-2/STSBenchmarkMultilingualSTS.json diff --git a/results/voyage-code-2/SummEvalFr.json b/outputs/benchmark_results/voyage-code-2/SummEvalFr.json similarity index 100% rename from results/voyage-code-2/SummEvalFr.json rename to outputs/benchmark_results/voyage-code-2/SummEvalFr.json diff --git a/results/voyage-code-2/SyntecReranking.json b/outputs/benchmark_results/voyage-code-2/SyntecReranking.json similarity index 100% rename from results/voyage-code-2/SyntecReranking.json rename to outputs/benchmark_results/voyage-code-2/SyntecReranking.json diff --git a/results/voyage-code-2/SyntecRetrieval.json b/outputs/benchmark_results/voyage-code-2/SyntecRetrieval.json similarity index 100% rename from results/voyage-code-2/SyntecRetrieval.json rename to outputs/benchmark_results/voyage-code-2/SyntecRetrieval.json diff --git 
a/results/voyage-code-2/XPQARetrieval.json b/outputs/benchmark_results/voyage-code-2/XPQARetrieval.json similarity index 100% rename from results/voyage-code-2/XPQARetrieval.json rename to outputs/benchmark_results/voyage-code-2/XPQARetrieval.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringP2P.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringP2P.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringP2P.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringS2S.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringS2S.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloProfClusteringS2S.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofReranking.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofReranking.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofReranking.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofReranking.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofRetrieval.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofRetrieval.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AlloprofRetrieval.json diff --git 
a/results/vprelovac/universal-sentence-encoder-multilingual-3/AmazonReviewsClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AmazonReviewsClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/AmazonReviewsClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/AmazonReviewsClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/BSARDRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/BSARDRetrieval.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/BSARDRetrieval.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/BSARDRetrieval.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/DiaBLaBitextMining.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/DiaBLaBitextMining.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/DiaBLaBitextMining.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/DiaBLaBitextMining.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/FloresBitextMining.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/FloresBitextMining.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/FloresBitextMining.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/FloresBitextMining.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/HALClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/HALClusteringS2S.json similarity index 100% rename from 
results/vprelovac/universal-sentence-encoder-multilingual-3/HALClusteringS2S.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/HALClusteringS2S.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringP2P.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringP2P.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringP2P.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringS2S.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringS2S.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MLSUMClusteringS2S.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPDomainClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPDomainClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPDomainClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPDomainClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPIntentClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPIntentClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPIntentClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MTOPIntentClassification.json diff --git 
a/results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringP2P.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MasakhaNEWSClusteringS2S.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveIntentClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveIntentClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveIntentClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveIntentClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveScenarioClassification.json 
b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveScenarioClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveScenarioClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MassiveScenarioClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/MintakaRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MintakaRetrieval.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/MintakaRetrieval.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/MintakaRetrieval.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/OpusparcusPC.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/OpusparcusPC.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/OpusparcusPC.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/OpusparcusPC.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/PawsX.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/PawsX.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/PawsX.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/PawsX.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/SICKFr.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SICKFr.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/SICKFr.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SICKFr.json diff --git 
a/results/vprelovac/universal-sentence-encoder-multilingual-3/STS22.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/STS22.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/STS22.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/STS22.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/STSBenchmarkMultilingualSTS.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/SummEvalFr.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SummEvalFr.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/SummEvalFr.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SummEvalFr.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecReranking.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecReranking.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecReranking.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecReranking.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecRetrieval.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecRetrieval.json rename to 
outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/SyntecRetrieval.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-3/XPQARetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/XPQARetrieval.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-3/XPQARetrieval.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-3/XPQARetrieval.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringP2P.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringP2P.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringP2P.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringS2S.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringS2S.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloProfClusteringS2S.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofReranking.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofReranking.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofReranking.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofReranking.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofRetrieval.json 
b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofRetrieval.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofRetrieval.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AlloprofRetrieval.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/AmazonReviewsClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AmazonReviewsClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/AmazonReviewsClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/AmazonReviewsClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/BSARDRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/BSARDRetrieval.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/BSARDRetrieval.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/BSARDRetrieval.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/DiaBLaBitextMining.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/DiaBLaBitextMining.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/DiaBLaBitextMining.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/DiaBLaBitextMining.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/FloresBitextMining.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/FloresBitextMining.json similarity index 100% rename from 
results/vprelovac/universal-sentence-encoder-multilingual-large-3/FloresBitextMining.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/FloresBitextMining.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/HALClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/HALClusteringS2S.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/HALClusteringS2S.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/HALClusteringS2S.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringP2P.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringP2P.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringP2P.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringS2S.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringS2S.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MLSUMClusteringS2S.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPDomainClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPDomainClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPDomainClassification.json rename to 
outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPDomainClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPIntentClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPIntentClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPIntentClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MTOPIntentClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringP2P.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringS2S.json rename to 
outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MasakhaNEWSClusteringS2S.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveIntentClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveIntentClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveIntentClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveIntentClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveScenarioClassification.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveScenarioClassification.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveScenarioClassification.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MassiveScenarioClassification.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/MintakaRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MintakaRetrieval.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/MintakaRetrieval.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/MintakaRetrieval.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/OpusparcusPC.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/OpusparcusPC.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/OpusparcusPC.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/OpusparcusPC.json diff --git 
a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/PawsX.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/PawsX.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/PawsX.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/PawsX.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/SICKFr.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SICKFr.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/SICKFr.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SICKFr.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/STS22.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/STS22.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/STS22.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/STS22.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/STSBenchmarkMultilingualSTS.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/SummEvalFr.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SummEvalFr.json similarity index 100% rename from 
results/vprelovac/universal-sentence-encoder-multilingual-large-3/SummEvalFr.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SummEvalFr.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecReranking.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecReranking.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecReranking.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecReranking.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecRetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecRetrieval.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecRetrieval.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/SyntecRetrieval.json diff --git a/results/vprelovac/universal-sentence-encoder-multilingual-large-3/XPQARetrieval.json b/outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/XPQARetrieval.json similarity index 100% rename from results/vprelovac/universal-sentence-encoder-multilingual-large-3/XPQARetrieval.json rename to outputs/benchmark_results/vprelovac/universal-sentence-encoder-multilingual-large-3/XPQARetrieval.json diff --git a/results/xlm-roberta-base/AlloProfClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-base/AlloProfClusteringP2P.json similarity index 100% rename from results/xlm-roberta-base/AlloProfClusteringP2P.json rename to outputs/benchmark_results/xlm-roberta-base/AlloProfClusteringP2P.json diff --git a/results/xlm-roberta-base/AlloProfClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-base/AlloProfClusteringS2S.json similarity index 100% rename from 
results/xlm-roberta-base/AlloProfClusteringS2S.json rename to outputs/benchmark_results/xlm-roberta-base/AlloProfClusteringS2S.json diff --git a/results/xlm-roberta-base/AlloprofReranking.json b/outputs/benchmark_results/xlm-roberta-base/AlloprofReranking.json similarity index 100% rename from results/xlm-roberta-base/AlloprofReranking.json rename to outputs/benchmark_results/xlm-roberta-base/AlloprofReranking.json diff --git a/results/xlm-roberta-base/AlloprofRetrieval.json b/outputs/benchmark_results/xlm-roberta-base/AlloprofRetrieval.json similarity index 100% rename from results/xlm-roberta-base/AlloprofRetrieval.json rename to outputs/benchmark_results/xlm-roberta-base/AlloprofRetrieval.json diff --git a/results/xlm-roberta-base/AmazonReviewsClassification.json b/outputs/benchmark_results/xlm-roberta-base/AmazonReviewsClassification.json similarity index 100% rename from results/xlm-roberta-base/AmazonReviewsClassification.json rename to outputs/benchmark_results/xlm-roberta-base/AmazonReviewsClassification.json diff --git a/results/xlm-roberta-base/BSARDRetrieval.json b/outputs/benchmark_results/xlm-roberta-base/BSARDRetrieval.json similarity index 100% rename from results/xlm-roberta-base/BSARDRetrieval.json rename to outputs/benchmark_results/xlm-roberta-base/BSARDRetrieval.json diff --git a/results/xlm-roberta-base/DiaBLaBitextMining.json b/outputs/benchmark_results/xlm-roberta-base/DiaBLaBitextMining.json similarity index 100% rename from results/xlm-roberta-base/DiaBLaBitextMining.json rename to outputs/benchmark_results/xlm-roberta-base/DiaBLaBitextMining.json diff --git a/results/xlm-roberta-base/FloresBitextMining.json b/outputs/benchmark_results/xlm-roberta-base/FloresBitextMining.json similarity index 100% rename from results/xlm-roberta-base/FloresBitextMining.json rename to outputs/benchmark_results/xlm-roberta-base/FloresBitextMining.json diff --git a/results/xlm-roberta-base/HALClusteringS2S.json 
b/outputs/benchmark_results/xlm-roberta-base/HALClusteringS2S.json similarity index 100% rename from results/xlm-roberta-base/HALClusteringS2S.json rename to outputs/benchmark_results/xlm-roberta-base/HALClusteringS2S.json diff --git a/results/xlm-roberta-base/MLSUMClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-base/MLSUMClusteringP2P.json similarity index 100% rename from results/xlm-roberta-base/MLSUMClusteringP2P.json rename to outputs/benchmark_results/xlm-roberta-base/MLSUMClusteringP2P.json diff --git a/results/xlm-roberta-base/MLSUMClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-base/MLSUMClusteringS2S.json similarity index 100% rename from results/xlm-roberta-base/MLSUMClusteringS2S.json rename to outputs/benchmark_results/xlm-roberta-base/MLSUMClusteringS2S.json diff --git a/results/xlm-roberta-base/MTOPDomainClassification.json b/outputs/benchmark_results/xlm-roberta-base/MTOPDomainClassification.json similarity index 100% rename from results/xlm-roberta-base/MTOPDomainClassification.json rename to outputs/benchmark_results/xlm-roberta-base/MTOPDomainClassification.json diff --git a/results/xlm-roberta-base/MTOPIntentClassification.json b/outputs/benchmark_results/xlm-roberta-base/MTOPIntentClassification.json similarity index 100% rename from results/xlm-roberta-base/MTOPIntentClassification.json rename to outputs/benchmark_results/xlm-roberta-base/MTOPIntentClassification.json diff --git a/results/xlm-roberta-base/MasakhaNEWSClassification.json b/outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClassification.json similarity index 100% rename from results/xlm-roberta-base/MasakhaNEWSClassification.json rename to outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClassification.json diff --git a/results/xlm-roberta-base/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/xlm-roberta-base/MasakhaNEWSClusteringP2P.json rename to 
outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClusteringP2P.json diff --git a/results/xlm-roberta-base/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/xlm-roberta-base/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/xlm-roberta-base/MasakhaNEWSClusteringS2S.json diff --git a/results/xlm-roberta-base/MassiveIntentClassification.json b/outputs/benchmark_results/xlm-roberta-base/MassiveIntentClassification.json similarity index 100% rename from results/xlm-roberta-base/MassiveIntentClassification.json rename to outputs/benchmark_results/xlm-roberta-base/MassiveIntentClassification.json diff --git a/results/xlm-roberta-base/MassiveScenarioClassification.json b/outputs/benchmark_results/xlm-roberta-base/MassiveScenarioClassification.json similarity index 100% rename from results/xlm-roberta-base/MassiveScenarioClassification.json rename to outputs/benchmark_results/xlm-roberta-base/MassiveScenarioClassification.json diff --git a/results/xlm-roberta-base/MintakaRetrieval.json b/outputs/benchmark_results/xlm-roberta-base/MintakaRetrieval.json similarity index 100% rename from results/xlm-roberta-base/MintakaRetrieval.json rename to outputs/benchmark_results/xlm-roberta-base/MintakaRetrieval.json diff --git a/results/xlm-roberta-base/OpusparcusPC.json b/outputs/benchmark_results/xlm-roberta-base/OpusparcusPC.json similarity index 100% rename from results/xlm-roberta-base/OpusparcusPC.json rename to outputs/benchmark_results/xlm-roberta-base/OpusparcusPC.json diff --git a/results/xlm-roberta-base/PawsX.json b/outputs/benchmark_results/xlm-roberta-base/PawsX.json similarity index 100% rename from results/xlm-roberta-base/PawsX.json rename to outputs/benchmark_results/xlm-roberta-base/PawsX.json diff --git a/results/xlm-roberta-base/SICKFr.json b/outputs/benchmark_results/xlm-roberta-base/SICKFr.json similarity index 100% rename from 
results/xlm-roberta-base/SICKFr.json rename to outputs/benchmark_results/xlm-roberta-base/SICKFr.json diff --git a/results/xlm-roberta-base/STS22.json b/outputs/benchmark_results/xlm-roberta-base/STS22.json similarity index 100% rename from results/xlm-roberta-base/STS22.json rename to outputs/benchmark_results/xlm-roberta-base/STS22.json diff --git a/results/xlm-roberta-base/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/xlm-roberta-base/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/xlm-roberta-base/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/xlm-roberta-base/STSBenchmarkMultilingualSTS.json diff --git a/results/xlm-roberta-base/SummEvalFr.json b/outputs/benchmark_results/xlm-roberta-base/SummEvalFr.json similarity index 100% rename from results/xlm-roberta-base/SummEvalFr.json rename to outputs/benchmark_results/xlm-roberta-base/SummEvalFr.json diff --git a/results/xlm-roberta-base/SyntecReranking.json b/outputs/benchmark_results/xlm-roberta-base/SyntecReranking.json similarity index 100% rename from results/xlm-roberta-base/SyntecReranking.json rename to outputs/benchmark_results/xlm-roberta-base/SyntecReranking.json diff --git a/results/xlm-roberta-base/SyntecRetrieval.json b/outputs/benchmark_results/xlm-roberta-base/SyntecRetrieval.json similarity index 100% rename from results/xlm-roberta-base/SyntecRetrieval.json rename to outputs/benchmark_results/xlm-roberta-base/SyntecRetrieval.json diff --git a/results/xlm-roberta-base/XPQARetrieval.json b/outputs/benchmark_results/xlm-roberta-base/XPQARetrieval.json similarity index 100% rename from results/xlm-roberta-base/XPQARetrieval.json rename to outputs/benchmark_results/xlm-roberta-base/XPQARetrieval.json diff --git a/results/xlm-roberta-large/AlloProfClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-large/AlloProfClusteringP2P.json similarity index 100% rename from results/xlm-roberta-large/AlloProfClusteringP2P.json rename to 
outputs/benchmark_results/xlm-roberta-large/AlloProfClusteringP2P.json diff --git a/results/xlm-roberta-large/AlloProfClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-large/AlloProfClusteringS2S.json similarity index 100% rename from results/xlm-roberta-large/AlloProfClusteringS2S.json rename to outputs/benchmark_results/xlm-roberta-large/AlloProfClusteringS2S.json diff --git a/results/xlm-roberta-large/AlloprofReranking.json b/outputs/benchmark_results/xlm-roberta-large/AlloprofReranking.json similarity index 100% rename from results/xlm-roberta-large/AlloprofReranking.json rename to outputs/benchmark_results/xlm-roberta-large/AlloprofReranking.json diff --git a/results/xlm-roberta-large/AlloprofRetrieval.json b/outputs/benchmark_results/xlm-roberta-large/AlloprofRetrieval.json similarity index 100% rename from results/xlm-roberta-large/AlloprofRetrieval.json rename to outputs/benchmark_results/xlm-roberta-large/AlloprofRetrieval.json diff --git a/results/xlm-roberta-large/AmazonReviewsClassification.json b/outputs/benchmark_results/xlm-roberta-large/AmazonReviewsClassification.json similarity index 100% rename from results/xlm-roberta-large/AmazonReviewsClassification.json rename to outputs/benchmark_results/xlm-roberta-large/AmazonReviewsClassification.json diff --git a/results/xlm-roberta-large/BSARDRetrieval.json b/outputs/benchmark_results/xlm-roberta-large/BSARDRetrieval.json similarity index 100% rename from results/xlm-roberta-large/BSARDRetrieval.json rename to outputs/benchmark_results/xlm-roberta-large/BSARDRetrieval.json diff --git a/results/xlm-roberta-large/DiaBLaBitextMining.json b/outputs/benchmark_results/xlm-roberta-large/DiaBLaBitextMining.json similarity index 100% rename from results/xlm-roberta-large/DiaBLaBitextMining.json rename to outputs/benchmark_results/xlm-roberta-large/DiaBLaBitextMining.json diff --git a/results/xlm-roberta-large/FloresBitextMining.json b/outputs/benchmark_results/xlm-roberta-large/FloresBitextMining.json 
similarity index 100% rename from results/xlm-roberta-large/FloresBitextMining.json rename to outputs/benchmark_results/xlm-roberta-large/FloresBitextMining.json diff --git a/results/xlm-roberta-large/HALClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-large/HALClusteringS2S.json similarity index 100% rename from results/xlm-roberta-large/HALClusteringS2S.json rename to outputs/benchmark_results/xlm-roberta-large/HALClusteringS2S.json diff --git a/results/xlm-roberta-large/MLSUMClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-large/MLSUMClusteringP2P.json similarity index 100% rename from results/xlm-roberta-large/MLSUMClusteringP2P.json rename to outputs/benchmark_results/xlm-roberta-large/MLSUMClusteringP2P.json diff --git a/results/xlm-roberta-large/MLSUMClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-large/MLSUMClusteringS2S.json similarity index 100% rename from results/xlm-roberta-large/MLSUMClusteringS2S.json rename to outputs/benchmark_results/xlm-roberta-large/MLSUMClusteringS2S.json diff --git a/results/xlm-roberta-large/MTOPDomainClassification.json b/outputs/benchmark_results/xlm-roberta-large/MTOPDomainClassification.json similarity index 100% rename from results/xlm-roberta-large/MTOPDomainClassification.json rename to outputs/benchmark_results/xlm-roberta-large/MTOPDomainClassification.json diff --git a/results/xlm-roberta-large/MTOPIntentClassification.json b/outputs/benchmark_results/xlm-roberta-large/MTOPIntentClassification.json similarity index 100% rename from results/xlm-roberta-large/MTOPIntentClassification.json rename to outputs/benchmark_results/xlm-roberta-large/MTOPIntentClassification.json diff --git a/results/xlm-roberta-large/MasakhaNEWSClassification.json b/outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClassification.json similarity index 100% rename from results/xlm-roberta-large/MasakhaNEWSClassification.json rename to outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClassification.json 
diff --git a/results/xlm-roberta-large/MasakhaNEWSClusteringP2P.json b/outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClusteringP2P.json similarity index 100% rename from results/xlm-roberta-large/MasakhaNEWSClusteringP2P.json rename to outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClusteringP2P.json diff --git a/results/xlm-roberta-large/MasakhaNEWSClusteringS2S.json b/outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClusteringS2S.json similarity index 100% rename from results/xlm-roberta-large/MasakhaNEWSClusteringS2S.json rename to outputs/benchmark_results/xlm-roberta-large/MasakhaNEWSClusteringS2S.json diff --git a/results/xlm-roberta-large/MassiveIntentClassification.json b/outputs/benchmark_results/xlm-roberta-large/MassiveIntentClassification.json similarity index 100% rename from results/xlm-roberta-large/MassiveIntentClassification.json rename to outputs/benchmark_results/xlm-roberta-large/MassiveIntentClassification.json diff --git a/results/xlm-roberta-large/MassiveScenarioClassification.json b/outputs/benchmark_results/xlm-roberta-large/MassiveScenarioClassification.json similarity index 100% rename from results/xlm-roberta-large/MassiveScenarioClassification.json rename to outputs/benchmark_results/xlm-roberta-large/MassiveScenarioClassification.json diff --git a/results/xlm-roberta-large/MintakaRetrieval.json b/outputs/benchmark_results/xlm-roberta-large/MintakaRetrieval.json similarity index 100% rename from results/xlm-roberta-large/MintakaRetrieval.json rename to outputs/benchmark_results/xlm-roberta-large/MintakaRetrieval.json diff --git a/results/xlm-roberta-large/OpusparcusPC.json b/outputs/benchmark_results/xlm-roberta-large/OpusparcusPC.json similarity index 100% rename from results/xlm-roberta-large/OpusparcusPC.json rename to outputs/benchmark_results/xlm-roberta-large/OpusparcusPC.json diff --git a/results/xlm-roberta-large/PawsX.json b/outputs/benchmark_results/xlm-roberta-large/PawsX.json similarity index 100% 
rename from results/xlm-roberta-large/PawsX.json rename to outputs/benchmark_results/xlm-roberta-large/PawsX.json diff --git a/results/xlm-roberta-large/SICKFr.json b/outputs/benchmark_results/xlm-roberta-large/SICKFr.json similarity index 100% rename from results/xlm-roberta-large/SICKFr.json rename to outputs/benchmark_results/xlm-roberta-large/SICKFr.json diff --git a/results/xlm-roberta-large/STS22.json b/outputs/benchmark_results/xlm-roberta-large/STS22.json similarity index 100% rename from results/xlm-roberta-large/STS22.json rename to outputs/benchmark_results/xlm-roberta-large/STS22.json diff --git a/results/xlm-roberta-large/STSBenchmarkMultilingualSTS.json b/outputs/benchmark_results/xlm-roberta-large/STSBenchmarkMultilingualSTS.json similarity index 100% rename from results/xlm-roberta-large/STSBenchmarkMultilingualSTS.json rename to outputs/benchmark_results/xlm-roberta-large/STSBenchmarkMultilingualSTS.json diff --git a/results/xlm-roberta-large/SummEvalFr.json b/outputs/benchmark_results/xlm-roberta-large/SummEvalFr.json similarity index 100% rename from results/xlm-roberta-large/SummEvalFr.json rename to outputs/benchmark_results/xlm-roberta-large/SummEvalFr.json diff --git a/results/xlm-roberta-large/SyntecReranking.json b/outputs/benchmark_results/xlm-roberta-large/SyntecReranking.json similarity index 100% rename from results/xlm-roberta-large/SyntecReranking.json rename to outputs/benchmark_results/xlm-roberta-large/SyntecReranking.json diff --git a/results/xlm-roberta-large/SyntecRetrieval.json b/outputs/benchmark_results/xlm-roberta-large/SyntecRetrieval.json similarity index 100% rename from results/xlm-roberta-large/SyntecRetrieval.json rename to outputs/benchmark_results/xlm-roberta-large/SyntecRetrieval.json diff --git a/results/xlm-roberta-large/XPQARetrieval.json b/outputs/benchmark_results/xlm-roberta-large/XPQARetrieval.json similarity index 100% rename from results/xlm-roberta-large/XPQARetrieval.json rename to 
outputs/benchmark_results/xlm-roberta-large/XPQARetrieval.json diff --git a/cost_estimation.json b/outputs/models_analysis/cost_estimation.json similarity index 100% rename from cost_estimation.json rename to outputs/models_analysis/cost_estimation.json diff --git a/script_mteb_french/results_analysis/model_specs.json b/outputs/models_analysis/model_specs.json similarity index 100% rename from script_mteb_french/results_analysis/model_specs.json rename to outputs/models_analysis/model_specs.json diff --git a/analyses_outputs/datasets_similarity/PCA_components_all.pdf b/outputs/results_analysis/datasets_similarity/PCA_components_all.pdf similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_components_all.pdf rename to outputs/results_analysis/datasets_similarity/PCA_components_all.pdf diff --git a/analyses_outputs/datasets_similarity/PCA_components_all.png b/outputs/results_analysis/datasets_similarity/PCA_components_all.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_components_all.png rename to outputs/results_analysis/datasets_similarity/PCA_components_all.png diff --git a/analyses_outputs/datasets_similarity/PCA_components_all.svg b/outputs/results_analysis/datasets_similarity/PCA_components_all.svg similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_components_all.svg rename to outputs/results_analysis/datasets_similarity/PCA_components_all.svg diff --git a/analyses_outputs/datasets_similarity/PCA_components_all_with names.png b/outputs/results_analysis/datasets_similarity/PCA_components_all_with names.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_components_all_with names.png rename to outputs/results_analysis/datasets_similarity/PCA_components_all_with names.png diff --git a/analyses_outputs/datasets_similarity/PCA_components_classification.png b/outputs/results_analysis/datasets_similarity/PCA_components_classification.png similarity index 100% rename 
from analyses_outputs/datasets_similarity/PCA_components_classification.png rename to outputs/results_analysis/datasets_similarity/PCA_components_classification.png diff --git a/analyses_outputs/datasets_similarity/PCA_components_clustering.png b/outputs/results_analysis/datasets_similarity/PCA_components_clustering.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_components_clustering.png rename to outputs/results_analysis/datasets_similarity/PCA_components_clustering.png diff --git a/analyses_outputs/datasets_similarity/PCA_components_pairclassification.png b/outputs/results_analysis/datasets_similarity/PCA_components_pairclassification.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_components_pairclassification.png rename to outputs/results_analysis/datasets_similarity/PCA_components_pairclassification.png diff --git a/analyses_outputs/datasets_similarity/PCA_components_reranking.png b/outputs/results_analysis/datasets_similarity/PCA_components_reranking.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_components_reranking.png rename to outputs/results_analysis/datasets_similarity/PCA_components_reranking.png diff --git a/analyses_outputs/datasets_similarity/PCA_components_retrieval.pdf b/outputs/results_analysis/datasets_similarity/PCA_components_retrieval.pdf similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_components_retrieval.pdf rename to outputs/results_analysis/datasets_similarity/PCA_components_retrieval.pdf diff --git a/analyses_outputs/datasets_similarity/PCA_components_retrieval.png b/outputs/results_analysis/datasets_similarity/PCA_components_retrieval.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_components_retrieval.png rename to outputs/results_analysis/datasets_similarity/PCA_components_retrieval.png diff --git a/analyses_outputs/datasets_similarity/PCA_components_sts.png 
b/outputs/results_analysis/datasets_similarity/PCA_components_sts.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_components_sts.png rename to outputs/results_analysis/datasets_similarity/PCA_components_sts.png diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_all.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_all.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_all.png rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_all.png diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_classification.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_classification.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_classification.png rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_classification.png diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_clustering.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_clustering.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_clustering.png rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_clustering.png diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_pairclassification.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_pairclassification.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_pairclassification.png rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_pairclassification.png diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_reranking.png 
b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_reranking.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_reranking.png rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_reranking.png diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_retrieval.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_retrieval.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_retrieval.png rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_retrieval.png diff --git a/analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_sts.png b/outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_sts.png similarity index 100% rename from analyses_outputs/datasets_similarity/PCA_explained_variance_ratio_sts.png rename to outputs/results_analysis/datasets_similarity/PCA_explained_variance_ratio_sts.png diff --git a/analyses_outputs/datasets_similarity/cosim_all.pdf b/outputs/results_analysis/datasets_similarity/cosim_all.pdf similarity index 100% rename from analyses_outputs/datasets_similarity/cosim_all.pdf rename to outputs/results_analysis/datasets_similarity/cosim_all.pdf diff --git a/analyses_outputs/datasets_similarity/cosim_all.png b/outputs/results_analysis/datasets_similarity/cosim_all.png similarity index 100% rename from analyses_outputs/datasets_similarity/cosim_all.png rename to outputs/results_analysis/datasets_similarity/cosim_all.png diff --git a/analyses_outputs/datasets_similarity/cosim_classification.png b/outputs/results_analysis/datasets_similarity/cosim_classification.png similarity index 100% rename from analyses_outputs/datasets_similarity/cosim_classification.png rename to outputs/results_analysis/datasets_similarity/cosim_classification.png diff --git 
a/analyses_outputs/datasets_similarity/cosim_clustering.png b/outputs/results_analysis/datasets_similarity/cosim_clustering.png similarity index 100% rename from analyses_outputs/datasets_similarity/cosim_clustering.png rename to outputs/results_analysis/datasets_similarity/cosim_clustering.png diff --git a/analyses_outputs/datasets_similarity/cosim_pairclassification.png b/outputs/results_analysis/datasets_similarity/cosim_pairclassification.png similarity index 100% rename from analyses_outputs/datasets_similarity/cosim_pairclassification.png rename to outputs/results_analysis/datasets_similarity/cosim_pairclassification.png diff --git a/analyses_outputs/datasets_similarity/cosim_reranking.png b/outputs/results_analysis/datasets_similarity/cosim_reranking.png similarity index 100% rename from analyses_outputs/datasets_similarity/cosim_reranking.png rename to outputs/results_analysis/datasets_similarity/cosim_reranking.png diff --git a/analyses_outputs/datasets_similarity/cosim_retrieval.pdf b/outputs/results_analysis/datasets_similarity/cosim_retrieval.pdf similarity index 100% rename from analyses_outputs/datasets_similarity/cosim_retrieval.pdf rename to outputs/results_analysis/datasets_similarity/cosim_retrieval.pdf diff --git a/analyses_outputs/datasets_similarity/cosim_retrieval.png b/outputs/results_analysis/datasets_similarity/cosim_retrieval.png similarity index 100% rename from analyses_outputs/datasets_similarity/cosim_retrieval.png rename to outputs/results_analysis/datasets_similarity/cosim_retrieval.png diff --git a/analyses_outputs/datasets_similarity/cosim_sts.png b/outputs/results_analysis/datasets_similarity/cosim_sts.png similarity index 100% rename from analyses_outputs/datasets_similarity/cosim_sts.png rename to outputs/results_analysis/datasets_similarity/cosim_sts.png diff --git a/analyses_outputs/performance_vs_characteristics/correlation_heatmap.pdf b/outputs/results_analysis/performance_vs_characteristics/correlation_heatmap.pdf 
similarity index 100% rename from analyses_outputs/performance_vs_characteristics/correlation_heatmap.pdf rename to outputs/results_analysis/performance_vs_characteristics/correlation_heatmap.pdf diff --git a/analyses_outputs/performance_vs_characteristics/correlation_heatmap.png b/outputs/results_analysis/performance_vs_characteristics/correlation_heatmap.png similarity index 100% rename from analyses_outputs/performance_vs_characteristics/correlation_heatmap.png rename to outputs/results_analysis/performance_vs_characteristics/correlation_heatmap.png diff --git a/analyses_outputs/performance_vs_characteristics/correlation_matrix.csv b/outputs/results_analysis/performance_vs_characteristics/correlation_matrix.csv similarity index 100% rename from analyses_outputs/performance_vs_characteristics/correlation_matrix.csv rename to outputs/results_analysis/performance_vs_characteristics/correlation_matrix.csv diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_embedding_dim_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_embedding_dim_avg.pdf similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_embedding_dim_avg.pdf rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_embedding_dim_avg.pdf diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_embedding_dim_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_embedding_dim_avg.png similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_embedding_dim_avg.png rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_embedding_dim_avg.png diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_finetuned_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_finetuned_avg.pdf similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_finetuned_avg.pdf rename to 
outputs/results_analysis/performance_vs_characteristics/perf_vs_finetuned_avg.pdf diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_finetuned_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_finetuned_avg.png similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_finetuned_avg.png rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_finetuned_avg.png diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_license_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_license_avg.pdf similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_license_avg.pdf rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_license_avg.pdf diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_license_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_license_avg.png similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_license_avg.png rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_license_avg.png diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_model_type_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_model_type_avg.pdf similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_model_type_avg.pdf rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_model_type_avg.pdf diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_model_type_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_model_type_avg.png similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_model_type_avg.png rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_model_type_avg.png diff --git 
a/analyses_outputs/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.pdf similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.pdf rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.pdf diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.png similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.png rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_multilingual_or_french_avg.png diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_number_params_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_number_params_avg.pdf similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_number_params_avg.pdf rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_number_params_avg.pdf diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_number_params_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_number_params_avg.png similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_number_params_avg.png rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_number_params_avg.png diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_seq_len_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_seq_len_avg.pdf similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_seq_len_avg.pdf rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_seq_len_avg.pdf diff --git 
a/analyses_outputs/performance_vs_characteristics/perf_vs_seq_len_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_seq_len_avg.png similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_seq_len_avg.png rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_seq_len_avg.png diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_size_gb_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_size_gb_avg.pdf similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_size_gb_avg.pdf rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_size_gb_avg.pdf diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_size_gb_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_size_gb_avg.png similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_size_gb_avg.png rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_size_gb_avg.png diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.pdf b/outputs/results_analysis/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.pdf similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.pdf rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.pdf diff --git a/analyses_outputs/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.png b/outputs/results_analysis/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.png similarity index 100% rename from analyses_outputs/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.png rename to outputs/results_analysis/performance_vs_characteristics/perf_vs_tuned_on_sentence_sim_avg.png diff --git a/analyses_outputs/results.xlsx b/outputs/results_analysis/results.xlsx 
similarity index 100% rename from analyses_outputs/results.xlsx rename to outputs/results_analysis/results.xlsx diff --git a/analyses_outputs/results_correlations/spearman_corr_heatmap_datasets.pdf b/outputs/results_analysis/results_correlations/spearman_corr_heatmap_datasets.pdf similarity index 100% rename from analyses_outputs/results_correlations/spearman_corr_heatmap_datasets.pdf rename to outputs/results_analysis/results_correlations/spearman_corr_heatmap_datasets.pdf diff --git a/analyses_outputs/results_correlations/spearman_corr_heatmap_datasets.png b/outputs/results_analysis/results_correlations/spearman_corr_heatmap_datasets.png similarity index 100% rename from analyses_outputs/results_correlations/spearman_corr_heatmap_datasets.png rename to outputs/results_analysis/results_correlations/spearman_corr_heatmap_datasets.png diff --git a/analyses_outputs/results_correlations/spearman_corr_heatmap_models.pdf b/outputs/results_analysis/results_correlations/spearman_corr_heatmap_models.pdf similarity index 100% rename from analyses_outputs/results_correlations/spearman_corr_heatmap_models.pdf rename to outputs/results_analysis/results_correlations/spearman_corr_heatmap_models.pdf diff --git a/analyses_outputs/results_correlations/spearman_corr_heatmap_models.png b/outputs/results_analysis/results_correlations/spearman_corr_heatmap_models.png similarity index 100% rename from analyses_outputs/results_correlations/spearman_corr_heatmap_models.png rename to outputs/results_analysis/results_correlations/spearman_corr_heatmap_models.png diff --git a/analyses_outputs/results_correlations/spearman_corr_matrix_datasets.csv b/outputs/results_analysis/results_correlations/spearman_corr_matrix_datasets.csv similarity index 100% rename from analyses_outputs/results_correlations/spearman_corr_matrix_datasets.csv rename to outputs/results_analysis/results_correlations/spearman_corr_matrix_datasets.csv diff --git 
a/analyses_outputs/results_correlations/spearman_corr_matrix_models.csv b/outputs/results_analysis/results_correlations/spearman_corr_matrix_models.csv similarity index 100% rename from analyses_outputs/results_correlations/spearman_corr_matrix_models.csv rename to outputs/results_analysis/results_correlations/spearman_corr_matrix_models.csv diff --git a/analyses_outputs/statistical_tests/conover_friedman.pdf b/outputs/results_analysis/statistical_tests/conover_friedman.pdf similarity index 100% rename from analyses_outputs/statistical_tests/conover_friedman.pdf rename to outputs/results_analysis/statistical_tests/conover_friedman.pdf diff --git a/analyses_outputs/statistical_tests/conover_friedman.png b/outputs/results_analysis/statistical_tests/conover_friedman.png similarity index 100% rename from analyses_outputs/statistical_tests/conover_friedman.png rename to outputs/results_analysis/statistical_tests/conover_friedman.png diff --git a/analyses_outputs/statistical_tests/critical_difference_diagram.pdf b/outputs/results_analysis/statistical_tests/critical_difference_diagram.pdf similarity index 100% rename from analyses_outputs/statistical_tests/critical_difference_diagram.pdf rename to outputs/results_analysis/statistical_tests/critical_difference_diagram.pdf diff --git a/analyses_outputs/statistical_tests/critical_difference_diagram.png b/outputs/results_analysis/statistical_tests/critical_difference_diagram.png similarity index 100% rename from analyses_outputs/statistical_tests/critical_difference_diagram.png rename to outputs/results_analysis/statistical_tests/critical_difference_diagram.png diff --git a/paper/mteb.pdf b/paper/mteb.pdf deleted file mode 100644 index 8f8b1e80..00000000 Binary files a/paper/mteb.pdf and /dev/null differ diff --git a/paper/mteb.zip b/paper/mteb.zip deleted file mode 100644 index 64303a08..00000000 Binary files a/paper/mteb.zip and /dev/null differ diff --git a/plotstables/MTEB_EACL2023_POSTER.pdf 
b/plotstables/MTEB_EACL2023_POSTER.pdf deleted file mode 100644 index 1cc2ea1d..00000000 Binary files a/plotstables/MTEB_EACL2023_POSTER.pdf and /dev/null differ diff --git a/plotstables/MTEB_EACL2023_PRES.pdf b/plotstables/MTEB_EACL2023_PRES.pdf deleted file mode 100644 index f608d7cb..00000000 Binary files a/plotstables/MTEB_EACL2023_PRES.pdf and /dev/null differ diff --git a/plotstables/all_en.txt b/plotstables/all_en.txt deleted file mode 100644 index d32c23d3..00000000 --- a/plotstables/all_en.txt +++ /dev/null @@ -1,58 +0,0 @@ -Dataset & Language & Glove & Komninos & BERT & SimCSE-BERT-unsup & SimCSE-BERT-sup & coCondenser-msmarco & Contriever & SPECTER & LaBSE & LASER2 & MiniLM-L6 & MiniLM-L12 & MiniLM-L12-multilingual & MPNet & MPNet-multilingual & Ada Similarity & SGPT-125M-nli & SGPT-5.8B-nli & SGPT-125M-msmarco & SGPT-1.3B-msmarco & SGPT-2.7B-msmarco & SGPT-5.8B-msmarco & SGPT-BLOOM-7.1B-msmarco & GTR-Base & GTR-Large & GTR-XL & GTR-XXL & ST5-Base & ST5-Large & ST5-XL & ST5-XXL \\ -AmazonCounterfactualClassification & en & 56.91 & 60.54 & 74.25 & 67.09 & 75.75 & 64.06 & 72.19 & 58.7 & 75.93 & 76.84 & 64.15 & 65.28 & 71.57 & 65.27 & 75.81 & 76.4 & 65.88 & 74.07 & 61.24 & 65.21 & 67.57 & 69.22 & 68.06 & 69.33 & 70.03 & 68.6 & 67.3 & 75.82 & 75.51 & 76.01 & 77.07 \\ -AmazonPolarityClassification & en & 60.32 & 59.59 & 71.33 & 74.48 & 82.47 & 66.88 & 68.63 & 57.77 & 68.95 & 61.01 & 62.58 & 62.98 & 69.21 & 67.13 & 76.41 & 92.83 & 74.94 & 82.31 & 65.4 & 73.21 & 71.44 & 71.26 & 68.97 & 67.82 & 73.92 & 74.58 & 75.05 & 85.12 & 92.87 & 93.17 & 92.79 \\ -AmazonReviewsClassification & en & 29.67 & 31.01 & 33.56 & 33.85 & 39.6 & 34.85 & 37.42 & 26.26 & 35.8 & 28.71 & 31.79 & 30.79 & 35.11 & 31.92 & 38.51 & 47.45 & 35.1 & 41.58 & 31.17 & 34.96 & 35.75 & 39.19 & 33.86 & 38.48 & 37.21 & 38.2 & 37.3 & 44.94 & 47.12 & 48.18 & 48.93 \\ -Banking77Classification & en & 67.69 & 67.05 & 63.41 & 73.55 & 75.76 & 82.35 & 80.02 & 66.66 & 69.85 & 57.76 & 79.75 & 80.4 & 79.77 & 
81.86 & 81.07 & 68.04 & 74.68 & 81.74 & 77.7 & 82.06 & 83.22 & 84.49 & 84.33 & 79.26 & 81.21 & 82.22 & 82.32 & 76.48 & 78.46 & 80.88 & 82.31 \\ -EmotionClassification & en & 36.93 & 33.18 & 35.28 & 42.22 & 44.81 & 41.91 & 44.77 & 24.82 & 37.22 & 24.83 & 38.43 & 41.17 & 42.37 & 39.73 & 45.84 & 50.32 & 42.23 & 49.92 & 39.08 & 46.39 & 49.21 & 49.66 & 44.87 & 42.2 & 46.32 & 45.55 & 43.19 & 51.36 & 51.73 & 51.95 & 48.57 \\ -ImdbClassification & en & 62.57 & 63.98 & 65.35 & 69.63 & 73.53 & 60.17 & 67.04 & 56.35 & 62.04 & 57.58 & 60.66 & 59.76 & 60.46 & 70.72 & 64.57 & 89.38 & 62.9 & 74.33 & 58.67 & 64.05 & 63.53 & 66.64 & 61.77 & 65.99 & 70.86 & 68.15 & 70.8 & 77.34 & 87.01 & 87.54 & 90.23 \\ -MassiveIntentClassification & en & 56.19 & 57.21 & 59.88 & 59.84 & 65.95 & 70.4 & 67.78 & 51.73 & 61.46 & 47.91 & 67.4 & 67.15 & 66.84 & 69.57 & 69.32 & 65.17 & 58.08 & 70.0 & 61.41 & 68.65 & 69.01 & 70.39 & 69.67 & 67.05 & 70.06 & 70.23 & 70.61 & 69.74 & 71.78 & 72.09 & 73.44 \\ -MassiveScenarioClassification & en & 66.03 & 66.11 & 64.28 & 66.25 & 70.78 & 73.73 & 76.0 & 58.58 & 66.41 & 55.92 & 75.76 & 74.58 & 71.51 & 76.01 & 75.35 & 67.67 & 66.34 & 75.03 & 69.74 & 76.04 & 75.9 & 76.28 & 75.34 & 75.4 & 75.49 & 75.94 & 77.77 & 72.32 & 73.16 & 73.26 & 74.82 \\ -MTOPDomainClassification & en & 79.11 & 78.57 & 82.63 & 81.71 & 84.29 & 91.34 & 93.18 & 74.53 & 86.06 & 75.36 & 91.56 & 91.9 & 87.06 & 92.08 & 89.24 & 89.89 & 81.52 & 89.64 & 86.96 & 92.08 & 92.56 & 93.47 & 93.68 & 92.42 & 94.01 & 93.6 & 93.84 & 90.34 & 90.99 & 90.73 & 92.49 \\ -MTOPIntentClassification & en & 55.85 & 57.07 & 68.14 & 59.23 & 63.14 & 71.07 & 69.31 & 50.05 & 63.03 & 49.47 & 62.18 & 62.84 & 65.52 & 70.21 & 68.69 & 64.8 & 58.24 & 70.68 & 62.25 & 71.19 & 71.85 & 72.42 & 71.34 & 62.44 & 63.86 & 65.93 & 67.71 & 63.32 & 64.98 & 68.15 & 68.33 \\ -ToxicConversationsClassification & en & 65.4 & 67.76 & 70.0 & 68.82 & 72.04 & 64.01 & 67.77 & 57.44 & 66.9 & 54.05 & 66.99 & 67.47 & 66.07 & 60.86 & 71.02 & 70.0 & 62.79 & 
69.93 & 62.66 & 68.73 & 68.84 & 67.71 & 66.55 & 66.6 & 68.65 & 67.56 & 68.48 & 68.2 & 71.73 & 70.95 & 70.04 \\ -TweetSentimentExtractionClassification & en & 50.8 & 49.68 & 51.81 & 53.36 & 59.73 & 55.74 & 56.1 & 45.52 & 58.82 & 48.73 & 55.41 & 54.25 & 56.12 & 55.46 & 59.03 & 63.35 & 54.82 & 62.44 & 52.41 & 55.67 & 56.69 & 56.85 & 55.85 & 56.02 & 54.09 & 54.77 & 54.54 & 62.71 & 62.33 & 61.21 & 62.01 \\ -ArxivClusteringP2P & en & 32.56 & 34.73 & 35.19 & 32.61 & 35.18 & 36.94 & 42.61 & 44.75 & 32.13 & 17.77 & 46.55 & 46.07 & 38.33 & 48.38 & 37.78 & 41.49 & 34.74 & 40.55 & 39.71 & 43.38 & 44.72 & 45.59 & 44.59 & 35.49 & 37.5 & 37.9 & 37.9 & 39.28 & 41.62 & 41.62 & 42.89 \\ -ArxivClusteringS2S & en & 23.14 & 26.01 & 27.51 & 24.68 & 27.54 & 29.03 & 32.32 & 35.27 & 22.05 & 12.39 & 37.86 & 37.5 & 31.55 & 39.72 & 31.68 & 28.47 & 24.68 & 32.49 & 28.24 & 33.71 & 35.08 & 38.86 & 38.03 & 27.18 & 30.55 & 30.45 & 32.39 & 27.26 & 29.44 & 31.17 & 33.47 \\ -BiorxivClusteringP2P & en & 29.27 & 29.76 & 30.12 & 24.9 & 30.15 & 32.35 & 34.97 & 39.52 & 29.84 & 12.4 & 38.48 & 36.99 & 33.49 & 39.62 & 33.09 & 36.86 & 28.93 & 33.59 & 33.63 & 35.06 & 34.41 & 36.55 & 36.03 & 27.66 & 29.59 & 30.52 & 30.48 & 33.99 & 35.99 & 36.43 & 36.53 \\ -BiorxivClusteringS2S & en & 19.18 & 20.71 & 24.77 & 19.55 & 24.67 & 28.16 & 29.08 & 34.53 & 20.57 & 8.83 & 33.17 & 33.21 & 29.44 & 35.02 & 29.6 & 27.55 & 23.08 & 29.13 & 27.04 & 30.71 & 30.53 & 33.7 & 32.48 & 23.25 & 25.72 & 26.06 & 27.5 & 22.92 & 24.02 & 26.47 & 28.66 \\ -MedrxivClusteringP2P & en & 26.12 & 26.65 & 26.09 & 23.6 & 26.25 & 30.23 & 31.19 & 35.04 & 30.13 & 17.91 & 34.41 & 34.25 & 31.52 & 35.58 & 31.96 & 31.09 & 28.3 & 30.33 & 31.37 & 32.08 & 31.35 & 31.51 & 31.05 & 27.57 & 28.72 & 28.69 & 29.12 & 33.2 & 32.4 & 32.3 & 32.09 \\ -MedrxivClusteringS2S & en & 20.38 & 21.5 & 23.6 & 21.97 & 24.12 & 27.01 & 27.27 & 31.66 & 24.82 & 16.63 & 32.29 & 32.24 & 30.87 & 32.87 & 31.7 & 26.5 & 24.93 & 28.02 & 26.87 & 29.45 & 28.77 & 28.76 & 29.26 & 25.13 & 27.39 
& 26.69 & 27.56 & 26.13 & 26.33 & 26.93 & 26.82 \\ -RedditClustering & en & 28.46 & 28.84 & 27.24 & 32.18 & 40.23 & 48.04 & 54.89 & 24.13 & 28.79 & 9.96 & 50.67 & 51.18 & 42.02 & 54.82 & 45.24 & 42.47 & 33.76 & 42.17 & 40.23 & 48.23 & 46.47 & 40.45 & 35.53 & 56.13 & 61.69 & 61.34 & 64.13 & 52.93 & 54.53 & 57.03 & 58.99 \\ -RedditClusteringP2P & en & 35.82 & 7.37 & 43.32 & 45.14 & 47.74 & 53.53 & 57.58 & 35.06 & 49.14 & 26.42 & 54.15 & 54.8 & 50.73 & 56.77 & 51.31 & 58.1 & 41.01 & 48.02 & 49.09 & 53.18 & 54.17 & 55.75 & 54.52 & 58.53 & 61.67 & 61.11 & 62.84 & 59.67 & 62.5 & 62.34 & 64.46 \\ -StackExchangeClustering & en & 35.8 & 39.04 & 43.58 & 43.07 & 47.55 & 59.54 & 63.15 & 39.01 & 35.43 & 15.79 & 53.36 & 53.05 & 49.6 & 53.8 & 52.98 & 53.52 & 44.59 & 54.13 & 52.74 & 60.86 & 59.19 & 59.21 & 55.13 & 64.21 & 69.93 & 69.95 & 71.43 & 63.13 & 65.11 & 67.13 & 70.78 \\ -StackExchangeClusteringP2P & en & 28.51 & 30.23 & 26.55 & 28.5 & 29.45 & 30.48 & 32.25 & 31.46 & 28.83 & 18.63 & 38.0 & 33.13 & 31.69 & 34.28 & 32.94 & 30.43 & 28.23 & 31.12 & 32.66 & 32.36 & 32.57 & 33.95 & 34.31 & 33.01 & 33.21 & 32.73 & 32.85 & 35.68 & 36.86 & 34.79 & 35.25 \\ -TwentyNewsgroupsClustering & en & 25.83 & 27.42 & 23.35 & 23.21 & 34.86 & 38.68 & 46.82 & 24.22 & 23.28 & 11.38 & 46.86 & 47.47 & 39.28 & 49.74 & 44.1 & 36.26 & 28.24 & 37.2 & 32.13 & 40.06 & 40.89 & 39.46 & 37.28 & 46.72 & 51.64 & 51.15 & 50.44 & 48.1 & 49.33 & 49.53 & 50.93 \\ -SprintDuplicateQuestions & en & 86.96 & 85.55 & 36.81 & 69.41 & 69.39 & 96.09 & 95.55 & 71.63 & 89.26 & 65.54 & 94.55 & 92.45 & 89.46 & 90.15 & 90.55 & 77.85 & 77.73 & 80.54 & 89.89 & 92.58 & 93.47 & 93.84 & 94.93 & 94.55 & 95.05 & 95.45 & 95.68 & 91.23 & 89.01 & 91.44 & 88.89 \\ -TwitterSemEval2015 & en & 48.45 & 53.85 & 55.9 & 60.21 & 67.75 & 65.95 & 66.85 & 43.25 & 62.78 & 59.57 & 67.86 & 70.02 & 62.06 & 73.85 & 66.75 & 69.04 & 57.09 & 66.0 & 54.75 & 62.37 & 63.68 & 66.87 & 65.31 & 72.23 & 76.03 & 77.81 & 77.54 & 78.25 & 79.75 & 80.89 & 80.28 \\ 
-TwitterURLCorpus & en & 77.35 & 79.41 & 76.29 & 81.37 & 83.89 & 83.17 & 85.21 & 69.22 & 84.58 & 81.47 & 84.7 & 84.77 & 83.83 & 85.11 & 85.14 & 83.69 & 80.51 & 84.54 & 81.06 & 83.79 & 84.8 & 85.29 & 85.46 & 84.77 & 84.89 & 85.14 & 85.13 & 86.05 & 86.14 & 85.86 & 86.01 \\ -AskUbuntuDupQuestions & en & 49.57 & 50.88 & 45.84 & 51.57 & 51.8 & 58.99 & 56.69 & 50.07 & 52.75 & 48.99 & 63.48 & 64.06 & 60.49 & 65.85 & 60.16 & 53.49 & 52.63 & 55.9 & 55.84 & 58.13 & 59.63 & 61.63 & 59.97 & 60.86 & 61.64 & 63.08 & 63.23 & 59.73 & 61.51 & 62.86 & 66.16 \\ -MindSmallReranking & en & 27.01 & 28.92 & 28.37 & 28.62 & 29.3 & 27.13 & 31.58 & 24.8 & 29.81 & 24.79 & 30.8 & 31.02 & 30.37 & 30.97 & 30.15 & 30.71 & 29.27 & 31.11 & 30.4 & 31.34 & 31.72 & 32.29 & 31.79 & 31.33 & 31.84 & 31.5 & 31.93 & 30.2 & 30.27 & 29.77 & 30.6 \\ -SciDocsRR & en & 62.56 & 63.55 & 64.94 & 66.33 & 70.14 & 72.78 & 76.51 & 81.31 & 68.72 & 54.99 & 87.12 & 87.2 & 77.78 & 88.65 & 78.09 & 71.04 & 68.36 & 77.54 & 71.34 & 77.21 & 77.72 & 80.79 & 79.77 & 73.71 & 76.39 & 76.49 & 77.96 & 73.96 & 74.88 & 75.16 & 76.09 \\ -StackOverflowDupQuestions & en & 34.03 & 35.65 & 34.62 & 39.35 & 38.9 & 48.48 & 47.78 & 36.22 & 42.42 & 36.98 & 50.76 & 51.47 & 45.85 & 51.98 & 46.79 & 40.85 & 39.97 & 44.77 & 44.74 & 49.32 & 49.61 & 51.53 & 51.07 & 51.01 & 51.58 & 52.79 & 53.5 & 48.46 & 49.34 & 51.05 & 52.85 \\ -ArguAna & en & 36.3 & 30.96 & 28.29 & 38.34 & 38.33 & 45.15 & 48.32 & 32.67 & 34.18 & 12.86 & 50.17 & 47.13 & 44.88 & 46.52 & 48.91 & & 31.04 & 35.07 & 45.42 & 49.68 & 50.49 & 51.38 & 47.28 & 50.83 & 52.09 & 52.81 & 53.77 & 44.85 & 39.27 & 39.4 & 39.85 \\ -ClimateFEVER & en & 14.44 & 14.87 & 5.41 & 11.8 & 11.98 & 16.96 & 24.79 & 6.86 & 3.83 & 0.36 & 20.27 & 21.57 & 18.49 & 21.97 & 15.27 & & 11.01 & 17.57 & 21.86 & 26.6 & 27.11 & 30.46 & 29.39 & 24.88 & 26.9 & 27.01 & 27.21 & 10.37 & 11.36 & 10.61 & 14.63 \\ -CQADupstackRetrieval & en & 15.47 & 16.79 & 5.51 & 13.22 & 14.5 & 27.72 & 33.67 & 14.6 & 18.75 & 4.12 & 41.32 & 42.53 & 
30.71 & 44.96 & 31.32 & & 20.29 & 29.98 & 27.25 & 33.33 & 36.53 & 39.4 & 39.62 & 34.55 & 36.62 & 37.35 & 38.56 & 35.23 & 38.96 & 40.78 & 44.65 \\ -DBPedia & en & 18.29 & 15.88 & 4.13 & 15.04 & 19.73 & 27.86 & 38.1 & 4.14 & 15.57 & 1.53 & 32.33 & 33.36 & 22.63 & 32.09 & 26.22 & & 10.87 & 26.1 & 22.72 & 31.51 & 34.7 & 39.87 & 39.03 & 35.24 & 39.55 & 39.74 & 41.28 & 27.77 & 31.55 & 33.65 & 39.19 \\ -FEVER & en & 14.99 & 15.56 & 3.3 & 21.05 & 20.41 & 45.68 & 59.29 & 5.45 & 12.17 & 0.77 & 51.93 & 55.91 & 52.66 & 50.86 & 56.76 & & 18.4 & 38.64 & 60.45 & 68.12 & 72.73 & 78.24 & 73.97 & 68.93 & 72.66 & 72.18 & 74.08 & 26.16 & 36.21 & 36.12 & 51.2 \\ -FiQA2018 & en & 10.09 & 10.49 & 2.19 & 9.84 & 10.41 & 15.62 & 27.42 & 5.64 & 7.0 & 1.73 & 36.87 & 37.27 & 20.33 & 49.96 & 22.96 & & 8.94 & 18.59 & 21.12 & 29.99 & 33.29 & 37.2 & 35.84 & 35.15 & 42.79 & 44.19 & 46.78 & 34.83 & 43.55 & 44.71 & 46.68 \\ -HotpotQA & en & 19.18 & 20.77 & 8.26 & 19.75 & 22.89 & 35.61 & 56.81 & 5.46 & 18.75 & 5.5 & 46.51 & 44.59 & 30.01 & 39.29 & 37.03 & & 17.73 & 33.99 & 40.88 & 49.93 & 52.84 & 59.26 & 57.26 & 54.93 & 57.85 & 58.91 & 59.67 & 33.2 & 33.95 & 37.17 & 42.14 \\ -MSMARCO & en & 9.6 & 9.75 & 1.91 & 9.35 & 11.0 & 29.57 & 36.77 & 5.58 & 7.6 & 1.09 & 36.54 & 39.03 & 23.72 & 39.75 & 26.6 & & 6.27 & 15.83 & 27.98 & 36.05 & 38.83 & 39.91 & 41.12 & 41.16 & 42.73 & 43.52 & 44.05 & 20.71 & 23.96 & 25.17 & 27.68 \\ -NFCorpus & en & 13.87 & 11.79 & 4.3 & 9.88 & 12.42 & 22.29 & 31.31 & 0.84 & 16.54 & 2.44 & 31.59 & 32.25 & 23.45 & 33.29 & 25.49 & & 11.8 & 28.26 & 22.79 & 32.08 & 33.89 & 36.21 & 35.78 & 30.22 & 32.63 & 33.34 & 34.18 & 28.64 & 31.1 & 33.18 & 35.08 \\ -NQ & en & 12.87 & 12.75 & 2.61 & 11.69 & 16.08 & 29.85 & 41.83 & 5.99 & 8.42 & 0.64 & 43.87 & 46.47 & 29.8 & 50.45 & 33.6 & & 7.63 & 24.63 & 29.73 & 42.94 & 46.7 & 52.41 & 53.15 & 50.47 & 55.09 & 56.16 & 57.24 & 36.32 & 42.02 & 46.29 & 52.87 \\ -QuoraRetrieval & en & 71.32 & 71.58 & 61.03 & 78.03 & 79.62 & 86.51 & 86.72 & 64.65 & 77.03 & 
71.14 & 87.56 & 87.75 & 86.55 & 87.46 & 86.41 & & 78.96 & 84.68 & 72.98 & 85.28 & 85.6 & 84.58 & 74.71 & 87.98 & 88.47 & 88.91 & 89.09 & 85.49 & 85.73 & 85.85 & 85.96 \\ -SCIDOCS & en & 8.04 & 8.47 & 2.81 & 5.5 & 7.53 & 10.13 & 17.12 & 0.0 & 5.63 & 0.78 & 21.64 & 21.82 & 0.03 & 23.77 & 13.96 & & 7.13 & 13.55 & 12.21 & 16.18 & 16.57 & 19.87 & 18.62 & 14.0 & 15.51 & 15.71 & 15.88 & 14.16 & 15.38 & 15.97 & 17.17 \\ -SciFact & en & 29.58 & 29.53 & 13.34 & 25.72 & 29.59 & 52.31 & 65.51 & 47.88 & 38.2 & 4.04 & 64.51 & 62.64 & 48.37 & 65.57 & 50.3 & & 31.79 & 46.66 & 56.9 & 68.29 & 70.17 & 74.7 & 72.11 & 59.74 & 63.42 & 64.2 & 66.77 & 45.76 & 49.91 & 50.91 & 55.38 \\ -Touche2020 & en & 13.99 & 13.17 & 0.97 & 8.9 & 9.89 & 8.57 & 15.79 & 8.46 & 4.88 & 1.06 & 16.9 & 17.22 & 16.06 & 19.93 & 17.4 & & 12.27 & 16.18 & 22.97 & 24.45 & 23.44 & 25.43 & 23.98 & 25.89 & 28.29 & 25.26 & 26.76 & 20.3 & 21.63 & 22.51 & 21.65 \\ -TRECCOVID & en & 36.22 & 35.92 & 14.74 & 26.2 & 22.93 & 40.54 & 44.77 & 29.91 & 16.34 & 10.97 & 47.25 & 50.82 & 39.12 & 51.33 & 37.87 & & 39.31 & 55.35 & 70.3 & 72.98 & 75.17 & 84.88 & 81.37 & 56.05 & 56.68 & 60.09 & 51.9 & 40.7 & 46.11 & 54.77 & 59.48 \\ -BIOSSES & en & 44.93 & 50.25 & 54.7 & 72.31 & 68.38 & 77.32 & 83.32 & 64.95 & 78.7 & 62.01 & 81.64 & 83.57 & 74.18 & 80.43 & 76.27 & 78.04 & 70.93 & 79.5 & 75.21 & 83.02 & 84.84 & 86.25 & 85.31 & 79.0 & 84.86 & 78.94 & 81.91 & 75.89 & 78.93 & 73.12 & 80.43 \\ -SICK-R & en & 55.43 & 55.49 & 58.65 & 72.24 & 80.77 & 72.0 & 70.2 & 56.39 & 69.99 & 62.86 & 77.58 & 79.32 & 79.61 & 80.59 & 79.62 & 77.48 & 74.57 & 79.59 & 65.93 & 67.23 & 68.2 & 69.63 & 69.82 & 71.45 & 73.39 & 73.63 & 74.29 & 80.18 & 80.34 & 79.98 & 80.47 \\ -STS12 & en & 54.64 & 53.51 & 30.87 & 66.05 & 75.3 & 68.19 & 64.34 & 62.49 & 65.08 & 62.6 & 72.37 & 73.08 & 76.02 & 72.63 & 77.9 & 72.3 & 69.17 & 74.29 & 66.53 & 66.59 & 66.99 & 67.5 & 69.66 & 68.59 & 70.33 & 69.11 & 70.12 & 78.05 & 79.11 & 79.02 & 78.85 \\ -STS13 & en & 69.16 & 70.8 & 59.89 & 81.49 
& 84.67 & 80.4 & 80.03 & 58.7 & 67.98 & 59.62 & 80.6 & 82.13 & 80.7 & 83.48 & 85.11 & 81.49 & 77.23 & 85.35 & 76.17 & 77.33 & 77.58 & 79.16 & 79.67 & 79.09 & 82.19 & 81.82 & 82.72 & 85.85 & 87.33 & 88.8 & 88.94 \\ -STS14 & en & 60.81 & 63.56 & 47.73 & 73.61 & 80.19 & 74.02 & 74.51 & 54.87 & 64.03 & 57.03 & 75.59 & 76.73 & 78.85 & 78.0 & 80.81 & 74.74 & 70.99 & 79.21 & 69.05 & 71.83 & 72.78 & 74.46 & 74.61 & 74.64 & 77.16 & 77.07 & 78.24 & 82.19 & 83.17 & 84.33 & 84.86 \\ -STS15 & en & 72.31 & 74.08 & 60.29 & 79.72 & 85.4 & 82.57 & 83.3 & 62.54 & 76.59 & 71.57 & 85.39 & 85.58 & 85.84 & 85.66 & 87.48 & 84.28 & 79.74 & 85.52 & 79.24 & 80.66 & 82.62 & 84.47 & 83.81 & 84.85 & 86.31 & 86.01 & 86.26 & 87.46 & 88.28 & 88.89 & 89.32 \\ -STS16 & en & 65.34 & 64.6 & 63.73 & 78.12 & 80.82 & 79.78 & 79.67 & 64.27 & 72.98 & 70.75 & 78.99 & 80.23 & 81.05 & 80.03 & 83.2 & 82.06 & 77.93 & 82.54 & 76.07 & 78.91 & 80.1 & 80.96 & 80.4 & 81.57 & 81.85 & 82.23 & 81.61 & 84.03 & 84.36 & 85.31 & 84.67 \\ -STS17 & en-en & 77.95 & 76.91 & 64.1 & 83.58 & 89.44 & 85.94 & 86.32 & 69.63 & 79.45 & 76.73 & 87.59 & 88.63 & 86.87 & 90.6 & 86.99 & 87.08 & 87.33 & 90.44 & 84.95 & 86.99 & 87.25 & 87.78 & 87.07 & 85.8 & 83.93 & 84.9 & 85.18 & 89.57 & 88.99 & 88.91 & 89.46 \\ -STS22 & en & 56.35 & 53.89 & 56.37 & 59.65 & 61.96 & 67.54 & 64.64 & 55.06 & 60.97 & 39.75 & 67.21 & 65.67 & 61.72 & 67.95 & 63.06 & 64.71 & 59.64 & 63.2 & 65.66 & 67.3 & 68.75 & 69.35 & 66.13 & 66.17 & 64.3 & 66.61 & 65.76 & 62.66 & 62.39 & 64.32 & 65.33 \\ -STSBenchmark & en & 61.54 & 61.55 & 47.29 & 76.52 & 84.25 & 76.97 & 78.81 & 61.26 & 72.25 & 69.77 & 82.03 & 83.09 & 84.42 & 83.42 & 86.82 & 83.78 & 79.54 & 85.67 & 75.34 & 77.59 & 79.21 & 81.39 & 80.9 & 79.58 & 77.6 & 77.65 & 77.73 & 85.52 & 85.36 & 83.93 & 84.01 \\ -SummEval & en & 28.87 & 30.49 & 29.82 & 31.15 & 23.31 & 29.5 & 30.36 & 27.66 & 31.05 & 26.8 & 30.81 & 27.9 & 30.67 & 27.49 & 31.57 & 26.94 & 30.26 & 30.38 & 28.9 & 25.44 & 27.87 & 24.75 & 24.99 & 29.67 & 29.5 & 
30.21 & 30.64 & 31.39 & 29.64 & 29.91 & 30.08 \\ -Average & en & 41.97 & 42.06 & 38.33 & 45.45 & 48.72 & 52.35 & 56.0 & 40.28 & 45.21 & 34.95 & 56.26 & 56.53 & 52.44 & 57.78 & 54.71 & & 45.97 & 53.74 & 51.23 & 56.11 & 57.12 & 58.81 & 57.44 & 56.19 & 58.28 & 58.42 & 58.97 & 55.27 & 57.06 & 57.87 & 59.51 \\ diff --git a/plotstables/avg_table.txt b/plotstables/avg_table.txt deleted file mode 100644 index 1885217e..00000000 --- a/plotstables/avg_table.txt +++ /dev/null @@ -1,34 +0,0 @@ -Task ($\ rightarrow$) & Class. & Clust. & PairClass. & Rerank. & Retr. & STS & Summ. & Avg. \\ -Num. Datasets ($\ rightarrow$) & 12 & 11 & 3 & 4 & 15 & 10 & 1 & 56 \\ -Model ($\downarrow$) & Class. & Clust. & PairClass. & Rerank. & Retr. & STS & Summ. & Avg. \\ -Glove & 57.29 & 27.73 & 70.92 & 43.29 & 21.62 & 61.85 & 28.87 & 41.97 \\ -Komninos & 57.65 & 26.57 & 72.94 & 44.75 & 21.22 & 62.47 & 30.49 & 42.06 \\ -LASER2 & 53.18 & 15.28 & 68.86 & 41.44 & 7.93 & 63.27 & 26.8 & 34.95 \\ -LaBSE & 62.71 & 29.55 & 78.87 & 48.42 & 18.99 & 70.8 & 31.05 & 45.21 \\ -BERT & 61.66 & 30.12 & 56.33 & 43.44 & 10.59 & 54.36 & 29.82 & 38.33 \\ -coCondenser-msmarco & 64.71 & 37.64 & 81.74 & 51.84 & 32.96 & 76.47 & 29.5 & 52.35 \\ -SPECTER & 52.37 & 34.06 & 61.37 & 48.1 & 15.88 & 61.02 & 27.66 & 40.28 \\ -SimCSE-BERT-unsup & 62.5 & 29.04 & 70.33 & 46.47 & 20.29 & 74.33 & 31.15 & 45.45 \\ -SimCSE-BERT-sup & 67.32 & 33.43 & 73.68 & 47.54 & 21.82 & 79.12 & 23.31 & 48.72 \\ -MiniLM-L6 & 63.06 & 42.35 & 82.37 & 58.04 & 41.95 & 78.9 & 30.81 & 56.26 \\ -MiniLM-L12 & 63.21 & 41.81 & 82.41 & 58.44 & 42.69 & 79.8 & 27.9 & 56.53 \\ -MiniLM-L12-multilingual & 64.3 & 37.14 & 78.45 & 53.62 & 32.45 & 78.92 & 30.67 & 52.44 \\ -MPNet & 65.07 & 43.69 & 83.04 & 59.36 & 43.81 & 80.28 & 27.49 & 57.78 \\ -MPNet-multilingual & 67.91 & 38.4 & 80.81 & 53.8 & 35.34 & 80.73 & 31.57 & 54.71 \\ -Contriever & 66.68 & 41.1 & 82.53 & 53.14 & 41.88 & 76.51 & 30.36 & 56.0 \\ -Ada Similarity & 70.44 & 37.52 & 76.86 & 49.02 & & 78.6 & 26.94 & 
\\ -SGPT-125M-nli & 61.46 & 30.95 & 71.78 & 47.56 & 20.9 & 74.71 & 30.26 & 45.97 \\ -SGPT-5.8B-nli & 70.14 & 36.98 & 77.03 & 52.33 & 32.34 & 80.53 & 30.38 & 53.74 \\ -SGPT-125M-msmarco & 60.72 & 35.79 & 75.23 & 50.58 & 37.04 & 73.41 & 28.9 & 51.23 \\ -SGPT-1.3B-msmarco & 66.52 & 39.92 & 79.58 & 54.0 & 44.49 & 75.74 & 25.44 & 56.11 \\ -SGPT-2.7B-msmarco & 67.13 & 39.83 & 80.65 & 54.67 & 46.54 & 76.83 & 27.87 & 57.12 \\ -SGPT-5.8B-msmarco & 68.13 & 40.35 & 82.0 & 56.56 & 50.25 & 78.1 & 24.75 & 58.81 \\ -SGPT-BLOOM-7.1B-msmarco & 66.19 & 38.93 & 81.9 & 55.65 & 48.21 & 77.74 & 24.99 & 57.44 \\ -GTR-Base & 65.25 & 38.63 & 83.85 & 54.23 & 44.67 & 77.07 & 29.67 & 56.19 \\ -GTR-Large & 67.14 & 41.6 & 85.33 & 55.36 & 47.42 & 78.19 & 29.5 & 58.28 \\ -GTR-XL & 67.11 & 41.51 & 86.13 & 55.96 & 47.96 & 77.8 & 30.21 & 58.42 \\ -GTR-XXL & 67.41 & 42.42 & 86.12 & 56.65 & 48.48 & 78.38 & 30.64 & 58.97 \\ -ST5-Base & 69.81 & 40.21 & 85.17 & 53.09 & 33.63 & 81.14 & 31.39 & 55.27 \\ -ST5-Large & 72.31 & 41.65 & 84.97 & 54.0 & 36.71 & 81.83 & 29.64 & 57.06 \\ -ST5-XL & 72.84 & 42.34 & 86.06 & 54.71 & 38.47 & 81.66 & 29.91 & 57.87 \\ -ST5-XXL & 73.42 & 43.71 & 85.06 & 56.43 & 42.24 & 82.63 & 30.08 & 59.51 \\ diff --git a/plotstables/benchmark.pdf b/plotstables/benchmark.pdf deleted file mode 100644 index 4879780c..00000000 Binary files a/plotstables/benchmark.pdf and /dev/null differ diff --git a/plotstables/benchmark.png b/plotstables/benchmark.png deleted file mode 100644 index babd9a2b..00000000 Binary files a/plotstables/benchmark.png and /dev/null differ diff --git a/plotstables/benchmark_gpu.json b/plotstables/benchmark_gpu.json deleted file mode 100644 index e8f840fa..00000000 --- a/plotstables/benchmark_gpu.json +++ /dev/null @@ -1,134 +0,0 @@ -{ - "LASER2": { - "STS15": { - "speed_ms": 1.095552682876587, - "embedding_size_kb": 4.096 - } - }, - "komninos": { - "STS15": { - "speed_ms": 0.014420787493387857, - "embedding_size_kb": 1.2 - } - }, - "glove.6B.300d": { - "STS15": { - 
"speed_ms": 0.015402833620707195, - "embedding_size_kb": 1.2 - } - }, - "SGPT-125M-weightedmean-nli-bitfit": { - "STS15": { - "speed_ms": 0.3835549751917521, - "embedding_size_kb": 3.072 - } - }, - "SGPT-125M-weightedmean-msmarco-specb-bitfit": { - "STS15": { - "speed_ms": 0.40986963113149005, - "embedding_size_kb": 3.072 - } - }, - "SGPT-5.8B-weightedmean-nli-bitfit": { - "STS15": { - "speed_ms": 13.105161627133688, - "embedding_size_kb": 16.384 - } - }, - "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": { - "STS15": { - "speed_ms": 14.537005225817362, - "embedding_size_kb": 16.384 - } - }, - "all-MiniLM-L6-v2": { - "STS15": { - "speed_ms": 0.1488305727640788, - "embedding_size_kb": 1.536 - } - }, - "all-mpnet-base-v2": { - "STS15": { - "speed_ms": 0.35439515113830566, - "embedding_size_kb": 3.072 - } - }, - "paraphrase-multilingual-mpnet-base-v2": { - "STS15": { - "speed_ms": 0.39108145236968994, - "embedding_size_kb": 3.072 - } - }, - "sentence-t5-base": { - "STS15": { - "speed_ms": 0.41539565722147626, - "embedding_size_kb": 3.072 - } - }, - "sentence-t5-xxl": { - "STS15": { - "speed_ms": 15.400389790534973, - "embedding_size_kb": 3.072 - } - }, - "gtr-t5-base": { - "STS15": { - "speed_ms": 0.41492275396982825, - "embedding_size_kb": 3.072 - } - }, - "gtr-t5-xxl": { - "STS15": { - "speed_ms": 15.39513130982717, - "embedding_size_kb": 3.072 - } - }, - "contriever-base-msmarco": { - "STS15": { - "speed_ms": 0.34681657950083417, - "embedding_size_kb": 3.072 - } - }, - "msmarco-bert-co-condensor": { - "STS15": { - "speed_ms": 0.3462672630945841, - "embedding_size_kb": 3.072 - } - }, - "bert-base-uncased": { - "STS15": { - "speed_ms": 0.34756950537363684, - "embedding_size_kb": 3.072 - } - }, - "sup-simcse-bert-base-uncased": { - "STS15": { - "speed_ms": 0.3426841100056966, - "embedding_size_kb": 3.072 - } - }, - "unsup-simcse-bert-base-uncased": { - "STS15": { - "speed_ms": 0.3423287868499756, - "embedding_size_kb": 3.072 - } - }, - "LaBSE": { - "STS15": { - 
"speed_ms": 0.3441281318664551, - "embedding_size_kb": 3.072 - } - }, - "all-MiniLM-L12-v2": { - "STS15": { - "speed_ms": 0.2361156940460205, - "embedding_size_kb": 1.536 - } - }, - "allenai-specter": { - "STS15": { - "speed_ms": 0.3867043654123942, - "embedding_size_kb": 3.072 - } - } -} diff --git a/plotstables/benchmark_to_plot.py b/plotstables/benchmark_to_plot.py deleted file mode 100644 index ea7b30d8..00000000 --- a/plotstables/benchmark_to_plot.py +++ /dev/null @@ -1,348 +0,0 @@ -""" -Usage: -Inspired by Fig 3 from https://arxiv.org/pdf/2011.04006.pdf -""" -import json -import os -import sys - -import matplotlib.pyplot as plt -from mteb import MTEB - - -### GLOBAL VARIABLES ### - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - 
"STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", -] - - -TASK_LIST_SUMMARIZATION = [ - "SummEval", -] - -TASK_LIST_EN = ( - TASK_LIST_CLASSIFICATION - + TASK_LIST_CLUSTERING - + TASK_LIST_PAIR_CLASSIFICATION - + TASK_LIST_RERANKING - + TASK_LIST_RETRIEVAL - + TASK_LIST_STS - + TASK_LIST_SUMMARIZATION -) - -MODEL_TO_NAME = { - "bert-base-uncased": "BERT", - "gtr-t5-base": "GTR-Base", - "gtr-t5-large": "GTR-Large", - "gtr-t5-xl": "GTR-XL", - "gtr-t5-xxl": "GTR-XXL", - "sentence-t5-base": "ST5-Base", - "sentence-t5-large": "ST5-Large", - "sentence-t5-xl": "ST5-XL", - "sentence-t5-xxl": "ST5-XXL", - "SGPT-125M-weightedmean-msmarco-specb-bitfit": "SGPT-125M-msmarco", - "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": "SGPT-1.3B-msmarco", - "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": "SGPT-2.7B-msmarco", - "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": "SGPT-5.8B-msmarco", - "sgpt-bloom-7b1-msmarco": "SGPT-BLOOM-7.1B-msmarco", - "SGPT-125M-weightedmean-nli-bitfit": "SGPT-125M-nli", - "SGPT-5.8B-weightedmean-nli-bitfit": "SGPT-5.8B-nli", - "sup-simcse-bert-base-uncased": "SimCSE-BERT-sup", - "contriever-base-msmarco": "Contriever", - "msmarco-bert-co-condensor": "coCondenser-msmarco", # They write it as coCondenser in the paper - "unsup-simcse-bert-base-uncased": "SimCSE-BERT-unsup", - "glove.6B.300d": "Glove", - "komninos": "Komninos", - "all-MiniLM-L6-v2": "MiniLM-L6", - "all-MiniLM-L12-v2": "MiniLM-L12", - "paraphrase-multilingual-MiniLM-L12-v2": "MiniLM-L12-multilingual", - "all-mpnet-base-v2": "MPNet", - "paraphrase-multilingual-mpnet-base-v2": "MPNet-multilingual", - "allenai-specter": "SPECTER", - "text-similarity-ada-001": "Ada Similarity", -} - -NAME_TO_ARCH = { - "gtr": "T5", - "st5": "T5", - "sgpt": "GPT", - "simcse": "BERT", - "contriever": "BERT", - "bert": "BERT", - "cocondenser": "BERT", - "specter": "SciBERT", - "mpnet": "MPNet", - "minilm": "MiniLM", - "laser2": "LASER", - "labse": "BERT", - "glove": 
"WordEmbeddings", - "komninos": "WordEmbeddings", -} - -# Base from: -# https://coolors.co/palette/ff5400-ff6d00-ff8500-ff9100-ff9e00-00b4d8-0096c7-0077b6-023e8a-03045e -# Yellow tones from: -# https://coolors.co/palette/6ab6dc-49a6d4-2f94c6-277ba5-1f6284-e0b700-ffd20a-ffda33-ffe15c-ffe570 -# Green from: -# https://coolors.co/palette/f94144-f3722c-f8961e-f9844a-f9c74f-90be6d-43aa8b-4d908e-577590-277da1 -MODEL_TO_COLOR = { - "MiniLM": "#BAF19C",#"#017600", # Green - "MPNet": "#F94144",#"#007A7A", # Light Green - "GTR": "#FF5400",#"#221D91", # Blue 1 - "ST5": "#FF9E00",#"#86D4F1", # Blue 2 - "SGPT": "#00B4D8",#"#7B3FB9", # Purple - "SimCSE": "#F9C74F",#"#2070B4", # Blue 3 - "LaBSE": "#F9C74F",#"#2070B4", # Blue 3 - "SPECTER": "#E0B700", # Shade of #2070B4 - "Glove": "#023E8A",#"#9BC7DD", # Light Blue - "LASER2": "#03045E", # Grey -} - -ARCH_TO_COLOR = { - "T5": MODEL_TO_COLOR["GTR"], - "GPT": MODEL_TO_COLOR["SGPT"], - "BERT": MODEL_TO_COLOR["SimCSE"], - "SciBERT": MODEL_TO_COLOR["SPECTER"], - "MiniLM": MODEL_TO_COLOR["MiniLM"], - "MPNet": MODEL_TO_COLOR["MPNet"], - "WordEmbeddings": MODEL_TO_COLOR["Glove"], - "LASER": MODEL_TO_COLOR["LASER2"], -} - - -### LOGIC ### - -# Get average MTEB performance - -results_folder = sys.argv[1].strip("/") -benchmark_json = sys.argv[2] - -all_results = {} - -for model_name in os.listdir(results_folder): - model_res_folder = os.path.join(results_folder, model_name) - if os.path.isdir(model_res_folder): - all_results.setdefault(model_name, {}) - for file_name in os.listdir(model_res_folder): - if not file_name.endswith(".json"): - print(f"Skipping non-json {file_name}") - continue - with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f: - results = json.load(f) - all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}} - -def get_row(dataset, model_name, limit_langs=[], skip_langs=[]): - # CQADupstackRetrieval uses the same metric as its subsets - tasks = 
MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks - assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1." - main_metric = tasks[0].description["main_score"] - test_result = all_results.get(model_name, {}). get(dataset, {}) - - # Dev / Val set is used for MSMARCO (See BEIR paper) - if "MSMARCO" in dataset: - test_result = ( - test_result.get("dev") if "dev" in test_result else test_result.get("validation") - ) - else: - test_result = test_result.get("test") - - for lang in tasks[0].description["eval_langs"]: - if (limit_langs and lang not in limit_langs) or (skip_langs and lang in skip_langs): - continue - elif test_result is None: - raise NotImplementedError(f"Got no test result {test_result} for ds: {dataset} model: {model_name}") - - test_result_lang = test_result.get(lang, test_result) - if main_metric == "cosine_spearman": - test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman") - elif main_metric == "ap": - test_result_lang = test_result_lang.get("cos_sim", {}).get("ap") - else: - test_result_lang = test_result_lang.get(main_metric) - - if test_result_lang is None: - raise NotImplementedError - - return test_result_lang - raise NotImplementedError - -results_avg = {} - -for model in all_results: - try: - model_task_results = [get_row(task, model, limit_langs=["en", "en-en"]) for task in TASK_LIST_EN] - except: - continue - results_avg[model] = 100 * (sum(model_task_results) / len(model_task_results)) - - -with open(benchmark_json, "r") as f: - gpu_bench = json.load(f) - -import numpy as np - -fig, ax = plt.subplots(figsize=(14,8)) - -for k, v in gpu_bench.items(): - if k in ("specs", "sgpt-bloom-7b1-msmarco", "paraphrase-multilingual-MiniLM-L12-v2", "paraphrase-multilingual-mpnet-base-v2"): - continue - - model_name = MODEL_TO_NAME.get(k, k) - model_arch = NAME_TO_ARCH.get(model_name.split(" ")[0].split("-")[0].lower(), (model_name)) - color = ARCH_TO_COLOR[model_arch] - - if k not in 
results_avg: - print(f"Missing average score for {k}") - continue - - speed = 1000 / v["STS15"]["speed_ms"] - score = results_avg[k] - - ax.scatter( - speed, - score, - label=model_arch, - color=color, - s=v["STS15"]["embedding_size_kb"] * 150, - alpha=.5 - ) - # Empirical offsets - x_offset = y_offset = 0 - if model_name in ("ST5-Base"): - x_offset = 0.5 * speed - elif model_name in ("GTR-Base"): - x_offset = 0.5 * speed - y_offset = -0.01 * score - elif model_name in ("Contriever"): - x_offset = -0.14 * speed - y_offset = 0.018 * score - elif model_name in ("LaBSE"): - x_offset = 0.45 * speed - y_offset = 0.01 * score - elif model_name in ("GTR-XXL", "ST5-XXL"): - x_offset = -0.65 * speed - if model_name == "GTR-XXL": - y_offset = 0.01 * score - elif model_name == "Komninos": - x_offset = 0.4 * speed - y_offset = 0.05 * score - elif model_name in ("Glove", "SPECTER"): - x_offset = 0.2 * speed - y_offset = -0.025 * score - elif model_name.startswith("SGPT-5.8B"): - x_offset = 0.3 * speed - y_offset = 0.05 * score - elif model_name.startswith("SGPT-125M-nli"): - x_offset = -0.45 * speed - y_offset = -0.008 * score - elif model_name.startswith("SGPT-125M-msmarco"): - x_offset = -0.2 * speed - y_offset = 0.01 * score - elif model_name.startswith("MiniLM-L12"): - y_offset = -0.01 * score - x_offset = -0.15 * speed - elif model_arch in ("BERT", "MiniLM", "MPNet", "LASER") or model_name.startswith("SGPT-125M"): - x_offset = -0.2 * speed - - ax.text( - speed - x_offset, - score - y_offset, - model_name, - ) - - # Annotate does not work with logscale, https://stackoverflow.com/questions/21140385/matplotlib-annotate-doesnt-work-on-log-scale - #ax.annotate( - # MODEL_TO_NAME.get(k, k), - # xy=(np.log10(1000 / v["STS15"]["speed_ms"]), results_avg[k] - offset) - #) - -ax.set_xlabel("Speed (examples per sec)") -ax.set_ylabel("MTEB Score") -ax.set_xscale('log') -ax.grid(alpha=0.5) - -# Create deduplicated Global Legend -handles, labels = plt.gca().get_legend_handles_labels() 
-by_label = dict(zip(labels, handles)) -lgnd = plt.legend( - by_label.values(), - by_label.keys(), - title="Base Architecture", - loc=(0.08,0.08), # "lower left", -) -# Rescale bubbles to have the same size -for handle in lgnd.legendHandles: - handle.set_sizes([70.0]) - - -plt.savefig('benchmark.pdf', dpi=300, bbox_inches='tight') diff --git a/plotstables/bitext.txt b/plotstables/bitext.txt deleted file mode 100644 index fbe13655..00000000 --- a/plotstables/bitext.txt +++ /dev/null @@ -1,118 +0,0 @@ -Dataset & Language & LASER2 & LaBSE & MiniLM-L12-multilingual & MPNet-multilingual & SGPT-BLOOM-7.1B-msmarco \\ -BUCC & de-en & 99.21 & 99.35 & 97.11 & 98.59 & 54.0 \\ -BUCC & fr-en & 98.39 & 98.72 & 94.99 & 96.89 & 97.06 \\ -BUCC & ru-en & 97.62 & 97.78 & 95.06 & 96.44 & 45.3 \\ -BUCC & zh-en & 97.7 & 99.16 & 95.63 & 97.56 & 97.96 \\ -Tatoeba & sqi-eng & 97.22 & 96.76 & 98.17 & 98.57 & 10.38 \\ -Tatoeba & fry-eng & 42.07 & 89.31 & 31.13 & 43.54 & 24.62 \\ -Tatoeba & kur-eng & 19.09 & 83.59 & 46.94 & 61.44 & 8.26 \\ -Tatoeba & tur-eng & 98.03 & 98.0 & 95.08 & 96.17 & 6.15 \\ -Tatoeba & deu-eng & 99.07 & 99.2 & 97.02 & 97.73 & 70.1 \\ -Tatoeba & nld-eng & 95.35 & 96.07 & 94.58 & 95.5 & 29.74 \\ -Tatoeba & ron-eng & 96.52 & 96.92 & 95.3 & 96.43 & 27.23 \\ -Tatoeba & ang-eng & 25.22 & 59.28 & 10.24 & 16.72 & 28.76 \\ -Tatoeba & ido-eng & 80.86 & 89.42 & 40.25 & 43.91 & 43.91 \\ -Tatoeba & jav-eng & 9.95 & 79.77 & 17.04 & 23.39 & 15.02 \\ -Tatoeba & isl-eng & 94.32 & 94.75 & 24.07 & 59.25 & 6.29 \\ -Tatoeba & slv-eng & 95.4 & 96.03 & 96.92 & 97.08 & 10.14 \\ -Tatoeba & cym-eng & 5.85 & 92.0 & 13.25 & 22.31 & 6.97 \\ -Tatoeba & kaz-eng & 53.3 & 87.49 & 34.89 & 61.49 & 3.32 \\ -Tatoeba & est-eng & 96.43 & 96.55 & 97.33 & 98.4 & 4.76 \\ -Tatoeba & heb-eng & 0.0 & 91.53 & 86.88 & 88.26 & 1.69 \\ -Tatoeba & gla-eng & 1.52 & 85.66 & 3.61 & 4.72 & 2.09 \\ -Tatoeba & mar-eng & 92.93 & 92.65 & 92.38 & 93.83 & 45.53 \\ -Tatoeba & lat-eng & 64.81 & 80.07 & 19.47 & 24.25 & 28.76 \\ 
-Tatoeba & bel-eng & 79.54 & 95.0 & 67.73 & 79.94 & 8.03 \\ -Tatoeba & pms-eng & 36.23 & 64.57 & 30.7 & 34.19 & 31.94 \\ -Tatoeba & gle-eng & 4.2 & 93.8 & 11.62 & 16.85 & 3.26 \\ -Tatoeba & pes-eng & 93.13 & 94.7 & 92.59 & 93.47 & 12.13 \\ -Tatoeba & nob-eng & 95.77 & 98.4 & 97.73 & 98.53 & 21.07 \\ -Tatoeba & bul-eng & 93.57 & 94.58 & 92.65 & 93.52 & 20.09 \\ -Tatoeba & cbk-eng & 77.17 & 79.44 & 55.37 & 58.68 & 64.63 \\ -Tatoeba & hun-eng & 95.2 & 96.55 & 91.58 & 94.18 & 5.07 \\ -Tatoeba & uig-eng & 56.49 & 92.4 & 24.39 & 48.35 & 1.27 \\ -Tatoeba & rus-eng & 92.58 & 93.75 & 91.87 & 92.92 & 59.84 \\ -Tatoeba & spa-eng & 97.33 & 98.4 & 95.42 & 97.0 & 94.48 \\ -Tatoeba & hye-eng & 88.72 & 94.09 & 93.28 & 94.38 & 0.5 \\ -Tatoeba & tel-eng & 96.72 & 97.86 & 36.4 & 79.73 & 64.62 \\ -Tatoeba & afr-eng & 92.59 & 96.18 & 58.22 & 72.96 & 16.62 \\ -Tatoeba & mon-eng & 3.42 & 95.91 & 95.04 & 96.14 & 2.85 \\ -Tatoeba & arz-eng & 66.16 & 76.0 & 51.26 & 55.69 & 70.66 \\ -Tatoeba & hrv-eng & 96.72 & 96.95 & 95.98 & 97.0 & 12.79 \\ -Tatoeba & nov-eng & 60.02 & 74.38 & 47.99 & 50.23 & 52.23 \\ -Tatoeba & gsw-eng & 27.52 & 46.5 & 25.74 & 25.12 & 21.03 \\ -Tatoeba & nds-eng & 77.13 & 79.42 & 32.16 & 38.88 & 23.92 \\ -Tatoeba & ukr-eng & 93.52 & 93.97 & 92.82 & 92.67 & 22.06 \\ -Tatoeba & uzb-eng & 23.2 & 84.23 & 17.14 & 23.19 & 4.71 \\ -Tatoeba & lit-eng & 96.2 & 96.47 & 93.16 & 95.37 & 4.49 \\ -Tatoeba & ina-eng & 93.93 & 95.37 & 79.13 & 84.32 & 73.67 \\ -Tatoeba & lfn-eng & 63.39 & 67.54 & 47.02 & 49.56 & 44.85 \\ -Tatoeba & zsm-eng & 95.41 & 95.62 & 95.31 & 95.8 & 79.95 \\ -Tatoeba & ita-eng & 94.32 & 92.72 & 93.05 & 93.76 & 65.04 \\ -Tatoeba & cmn-eng & 85.62 & 95.1 & 94.93 & 95.83 & 91.45 \\ -Tatoeba & lvs-eng & 95.33 & 95.88 & 97.87 & 97.53 & 6.55 \\ -Tatoeba & glg-eng & 96.14 & 96.82 & 94.0 & 95.32 & 79.86 \\ -Tatoeba & ceb-eng & 9.93 & 64.42 & 8.05 & 7.39 & 6.64 \\ -Tatoeba & bre-eng & 31.2 & 15.07 & 5.56 & 6.42 & 4.67 \\ -Tatoeba & ben-eng & 89.43 & 88.55 & 36.48 & 64.9 & 
75.98 \\ -Tatoeba & swg-eng & 33.1 & 59.36 & 26.31 & 22.8 & 16.89 \\ -Tatoeba & arq-eng & 26.63 & 42.69 & 18.6 & 19.84 & 27.75 \\ -Tatoeba & kab-eng & 65.88 & 4.31 & 1.16 & 1.41 & 1.69 \\ -Tatoeba & fra-eng & 94.28 & 94.86 & 91.72 & 93.12 & 91.44 \\ -Tatoeba & por-eng & 94.54 & 94.14 & 92.13 & 93.02 & 92.62 \\ -Tatoeba & tat-eng & 34.74 & 85.92 & 10.25 & 10.89 & 3.59 \\ -Tatoeba & oci-eng & 58.13 & 65.81 & 38.57 & 43.49 & 40.17 \\ -Tatoeba & pol-eng & 97.32 & 97.22 & 94.28 & 96.95 & 14.09 \\ -Tatoeba & war-eng & 8.25 & 60.29 & 7.25 & 7.42 & 10.38 \\ -Tatoeba & aze-eng & 82.41 & 94.93 & 62.1 & 76.36 & 6.32 \\ -Tatoeba & vie-eng & 96.73 & 97.2 & 95.12 & 97.23 & 94.2 \\ -Tatoeba & nno-eng & 72.75 & 94.48 & 76.34 & 81.41 & 16.28 \\ -Tatoeba & cha-eng & 14.86 & 31.77 & 15.98 & 12.59 & 23.26 \\ -Tatoeba & mhr-eng & 6.86 & 15.74 & 6.89 & 7.57 & 1.56 \\ -Tatoeba & dan-eng & 95.22 & 95.71 & 94.8 & 96.17 & 23.52 \\ -Tatoeba & ell-eng & 96.2 & 95.35 & 95.43 & 94.93 & 5.34 \\ -Tatoeba & amh-eng & 80.82 & 91.47 & 36.21 & 53.49 & 0.03 \\ -Tatoeba & pam-eng & 3.24 & 10.73 & 5.41 & 5.39 & 5.85 \\ -Tatoeba & hsb-eng & 45.75 & 67.11 & 36.1 & 44.32 & 9.68 \\ -Tatoeba & srp-eng & 93.64 & 94.43 & 92.24 & 94.12 & 11.69 \\ -Tatoeba & epo-eng & 96.61 & 98.2 & 41.73 & 55.12 & 26.2 \\ -Tatoeba & kzj-eng & 4.46 & 11.33 & 6.24 & 5.88 & 5.17 \\ -Tatoeba & awa-eng & 33.74 & 71.7 & 33.43 & 42.83 & 35.01 \\ -Tatoeba & fao-eng & 57.04 & 87.4 & 27.51 & 38.24 & 12.61 \\ -Tatoeba & mal-eng & 98.16 & 98.45 & 32.2 & 88.46 & 83.3 \\ -Tatoeba & ile-eng & 87.88 & 85.58 & 57.71 & 60.36 & 59.59 \\ -Tatoeba & bos-eng & 95.86 & 94.92 & 93.27 & 94.02 & 13.65 \\ -Tatoeba & cor-eng & 4.45 & 10.11 & 3.42 & 3.53 & 2.83 \\ -Tatoeba & cat-eng & 95.8 & 95.38 & 94.42 & 96.05 & 88.31 \\ -Tatoeba & eus-eng & 93.32 & 95.01 & 23.18 & 31.33 & 53.38 \\ -Tatoeba & yue-eng & 87.75 & 89.58 & 71.45 & 77.58 & 77.03 \\ -Tatoeba & swe-eng & 95.31 & 95.63 & 94.42 & 95.45 & 19.53 \\ -Tatoeba & dtp-eng & 7.39 & 10.85 & 5.69 & 5.03 & 
3.41 \\ -Tatoeba & kat-eng & 81.16 & 95.02 & 95.44 & 95.46 & 0.42 \\ -Tatoeba & jpn-eng & 93.78 & 95.38 & 90.41 & 92.51 & 71.36 \\ -Tatoeba & csb-eng & 27.03 & 52.57 & 21.56 & 23.73 & 10.03 \\ -Tatoeba & xho-eng & 4.68 & 91.55 & 4.52 & 6.53 & 5.51 \\ -Tatoeba & orv-eng & 23.24 & 38.93 & 15.1 & 23.77 & 5.79 \\ -Tatoeba & ind-eng & 92.98 & 93.66 & 92.74 & 93.5 & 88.04 \\ -Tatoeba & tuk-eng & 16.35 & 75.27 & 15.16 & 14.91 & 5.48 \\ -Tatoeba & max-eng & 36.96 & 63.26 & 45.25 & 48.77 & 36.14 \\ -Tatoeba & swh-eng & 55.66 & 84.5 & 14.48 & 16.02 & 16.74 \\ -Tatoeba & hin-eng & 95.32 & 96.87 & 97.62 & 97.75 & 85.23 \\ -Tatoeba & dsb-eng & 42.34 & 64.81 & 33.43 & 36.85 & 8.78 \\ -Tatoeba & ber-eng & 77.63 & 8.4 & 4.43 & 4.88 & 4.92 \\ -Tatoeba & tam-eng & 87.32 & 89.0 & 24.64 & 73.6 & 72.76 \\ -Tatoeba & slk-eng & 95.82 & 96.5 & 95.15 & 96.62 & 9.98 \\ -Tatoeba & tgl-eng & 63.19 & 96.02 & 13.09 & 17.67 & 10.7 \\ -Tatoeba & ast-eng & 76.35 & 90.68 & 62.17 & 70.08 & 71.13 \\ -Tatoeba & mkd-eng & 93.63 & 93.6 & 91.0 & 93.02 & 10.47 \\ -Tatoeba & khm-eng & 74.19 & 78.37 & 32.11 & 58.8 & 0.37 \\ -Tatoeba & ces-eng & 95.52 & 96.68 & 95.12 & 95.73 & 9.55 \\ -Tatoeba & tzl-eng & 36.56 & 58.88 & 25.46 & 34.21 & 27.82 \\ -Tatoeba & urd-eng & 84.23 & 93.22 & 94.57 & 95.12 & 70.1 \\ -Tatoeba & ara-eng & 90.14 & 88.8 & 87.93 & 90.19 & 85.37 \\ -Tatoeba & kor-eng & 87.97 & 90.95 & 92.52 & 93.07 & 22.39 \\ -Tatoeba & yid-eng & 2.49 & 88.79 & 14.38 & 30.73 & 0.16 \\ -Tatoeba & fin-eng & 96.98 & 96.37 & 93.1 & 95.92 & 3.41 \\ -Tatoeba & tha-eng & 96.38 & 96.14 & 96.72 & 95.99 & 2.22 \\ -Tatoeba & wuu-eng & 75.09 & 90.18 & 76.0 & 78.25 & 79.58 \\ -Average & mix & 67.42 & 81.75 & 57.98 & 63.38 & 31.08 \\ diff --git a/plotstables/dataset_sim.py b/plotstables/dataset_sim.py deleted file mode 100644 index f61eaf96..00000000 --- a/plotstables/dataset_sim.py +++ /dev/null @@ -1,304 +0,0 @@ -# pip install GitPython mteb beir seaborn -import os -import random - -import seaborn as sns -import 
matplotlib.pyplot as plt -from mteb import MTEB -from mteb.evaluation.evaluators.utils import cos_sim -import numpy as np -import pandas as pd -from sentence_transformers import SentenceTransformer -import torch - - -if os.path.exists("sim_data.csv"): - data_emb_df = (pd.read_csv("sim_data.csv", index_col=0) * 100).round(0).astype(int) - plt.figure(figsize=(40, 24)) - # define the mask to set the values in the upper triangle to True - mask = np.triu(np.ones_like(data_emb_df, dtype=np.bool)) - heatmap = sns.heatmap( - data_emb_df, - mask=mask, - vmin=data_emb_df.values.min(), - vmax=data_emb_df.values.max(), - annot=True, - cmap='Blues', - fmt='g', - ) - heatmap.set_xticklabels(heatmap.get_xmajorticklabels(), fontsize=16)#, fontweight="bold") - heatmap.set_yticklabels(heatmap.get_ymajorticklabels(), fontsize=16)#, fontweight="bold") - # Save - plt.savefig('heatmap_data.pdf', dpi=450, bbox_inches='tight') - exit() - - -### GLOBAL VARIABLES ### - -DATAPATH = "./" - -SEED = 42 - -K_SAMPLES = 100 -LEN_KEYS = { - "text", - "sentences", - "sentence1", - "sentence2", - "sent1", - "sent2" - "query", - "positive", - "negative" - "queries", - "corpus", - "machine_summaries", - "human_summaries", -} - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ 
- "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackAndroidRetrieval", - "CQADupstackEnglishRetrieval", - "CQADupstackGamingRetrieval", - "CQADupstackGisRetrieval", - "CQADupstackMathematicaRetrieval", - "CQADupstackPhysicsRetrieval", - "CQADupstackProgrammersRetrieval", - "CQADupstackStatsRetrieval", - "CQADupstackTexRetrieval", - "CQADupstackUnixRetrieval", - "CQADupstackWebmastersRetrieval", - "CQADupstackWordpressRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", -] - - -TASK_LIST_SUMMARIZATION = [ - "SummEval", -] - -TASK_LIST_EN = ( - TASK_LIST_CLASSIFICATION - + TASK_LIST_CLUSTERING - + TASK_LIST_PAIR_CLASSIFICATION - + TASK_LIST_RERANKING - + TASK_LIST_RETRIEVAL - + TASK_LIST_STS - + TASK_LIST_SUMMARIZATION -) - -### LOGIC ### - -def get_samples_beir(hf_hub_name): - # Somehow needs to be set in the function scope - random.seed(SEED) - from beir.datasets.data_loader import GenericDataLoader as BeirDataLoader - path = os.path.join(DATAPATH, hf_hub_name) - print("GOT PATH", path) - split = "validation" if "MSMARCO" in hf_hub_name else "test" - if not os.path.exists(path): - from beir import util - if "cqadupstack" in hf_hub_name: - hf_hub_name = "cqadupstack" - url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{hf_hub_name}.zip" - util.download_and_unzip(url, DATAPATH) - corpus, queries, relevant_docs = BeirDataLoader(path).load(split=split) - # Pick shortest k samples - samples = [v["text"] + " " + v["title"] for v in sorted(list(corpus.values()), 
key=lambda x: len(x["text"]))[:K_SAMPLES]] - # Optionally randomly pick - #samples = [v["text"] + " " + v["title"] for v in random.choices(sorted(list(corpus.values()), key=lambda x: len(x["text"])), k=K_SAMPLES)] - return samples - -def load_data(hf_hub_name, subset=None): - """ - Load dataset from Hub via cloning for easy offline usage with HF_DATASETS_OFFLINE=1 - Can be replaced with just `load_dataset(hf_hub_name, subset)` if preferred - """ - from datasets import load_dataset - path = os.path.join(DATAPATH, hf_hub_name) - if os.path.exists(path): - dataset = load_dataset(path, subset) - else: - from git import Repo - Repo.clone_from("https://huggingface.co/datasets/mteb/" + hf_hub_name, path) - dataset = load_dataset(path, subset) - return dataset - -def get_samples_ds(hf_hub_name): - ds = load_data(hf_hub_name) - # Optionally shuffle - # .shuffle(seed=SEED) - assert "test" in ds, f"No test set for {hf_hub_name}" - len_keys = list(set(ds["test"].features.keys()) & LEN_KEYS) - split = "test" - k = len_keys[0] - if isinstance(ds[split][k][0], str): - # Select K shortest examples - samples = sorted([x for x in ds[split][k]], key=len)[:K_SAMPLES] - elif isinstance(ds[split][k][0], list): - assert isinstance(ds[split][k][0][0], str), f"Too nested: {k}" - # Select K shortest examples - samples = [y for x in ds[split][k] for y in x] - samples = sorted(samples, key=len)[:K_SAMPLES] - # Optionally randomly select - # random.choices(samples, k=K_SAMPLES) - else: - raise ValueError(f"Unknown type {type(ds[split][k])}") - return samples - - -embeddings = {} -model = SentenceTransformer("sentence-transformers/sentence-t5-xxl") - -# Optionally custom selection -# TASKS = ["ArguAna", "ClimateFEVER", "DBPedia", "FEVER", "FiQA2018", "HotpotQA", "NFCorpus", "NQ", "QuoraRetrieval", "SCIDOCS", "SciFact", "Touche2020", "TRECCOVID"] - -TASKS = TASK_LIST_EN - - -for task in MTEB(tasks=TASKS).tasks: - print("Task: ", task) - if "hf_hub_name" in task.description: - hub_name = hub_url 
= task.description.get("hf_hub_name") - samples = get_samples_ds(hub_name.split("/")[-1]) - if "beir_name" in task.description: - hub_name = hub_url = "BeIR/" + task.description.get("beir_name") - samples = get_samples_beir("/".join(hub_name.split("/")[1:])) - embeddings[task.description["name"]] = model.encode(samples) - -# Plot 1: Compute all cos sims & then average -""" -data_dict = [] -for i, task_1 in enumerate(TASKS): - data_dict.append({task_2: torch.mean(cos_sim(embeddings[task_1], embeddings[task_2])).item() for j, task_2 in enumerate(TASKS)}) - -data_df = pd.DataFrame(data_dict) -data_df.set_index(data_df.columns, inplace=True) - - -# Save -data_df.to_csv("data.csv") - -import seaborn as sns -import matplotlib.pyplot as plt - -plt.figure(figsize=(32, 16)) -# define the mask to set the values in the upper triangle to True -mask = np.triu(np.ones_like(data_df, dtype=np.bool)) -#heatmap = sns.heatmap(data_df, mask=mask, vmin=-1, vmax=1, annot=True, cmap='Blues') -heatmap = sns.heatmap(data_df, mask=mask, vmin=data_df.values.min(), vmax=data_df.values.max(), annot=True, cmap='Blues') -heatmap.set_title('Similarity of MTEB datasets', fontdict={'fontsize':18}, pad=16) - -plt.savefig('heatmap_data.pdf', dpi=300, bbox_inches='tight') -""" - - -# Plot 2: Average embeddings & then compute cos_sim - -data_dict_emb = [] -for i, task_1 in enumerate(TASKS): - data_dict_emb.append({task_2: cos_sim(np.mean(embeddings[task_1], axis=0), np.mean(embeddings[task_2], axis=0)).item() for j, task_2 in enumerate(TASKS)}) - -data_emb_df = pd.DataFrame(data_dict_emb) -data_emb_df.set_index(data_emb_df.columns, inplace=True) - - -plt.figure(figsize=(36, 24)) -# define the mask to set the values in the upper triangle to True -mask = np.triu(np.ones_like(data_emb_df, dtype=np.bool)) -heatmap = sns.heatmap(data_emb_df, mask=mask, vmin=data_emb_df.values.min(), vmax=data_emb_df.values.max(), annot=True, cmap='Blues') -#heatmap.set_title('Similarity of MTEB datasets', 
fontdict={'fontsize':18}, pad=16) - -# Save -data_emb_df.to_csv("sim_data.csv") -plt.savefig('heatmap_data.pdf', dpi=450, bbox_inches='tight') - - -# Plot 3: Min (/Max) embeddings & then compute cos_sim -""" -data_dict_emb = [] -for i, task_1 in enumerate(TASKS): - data_dict_emb.append({task_2: cos_sim(np.min(embeddings[i], axis=0), np.min(embeddings[j], axis=0)).item() for j, task_2 in enumerate(TASKS)}) - -data_emb_df = pd.DataFrame(data_dict_emb) -data_emb_df.set_index(data_emb_df.columns, inplace=True) - -import seaborn as sns -import matplotlib.pyplot as plt - -plt.figure(figsize=(32, 16)) -# define the mask to set the values in the upper triangle to True -mask = np.triu(np.ones_like(data_emb_df, dtype=np.bool)) -heatmap = sns.heatmap(data_emb_df, mask=mask, vmin=data_emb_df.values.min(), vmax=data_emb_df.values.max(), annot=True, cmap='Blues') -heatmap.set_title('Similarity of MTEB datasets', fontdict={'fontsize':18}, pad=16) - -plt.savefig('heatmap_data.pdf', dpi=300, bbox_inches='tight') -""" diff --git a/plotstables/heatmap_data.pdf b/plotstables/heatmap_data.pdf deleted file mode 100644 index c796621b..00000000 Binary files a/plotstables/heatmap_data.pdf and /dev/null differ diff --git a/plotstables/heatmap_mean_emb.pdf b/plotstables/heatmap_mean_emb.pdf deleted file mode 100644 index 04054bdd..00000000 Binary files a/plotstables/heatmap_mean_emb.pdf and /dev/null differ diff --git a/plotstables/heatmap_mean_emb_small.png b/plotstables/heatmap_mean_emb_small.png deleted file mode 100644 index ac5d2b47..00000000 Binary files a/plotstables/heatmap_mean_emb_small.png and /dev/null differ diff --git a/plotstables/heatmap_model.pdf b/plotstables/heatmap_model.pdf deleted file mode 100644 index 41569ce4..00000000 Binary files a/plotstables/heatmap_model.pdf and /dev/null differ diff --git a/plotstables/heatmap_model.png b/plotstables/heatmap_model.png deleted file mode 100644 index b8ea3078..00000000 Binary files a/plotstables/heatmap_model.png and /dev/null 
differ diff --git a/plotstables/heatmap_tasks.pdf b/plotstables/heatmap_tasks.pdf deleted file mode 100644 index 42918f34..00000000 Binary files a/plotstables/heatmap_tasks.pdf and /dev/null differ diff --git a/plotstables/heatmap_tasks.png b/plotstables/heatmap_tasks.png deleted file mode 100644 index 31b1519b..00000000 Binary files a/plotstables/heatmap_tasks.png and /dev/null differ diff --git a/plotstables/mteb_diagram.drawio b/plotstables/mteb_diagram.drawio deleted file mode 100644 index ca51accd..00000000 --- a/plotstables/mteb_diagram.drawio +++ /dev/null @@ -1 +0,0 @@  \ No newline at end of file diff --git a/plotstables/mteb_diagram.pdf b/plotstables/mteb_diagram.pdf deleted file mode 100644 index b39d212b..00000000 Binary files a/plotstables/mteb_diagram.pdf and /dev/null differ diff --git a/plotstables/mteb_diagram.png b/plotstables/mteb_diagram.png deleted file mode 100644 index d209768e..00000000 Binary files a/plotstables/mteb_diagram.png and /dev/null differ diff --git a/plotstables/multilingclf.txt b/plotstables/multilingclf.txt deleted file mode 100644 index 86efe576..00000000 --- a/plotstables/multilingclf.txt +++ /dev/null @@ -1,119 +0,0 @@ -Dataset & Language & LASER2 & LaBSE & MiniLM-L12-multilingual & MPNet-multilingual & SGPT-BLOOM-7.1B-msmarco \\ -AmazonCounterfactualClassification & de & 67.82 & 73.17 & 68.35 & 69.95 & 61.35 \\ -AmazonCounterfactualClassification & ja & 68.76 & 76.42 & 63.45 & 69.79 & 58.23 \\ -AmazonReviewsClassification & de & 31.07 & 39.92 & 35.91 & 39.52 & 29.7 \\ -AmazonReviewsClassification & es & 32.72 & 39.39 & 37.49 & 39.99 & 35.97 \\ -AmazonReviewsClassification & fr & 31.12 & 38.52 & 35.3 & 39.0 & 35.92 \\ -AmazonReviewsClassification & ja & 28.94 & 36.44 & 33.24 & 36.64 & 27.64 \\ -AmazonReviewsClassification & zh & 30.89 & 36.45 & 35.26 & 37.74 & 32.63 \\ -MassiveIntentClassification & af & 38.01 & 56.12 & 45.88 & 52.32 & 47.85 \\ -MassiveIntentClassification & am & 12.7 & 55.71 & 36.75 & 41.55 & 33.3 \\ 
-MassiveIntentClassification & ar & 37.16 & 50.86 & 45.14 & 51.43 & 59.25 \\ -MassiveIntentClassification & az & 19.98 & 58.97 & 47.42 & 56.98 & 45.24 \\ -MassiveIntentClassification & bn & 42.51 & 58.22 & 35.34 & 48.79 & 61.59 \\ -MassiveIntentClassification & cy & 17.33 & 50.16 & 26.12 & 27.87 & 44.92 \\ -MassiveIntentClassification & da & 45.61 & 58.25 & 57.73 & 62.77 & 51.23 \\ -MassiveIntentClassification & de & 44.79 & 56.21 & 50.71 & 59.57 & 56.1 \\ -MassiveIntentClassification & el & 46.71 & 57.03 & 58.7 & 62.62 & 46.13 \\ -MassiveIntentClassification & es & 45.44 & 58.32 & 59.66 & 64.43 & 66.35 \\ -MassiveIntentClassification & fa & 45.01 & 62.33 & 61.02 & 65.34 & 51.2 \\ -MassiveIntentClassification & fi & 45.94 & 60.12 & 57.54 & 62.28 & 45.33 \\ -MassiveIntentClassification & fr & 46.13 & 60.47 & 60.25 & 64.82 & 66.95 \\ -MassiveIntentClassification & he & 42.55 & 56.55 & 52.51 & 58.21 & 43.18 \\ -MassiveIntentClassification & hi & 40.2 & 59.4 & 58.37 & 62.77 & 63.54 \\ -MassiveIntentClassification & hu & 42.77 & 59.52 & 60.41 & 63.87 & 44.73 \\ -MassiveIntentClassification & hy & 28.07 & 56.2 & 51.6 & 57.74 & 38.13 \\ -MassiveIntentClassification & id & 45.81 & 61.12 & 59.85 & 65.43 & 64.06 \\ -MassiveIntentClassification & is & 39.86 & 54.9 & 30.83 & 37.05 & 44.35 \\ -MassiveIntentClassification & it & 48.25 & 59.83 & 59.61 & 64.68 & 60.77 \\ -MassiveIntentClassification & ja & 45.3 & 63.11 & 60.89 & 63.74 & 61.22 \\ -MassiveIntentClassification & jv & 24.3 & 50.98 & 32.37 & 36.49 & 50.94 \\ -MassiveIntentClassification & ka & 22.7 & 48.35 & 43.03 & 49.85 & 33.84 \\ -MassiveIntentClassification & km & 22.48 & 48.55 & 40.04 & 45.47 & 37.34 \\ -MassiveIntentClassification & kn & 4.32 & 56.24 & 40.98 & 50.63 & 53.54 \\ -MassiveIntentClassification & ko & 44.26 & 60.99 & 50.3 & 61.82 & 53.36 \\ -MassiveIntentClassification & lv & 39.75 & 57.1 & 54.68 & 61.29 & 46.5 \\ -MassiveIntentClassification & ml & 41.33 & 57.91 & 42.41 & 54.34 & 58.27 \\ 
-MassiveIntentClassification & mn & 16.2 & 58.5 & 51.77 & 56.59 & 40.28 \\ -MassiveIntentClassification & ms & 43.23 & 58.6 & 54.76 & 60.7 & 59.65 \\ -MassiveIntentClassification & my & 25.37 & 57.35 & 52.01 & 57.09 & 37.42 \\ -MassiveIntentClassification & nb & 37.74 & 57.91 & 55.5 & 62.6 & 49.41 \\ -MassiveIntentClassification & nl & 45.0 & 59.37 & 59.51 & 63.57 & 52.09 \\ -MassiveIntentClassification & pl & 44.99 & 59.71 & 59.43 & 64.3 & 50.48 \\ -MassiveIntentClassification & pt & 48.55 & 60.16 & 61.27 & 64.89 & 66.69 \\ -MassiveIntentClassification & ro & 44.3 & 57.92 & 58.39 & 62.8 & 50.53 \\ -MassiveIntentClassification & ru & 44.29 & 60.67 & 59.04 & 63.26 & 58.32 \\ -MassiveIntentClassification & sl & 44.72 & 59.37 & 57.36 & 63.51 & 47.74 \\ -MassiveIntentClassification & sq & 46.12 & 58.03 & 56.59 & 62.49 & 48.94 \\ -MassiveIntentClassification & sv & 45.95 & 59.66 & 59.43 & 64.73 & 50.79 \\ -MassiveIntentClassification & sw & 31.89 & 51.62 & 29.57 & 31.95 & 49.81 \\ -MassiveIntentClassification & ta & 29.63 & 55.04 & 36.77 & 50.17 & 56.4 \\ -MassiveIntentClassification & te & 36.03 & 58.32 & 40.72 & 52.82 & 54.71 \\ -MassiveIntentClassification & th & 43.39 & 56.58 & 58.97 & 61.11 & 44.43 \\ -MassiveIntentClassification & tl & 29.73 & 55.28 & 33.67 & 38.83 & 50.21 \\ -MassiveIntentClassification & tr & 43.93 & 60.91 & 59.9 & 64.54 & 46.56 \\ -MassiveIntentClassification & ur & 26.11 & 56.7 & 52.8 & 56.37 & 56.75 \\ -MassiveIntentClassification & vi & 44.33 & 56.67 & 56.61 & 59.68 & 64.53 \\ -MassiveIntentClassification & zh-CN & 40.62 & 63.86 & 61.99 & 65.33 & 67.07 \\ -MassiveIntentClassification & zh-TW & 32.93 & 59.51 & 58.77 & 62.35 & 62.89 \\ -MassiveScenarioClassification & af & 47.1 & 63.39 & 53.64 & 59.67 & 51.47 \\ -MassiveScenarioClassification & am & 17.7 & 62.02 & 41.89 & 48.97 & 34.87 \\ -MassiveScenarioClassification & ar & 45.21 & 57.72 & 51.74 & 57.78 & 65.21 \\ -MassiveScenarioClassification & az & 28.21 & 63.48 & 52.06 & 61.53 & 45.58 \\ 
-MassiveScenarioClassification & bn & 50.52 & 61.84 & 41.17 & 54.53 & 67.3 \\ -MassiveScenarioClassification & cy & 22.58 & 56.13 & 31.72 & 35.26 & 46.29 \\ -MassiveScenarioClassification & da & 54.87 & 65.24 & 66.87 & 71.0 & 53.52 \\ -MassiveScenarioClassification & de & 54.34 & 62.39 & 57.4 & 67.34 & 61.74 \\ -MassiveScenarioClassification & el & 55.47 & 64.58 & 66.14 & 68.81 & 48.96 \\ -MassiveScenarioClassification & es & 52.77 & 63.61 & 65.04 & 70.42 & 73.34 \\ -MassiveScenarioClassification & fa & 52.5 & 67.46 & 65.86 & 69.88 & 53.17 \\ -MassiveScenarioClassification & fi & 52.63 & 64.58 & 63.75 & 67.6 & 44.69 \\ -MassiveScenarioClassification & fr & 54.32 & 65.1 & 66.06 & 70.69 & 72.91 \\ -MassiveScenarioClassification & he & 52.41 & 63.53 & 59.2 & 65.16 & 43.1 \\ -MassiveScenarioClassification & hi & 47.37 & 64.4 & 65.21 & 67.92 & 69.27 \\ -MassiveScenarioClassification & hu & 53.43 & 65.82 & 66.56 & 70.3 & 45.16 \\ -MassiveScenarioClassification & hy & 33.57 & 61.25 & 56.11 & 63.02 & 38.73 \\ -MassiveScenarioClassification & id & 54.38 & 65.84 & 66.16 & 70.73 & 70.13 \\ -MassiveScenarioClassification & is & 49.78 & 61.94 & 37.52 & 44.16 & 44.21 \\ -MassiveScenarioClassification & it & 54.84 & 64.09 & 65.0 & 69.73 & 65.57 \\ -MassiveScenarioClassification & ja & 54.12 & 67.72 & 66.5 & 69.69 & 65.76 \\ -MassiveScenarioClassification & jv & 32.71 & 58.29 & 38.6 & 44.2 & 54.79 \\ -MassiveScenarioClassification & ka & 26.92 & 53.38 & 50.66 & 57.3 & 32.99 \\ -MassiveScenarioClassification & km & 27.23 & 56.18 & 46.96 & 53.14 & 39.34 \\ -MassiveScenarioClassification & kn & 10.06 & 61.74 & 45.73 & 56.08 & 60.5 \\ -MassiveScenarioClassification & ko & 52.01 & 67.26 & 55.66 & 68.52 & 55.69 \\ -MassiveScenarioClassification & lv & 44.82 & 61.87 & 59.8 & 66.28 & 44.35 \\ -MassiveScenarioClassification & ml & 49.1 & 62.26 & 47.69 & 60.13 & 65.53 \\ -MassiveScenarioClassification & mn & 21.51 & 62.6 & 57.07 & 60.85 & 38.72 \\ -MassiveScenarioClassification & ms & 53.6 
& 65.63 & 61.71 & 65.81 & 64.99 \\ -MassiveScenarioClassification & my & 29.72 & 62.94 & 59.1 & 63.03 & 36.84 \\ -MassiveScenarioClassification & nb & 43.9 & 64.29 & 64.25 & 70.24 & 51.8 \\ -MassiveScenarioClassification & nl & 53.33 & 65.16 & 65.52 & 70.37 & 56.32 \\ -MassiveScenarioClassification & pl & 52.92 & 64.56 & 65.04 & 68.99 & 49.98 \\ -MassiveScenarioClassification & pt & 53.41 & 63.28 & 65.79 & 70.09 & 71.46 \\ -MassiveScenarioClassification & ro & 50.48 & 62.41 & 64.17 & 67.95 & 53.69 \\ -MassiveScenarioClassification & ru & 51.84 & 65.25 & 65.24 & 69.92 & 61.6 \\ -MassiveScenarioClassification & sl & 51.29 & 64.25 & 64.01 & 70.81 & 48.04 \\ -MassiveScenarioClassification & sq & 55.65 & 64.54 & 64.31 & 69.63 & 50.06 \\ -MassiveScenarioClassification & sv & 54.64 & 66.01 & 67.14 & 71.6 & 51.73 \\ -MassiveScenarioClassification & sw & 42.04 & 58.36 & 34.86 & 37.29 & 54.22 \\ -MassiveScenarioClassification & ta & 36.72 & 59.08 & 42.62 & 55.96 & 62.77 \\ -MassiveScenarioClassification & te & 42.08 & 64.13 & 46.46 & 58.81 & 62.59 \\ -MassiveScenarioClassification & th & 52.15 & 64.34 & 67.01 & 69.44 & 45.18 \\ -MassiveScenarioClassification & tl & 37.34 & 60.23 & 37.37 & 43.99 & 52.06 \\ -MassiveScenarioClassification & tr & 52.56 & 65.43 & 66.55 & 70.4 & 47.21 \\ -MassiveScenarioClassification & ur & 32.6 & 61.52 & 60.43 & 62.9 & 64.26 \\ -MassiveScenarioClassification & vi & 50.97 & 61.05 & 60.72 & 65.71 & 70.61 \\ -MassiveScenarioClassification & zh-CN & 50.22 & 70.85 & 67.44 & 71.23 & 73.95 \\ -MassiveScenarioClassification & zh-TW & 42.32 & 67.08 & 65.7 & 68.73 & 70.3 \\ -MTOPDomainClassification & de & 74.08 & 86.95 & 79.2 & 85.73 & 82.05 \\ -MTOPDomainClassification & es & 73.47 & 84.07 & 83.04 & 86.96 & 93.55 \\ -MTOPDomainClassification & fr & 72.26 & 84.14 & 78.63 & 81.21 & 90.98 \\ -MTOPDomainClassification & hi & 72.95 & 85.11 & 81.36 & 84.76 & 89.33 \\ -MTOPDomainClassification & th & 72.68 & 81.24 & 79.99 & 82.51 & 60.49 \\ 
-MTOPIntentClassification & de & 51.62 & 63.42 & 54.23 & 61.27 & 61.92 \\ -MTOPIntentClassification & es & 52.75 & 64.44 & 60.28 & 66.59 & 74.49 \\ -MTOPIntentClassification & fr & 50.12 & 62.01 & 54.05 & 59.76 & 69.12 \\ -MTOPIntentClassification & hi & 45.55 & 62.58 & 59.9 & 62.37 & 64.85 \\ -MTOPIntentClassification & th & 50.07 & 64.61 & 61.96 & 64.8 & 49.36 \\ -Average & mix & 42.85 & 60.77 & 54.87 & 60.39 & 54.4 \\ diff --git a/plotstables/multilingsts.txt b/plotstables/multilingsts.txt deleted file mode 100644 index 2cd0b762..00000000 --- a/plotstables/multilingsts.txt +++ /dev/null @@ -1,29 +0,0 @@ -Dataset & Language & LASER2 & LaBSE & MiniLM-L12-multilingual & MPNet-multilingual & SGPT-BLOOM-7.1B-msmarco \\ -STS17 & ko-ko & 70.52 & 71.32 & 77.03 & 83.41 & 66.89 \\ -STS17 & ar-ar & 67.47 & 69.07 & 79.16 & 79.1 & 76.42 \\ -STS17 & en-ar & 65.05 & 74.51 & 81.22 & 80.85 & 78.07 \\ -STS17 & en-de & 66.66 & 73.85 & 84.22 & 83.28 & 59.1 \\ -STS17 & en-tr & 70.05 & 72.07 & 76.74 & 74.9 & 11.8 \\ -STS17 & es-en & 55.3 & 65.71 & 84.44 & 86.11 & 78.22 \\ -STS17 & es-es & 79.67 & 80.83 & 85.56 & 85.14 & 86.0 \\ -STS17 & fr-en & 70.82 & 76.98 & 76.59 & 81.17 & 80.46 \\ -STS17 & it-en & 70.98 & 76.99 & 82.35 & 84.24 & 51.58 \\ -STS17 & nl-en & 68.12 & 75.22 & 81.71 & 82.51 & 45.85 \\ -STS22 & de & 25.69 & 48.58 & 44.64 & 46.7 & 30.05 \\ -STS22 & es & 54.92 & 63.18 & 56.56 & 59.91 & 65.41 \\ -STS22 & pl & 18.34 & 39.3 & 33.74 & 33.65 & 31.13 \\ -STS22 & tr & 36.97 & 58.15 & 53.39 & 56.3 & 47.14 \\ -STS22 & ar & 42.57 & 57.67 & 46.2 & 52.19 & 58.67 \\ -STS22 & ru & 39.24 & 57.49 & 57.08 & 58.74 & 43.36 \\ -STS22 & zh & 49.41 & 63.02 & 58.75 & 61.75 & 66.78 \\ -STS22 & fr & 58.61 & 77.95 & 70.55 & 74.3 & 80.38 \\ -STS22 & de-en & 32.35 & 50.14 & 52.65 & 50.81 & 51.16 \\ -STS22 & es-en & 54.34 & 71.86 & 67.33 & 70.26 & 75.06 \\ -STS22 & it & 60.31 & 72.22 & 55.22 & 60.65 & 65.65 \\ -STS22 & pl-en & 53.63 & 69.41 & 69.02 & 73.07 & 53.31 \\ -STS22 & zh-en & 46.19 & 64.02 & 
65.71 & 67.96 & 68.45 \\ -STS22 & es-it & 42.21 & 69.69 & 47.67 & 53.7 & 65.5 \\ -STS22 & de-fr & 37.41 & 53.28 & 51.73 & 62.34 & 53.28 \\ -STS22 & de-pl & 15.67 & 58.69 & 44.22 & 40.53 & 43.05 \\ -STS22 & fr-pl & 39.44 & 61.98 & 50.71 & 84.52 & 28.17 \\ -Average & mix & 51.55 & 65.67 & 64.23 & 67.71 & 57.81 \\ diff --git a/plotstables/multilingual.png b/plotstables/multilingual.png deleted file mode 100644 index f457c4ed..00000000 Binary files a/plotstables/multilingual.png and /dev/null differ diff --git a/plotstables/multilingual_clf.pdf b/plotstables/multilingual_clf.pdf deleted file mode 100644 index e9e8a3a2..00000000 Binary files a/plotstables/multilingual_clf.pdf and /dev/null differ diff --git a/plotstables/multilingual_clf.png b/plotstables/multilingual_clf.png deleted file mode 100644 index 6a9a284e..00000000 Binary files a/plotstables/multilingual_clf.png and /dev/null differ diff --git a/plotstables/multilingual_sts.pdf b/plotstables/multilingual_sts.pdf deleted file mode 100644 index c0792d07..00000000 Binary files a/plotstables/multilingual_sts.pdf and /dev/null differ diff --git a/plotstables/multilingual_sts.png b/plotstables/multilingual_sts.png deleted file mode 100644 index 814cf7c3..00000000 Binary files a/plotstables/multilingual_sts.png and /dev/null differ diff --git a/plotstables/multilingual_tatoeba.pdf b/plotstables/multilingual_tatoeba.pdf deleted file mode 100644 index 1c9ec904..00000000 Binary files a/plotstables/multilingual_tatoeba.pdf and /dev/null differ diff --git a/plotstables/multilingual_tatoeba.png b/plotstables/multilingual_tatoeba.png deleted file mode 100644 index 30ea522a..00000000 Binary files a/plotstables/multilingual_tatoeba.png and /dev/null differ diff --git a/plotstables/results_to_avg_table.py b/plotstables/results_to_avg_table.py deleted file mode 100644 index 8ede5a2b..00000000 --- a/plotstables/results_to_avg_table.py +++ /dev/null @@ -1,259 +0,0 @@ -import json -import os -import sys - -from mteb import MTEB - 
-### GLOBAL VARIABLES ### - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", -] - - -TASK_LIST_SUMMARIZATION = [ - "SummEval", -] - -TASK_LIST_EN = ( - TASK_LIST_CLASSIFICATION - + TASK_LIST_CLUSTERING - + TASK_LIST_PAIR_CLASSIFICATION - + TASK_LIST_RERANKING - + TASK_LIST_RETRIEVAL - + TASK_LIST_STS - + TASK_LIST_SUMMARIZATION -) - - -TASK_LIST_NAMES = [ - ("Class.", TASK_LIST_CLASSIFICATION, ["en", "en-en"]), - ("Clust.", TASK_LIST_CLUSTERING, ["en", "en-en"]), - ("PairClass.", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"]), - ("Rerank.", TASK_LIST_RERANKING, ["en", "en-en"]), - ("Retr.", TASK_LIST_RETRIEVAL, ["en", "en-en"]), - ("STS", 
TASK_LIST_STS, ["en", "en-en"]), - ("Summ.", TASK_LIST_SUMMARIZATION, ["en", "en-en"]), - # ("BitextMining", TASK_LIST_BITEXT, []), - ("Avg.", TASK_LIST_EN, ["en", "en-en"]), -] - -SELFSUPERVISED_MODELS = [ - "glove.6B.300d", - "komninos", - "bert-base-uncased", - "unsup-simcse-bert-base-uncased", -] - -SUPERVISED_MODELS = [ - "sup-simcse-bert-base-uncased", - "msmarco-bert-co-condensor", - "contriever-base-msmarco", - "allenai-specter", - "LaBSE", - "LASER2", - "all-MiniLM-L6-v2", - "all-MiniLM-L12-v2", - "paraphrase-multilingual-MiniLM-L12-v2", - "all-mpnet-base-v2", - "paraphrase-multilingual-mpnet-base-v2", - "text-similarity-ada-001", - "SGPT-125M-weightedmean-nli-bitfit", - "SGPT-5.8B-weightedmean-nli-bitfit", - "SGPT-125M-weightedmean-msmarco-specb-bitfit", - "SGPT-1.3B-weightedmean-msmarco-specb-bitfit", - "SGPT-2.7B-weightedmean-msmarco-specb-bitfit", - "SGPT-5.8B-weightedmean-msmarco-specb-bitfit", - "sgpt-bloom-7b1-msmarco", - "gtr-t5-base", # 110M - "gtr-t5-large", - "gtr-t5-xl", - "gtr-t5-xxl", # 4.8B - "sentence-t5-base", # 110M - "sentence-t5-large", - "sentence-t5-xl", - "sentence-t5-xxl", # 4.8B -] - -MODEL_TO_NAME = { - "bert-base-uncased": "BERT", - "gtr-t5-base": "GTR-Base", - "gtr-t5-large": "GTR-Large", - "gtr-t5-xl": "GTR-XL", - "gtr-t5-xxl": "GTR-XXL", - "sentence-t5-base": "ST5-Base", - "sentence-t5-large": "ST5-Large", - "sentence-t5-xl": "ST5-XL", - "sentence-t5-xxl": "ST5-XXL", - "SGPT-125M-weightedmean-msmarco-specb-bitfit": "SGPT-125M-msmarco", - "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": "SGPT-1.3B-msmarco", - "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": "SGPT-2.7B-msmarco", - "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": "SGPT-5.8B-msmarco", - "sgpt-bloom-7b1-msmarco": "SGPT-BLOOM-7.1B-msmarco", - "SGPT-125M-weightedmean-nli-bitfit": "SGPT-125M-nli", - "SGPT-5.8B-weightedmean-nli-bitfit": "SGPT-5.8B-nli", - "sup-simcse-bert-base-uncased": "SimCSE-BERT-sup", - "contriever-base-msmarco": "Contriever", - 
"msmarco-bert-co-condensor": "coCondenser-msmarco", # They write it as coCondenser in the paper - "unsup-simcse-bert-base-uncased": "SimCSE-BERT-unsup", - "glove.6B.300d": "Glove", - "komninos": "Komninos", - "all-MiniLM-L6-v2": "MiniLM-L6", - "all-MiniLM-L12-v2": "MiniLM-L12", - "paraphrase-multilingual-MiniLM-L12-v2": "MiniLM-L12-multilingual", - "all-mpnet-base-v2": "MPNet", - "paraphrase-multilingual-mpnet-base-v2": "MPNet-multilingual", - "allenai-specter": "SPECTER", - "text-similarity-ada-001": "Ada Similarity", -} - - -### LOGIC ### - -results_folder = sys.argv[1].strip("/") -all_results = {} - -for model_name in os.listdir(results_folder): - model_res_folder = os.path.join(results_folder, model_name) - if os.path.isdir(model_res_folder): - all_results.setdefault(model_name, {}) - for file_name in os.listdir(model_res_folder): - if not file_name.endswith(".json"): - print(f"Skipping non-json {file_name}") - continue - with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f: - results = json.load(f) - all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}} - -def get_row(dataset, model_name, limit_langs=[], skip_langs=[]): - # CQADupstackRetrieval uses the same metric as its subsets - tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks - assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1." - main_metric = tasks[0].description["main_score"] - test_result = all_results.get(model_name, {}). 
get(dataset, {}) - - # Dev / Val set is used for MSMARCO (See BEIR paper) - if "MSMARCO" in dataset: - test_result = ( - test_result.get("dev") if "dev" in test_result else test_result.get("validation") - ) - else: - test_result = test_result.get("test") - - for lang in tasks[0].description["eval_langs"]: - if (limit_langs and lang not in limit_langs) or (skip_langs and lang in skip_langs): - continue - elif test_result is None: - raise NotImplementedError(f"Got no test result {test_result} for ds: {dataset} model: {model_name}") - - test_result_lang = test_result.get(lang, test_result) - if main_metric == "cosine_spearman": - test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman") - elif main_metric == "ap": - test_result_lang = test_result_lang.get("cos_sim", {}).get("ap") - else: - test_result_lang = test_result_lang.get(main_metric) - - if test_result_lang is None: - raise NotImplementedError - - return test_result_lang - raise NotImplementedError - - -table = "Task ($\ rightarrow$) & " + " & ".join([x[0] for x in TASK_LIST_NAMES]) + " \\\\" + "\n" -table += "Num. 
Datasets ($\ rightarrow$) & " + " & ".join([str(len(x[1])) for x in TASK_LIST_NAMES]) + " \\\\" + "\n" -table += "Model ($\downarrow$) & " + " & ".join([x[0] for x in TASK_LIST_NAMES]) + " \\\\" + "\n" - - -def add_to_table(model_list, table): - for model in model_list: - results = [] - for (task_name, task_list, limit_langs) in TASK_LIST_NAMES: - try: - model_task_results = [get_row(task, model, limit_langs=limit_langs) for task in task_list] - except: - results.append("") - continue - results.append(str(round(100 * (sum(model_task_results) / len(model_task_results)), 2))) - - model_name = MODEL_TO_NAME.get(model, model) - table += model_name + " & " + " & ".join(results) + " \\\\" + "\n" - return table - - -table = add_to_table(SELFSUPERVISED_MODELS, table) -table = add_to_table(SUPERVISED_MODELS, table) - -with open("avg_table.txt", "w") as f: - f.write(table) - diff --git a/plotstables/results_to_heatmap.py b/plotstables/results_to_heatmap.py deleted file mode 100644 index 3d9461e7..00000000 --- a/plotstables/results_to_heatmap.py +++ /dev/null @@ -1,313 +0,0 @@ -""" -Usage: python results_to_heatmap.py results_folder_path -results_folder_path contains results of multiple models whose folders should be named after them -Source: https://medium.com/@szabo.bibor/how-to-create-a-seaborn-correlation-heatmap-in-python-834c0686b88e -""" -import json -import os -import sys - -from mteb import MTEB -import numpy as np -import pandas as pd - -TASK_LIST_BITEXT = [ - "BUCC", - "Tatoeba", -] - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - 
"ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", -] - -TASK_LIST_SUMMARIZATION = [ - "SummEval", -] - -TASK_LIST = ( - TASK_LIST_BITEXT - + TASK_LIST_CLASSIFICATION - + TASK_LIST_CLUSTERING - + TASK_LIST_PAIR_CLASSIFICATION - + TASK_LIST_RERANKING - + TASK_LIST_RETRIEVAL - + TASK_LIST_STS - + TASK_LIST_SUMMARIZATION -) - -TASK_LIST_EN = ( - TASK_LIST_CLASSIFICATION - + TASK_LIST_CLUSTERING - + TASK_LIST_PAIR_CLASSIFICATION - + TASK_LIST_RERANKING - + TASK_LIST_RETRIEVAL - + TASK_LIST_STS - + TASK_LIST_SUMMARIZATION -) - -TASK_LIST_NAMES = [ - ("Class.", TASK_LIST_CLASSIFICATION, ["en", "en-en"]), - ("Clust.", TASK_LIST_CLUSTERING, ["en", "en-en"]), - ("PairClass.", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"]), - ("Rerank.", TASK_LIST_RERANKING, ["en", "en-en"]), - ("Retr.", TASK_LIST_RETRIEVAL, ["en", "en-en"]), - ("STS", TASK_LIST_STS, ["en", "en-en"]), - ("Summ.", TASK_LIST_SUMMARIZATION, ["en", "en-en"]), - # ("BitextMining", TASK_LIST_BITEXT, []), - # ("Avg.", TASK_LIST_EN, ["en", "en-en"]), -] - -MODEL_TO_NAME = { - "bert-base-uncased": "BERT", - "gtr-t5-base": "GTR-Base", - "gtr-t5-large": "GTR-Large", - "gtr-t5-xl": 
"GTR-XL", - "gtr-t5-xxl": "GTR-XXL", - "sentence-t5-base": "ST5-Base", - "sentence-t5-large": "ST5-Large", - "sentence-t5-xl": "ST5-XL", - "sentence-t5-xxl": "ST5-XXL", - "SGPT-125M-weightedmean-msmarco-specb-bitfit": "SGPT-125M-msmarco", - "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": "SGPT-1.3B-msmarco", - "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": "SGPT-2.7B-msmarco", - "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": "SGPT-5.8B-msmarco", - "sgpt-bloom-7b1-msmarco": "SGPT-BLOOM-7.1B-msmarco", - "SGPT-125M-weightedmean-nli-bitfit": "SGPT-125M-nli", - "SGPT-5.8B-weightedmean-nli-bitfit": "SGPT-5.8B-nli", - "sup-simcse-bert-base-uncased": "SimCSE-BERT-sup", - "contriever-base-msmarco": "Contriever", - "msmarco-bert-co-condensor": "coCondenser-msmarco", # They write it as coCondenser in the paper - "unsup-simcse-bert-base-uncased": "SimCSE-BERT-unsup", - "glove.6B.300d": "Glove", - "komninos": "Komninos", - "all-MiniLM-L6-v2": "MiniLM-L6", - "all-MiniLM-L12-v2": "MiniLM-L12", - "paraphrase-multilingual-MiniLM-L12-v2": "MiniLM-L12-multilingual", - "all-mpnet-base-v2": "MPNet", - "paraphrase-multilingual-mpnet-base-v2": "MPNet-multilingual", - "allenai-specter": "SPECTER", - # "text-similarity-ada-001": "Ada Similarity", -} - -SELFSUPERVISED_MODELS = [ - "glove.6B.300d", - "komninos", - "bert-base-uncased", - "unsup-simcse-bert-base-uncased", -] - -SUPERVISED_MODELS = [ - "sup-simcse-bert-base-uncased", - "msmarco-bert-co-condensor", - "contriever-base-msmarco", - "allenai-specter", - "LaBSE", - "LASER2", - "all-MiniLM-L6-v2", - "all-MiniLM-L12-v2", - "paraphrase-multilingual-MiniLM-L12-v2", - "all-mpnet-base-v2", - "paraphrase-multilingual-mpnet-base-v2", - # "text-similarity-ada-001", - "SGPT-125M-weightedmean-nli-bitfit", - "SGPT-5.8B-weightedmean-nli-bitfit", - "SGPT-125M-weightedmean-msmarco-specb-bitfit", - "SGPT-1.3B-weightedmean-msmarco-specb-bitfit", - "SGPT-2.7B-weightedmean-msmarco-specb-bitfit", - "SGPT-5.8B-weightedmean-msmarco-specb-bitfit", - 
"sgpt-bloom-7b1-msmarco", - "gtr-t5-base", # 110M - "gtr-t5-large", - "gtr-t5-xl", - "gtr-t5-xxl", # 4.8B - "sentence-t5-base", # 110M - "sentence-t5-large", - "sentence-t5-xl", - "sentence-t5-xxl", # 4.8B -] - -results_folder = sys.argv[1].strip("/") - -all_results = {} - -for model_name in os.listdir(results_folder): - model_res_folder = os.path.join(results_folder, model_name) - if os.path.isdir(model_res_folder): - all_results.setdefault(model_name, {}) - for file_name in os.listdir(model_res_folder): - if not file_name.endswith(".json"): - print(f"Skipping non-json {file_name}") - continue - with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f: - results = json.load(f) - all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}} - -def get_row(dataset, model_name, limit_langs=[], skip_langs=[]): - # CQADupstackRetrieval uses the same metric as its subsets - tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks - assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1." - main_metric = tasks[0].description["main_score"] - test_result = all_results.get(model_name, {}). 
get(dataset, {}) - - # Dev / Val set is used for MSMARCO (See BEIR paper) - if "MSMARCO" in dataset: - test_result = ( - test_result.get("dev") if "dev" in test_result else test_result.get("validation") - ) - else: - test_result = test_result.get("test") - - for lang in tasks[0].description["eval_langs"]: - if (limit_langs and lang not in limit_langs) or (skip_langs and lang in skip_langs): - continue - elif test_result is None: - raise NotImplementedError(f"Got no test result {test_result} for ds: {dataset} model: {model_name}") - - test_result_lang = test_result.get(lang, test_result) - if main_metric == "cosine_spearman": - test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman") - elif main_metric == "ap": - test_result_lang = test_result_lang.get("cos_sim", {}).get("ap") - else: - test_result_lang = test_result_lang.get(main_metric) - - if test_result_lang is None: - raise NotImplementedError - - return test_result_lang - raise NotImplementedError - - -### MODEL HEATMAP - -model_dict = [] - -for ds in TASK_LIST_EN: - model_dict.append({MODEL_TO_NAME.get(model,model): get_row(ds, model, limit_langs=["en", "en-en"]) for model in SELFSUPERVISED_MODELS + SUPERVISED_MODELS}) - -model_df = pd.DataFrame(model_dict) -import seaborn as sns -import matplotlib.pyplot as plt - -plt.figure(figsize=(20, 10)) - -model_df = (model_df.corr() * 100).round(0).astype(int) - -# define the mask to set the values in the upper triangle to True -mask = np.triu(np.ones_like(model_df, dtype=np.bool)) -heatmap = sns.heatmap(model_df, mask=mask, vmin=model_df.values.min(), vmax=model_df.values.max(), annot=True, fmt='g', cmap='Blues') -# heatmap.set_title('Pearson Correlations of scores on MTEB', fontdict={'fontsize':18}, pad=16); - -plt.savefig('heatmap_model.pdf', dpi=300, bbox_inches='tight') - -data_dict = [] - - -### TASK HEATMAP -for model in MODEL_TO_NAME: - results = {} - for (task_name, task_list, limit_langs) in TASK_LIST_NAMES: - model_task_results = 
[get_row(task, model, limit_langs=limit_langs) for task in task_list] - results[task_name] = np.mean(model_task_results) - data_dict.append(results) - -data_df = pd.DataFrame(data_dict) -data_df = (data_df.corr() * 100).round(0).astype(int) - -plt.figure(figsize=(20, 10)) -# define the mask to set the values in the upper triangle to True -mask = np.triu(np.ones_like(data_df, dtype=np.bool)) -heatmap = sns.heatmap(data_df, mask=mask, vmin=data_df.values.min(), vmax=data_df.values.max(), annot=True, fmt='g', cmap='Blues') -# heatmap.set_title('Pearson Correlations of tasks on MTEB', fontdict={'fontsize':18}, pad=16) - -plt.savefig('heatmap_tasks.pdf', dpi=300, bbox_inches='tight') - -exit() -# The last heatmap is not used - -### DATA HEATMAP -# This is to be differentiated from a heatmap of actual data content (e.g. via unigram Jaccard similarity) -# E.g. for BEIR SciFact & HotpotQA have very low unigram Jaccard similarity, but in this method, -# they get a high similarity score, because model scores seem to correlate on the datasrt - -for model, name in MODEL_TO_NAME.items(): - data_dict.append({ds: get_row(ds, model, limit_langs=["en", "en-en"]) for ds in TASK_LIST_EN}) - -data_df = pd.DataFrame(data_dict) - -plt.figure(figsize=(128, 48)) -# define the mask to set the values in the upper triangle to True -mask = np.triu(np.ones_like(data_df.corr(), dtype=np.bool)) -heatmap = sns.heatmap(data_df.corr(), mask=mask, vmin=-1, vmax=1, annot=True, cmap='Blues') -heatmap.set_title('Pearson Correlations of scores on MTEB', fontdict={'fontsize':18}, pad=16) - -plt.savefig('heatmap_data.pdf', dpi=300, bbox_inches='tight') diff --git a/plotstables/results_to_multilingual.py b/plotstables/results_to_multilingual.py deleted file mode 100644 index f89263f9..00000000 --- a/plotstables/results_to_multilingual.py +++ /dev/null @@ -1,329 +0,0 @@ -""" -Usage: python results_to_multilingual.py results_folder_path -Make sure the final directory results_folder_path is the name of your 
model -""" -import json -import os -import sys - -### GLOBAL VARIABLES ### - -TASK_LIST_BITEXT = [ - "BUCC", - "Tatoeba", -] - -BITEXT_MODELS = MULTILING_MODELS = [ - "LaBSE", - "LASER2", - "paraphrase-multilingual-MiniLM-L12-v2", - "paraphrase-multilingual-mpnet-base-v2", - "sgpt-bloom-7b1-msmarco", - # "sgpt-bloom-1b3-nli", # Not too interesting -] - -MODEL_TO_NAME = { - "bert-base-uncased": "BERT", - "gtr-t5-base": "GTR-Base", - "gtr-t5-large": "GTR-Large", - "gtr-t5-xl": "GTR-XL", - "gtr-t5-xxl": "GTR-XXL", - "sentence-t5-base": "ST5-Base", - "sentence-t5-large": "ST5-Large", - "sentence-t5-xl": "ST5-XL", - "sentence-t5-xxl": "ST5-XXL", - "SGPT-125M-weightedmean-msmarco-specb-bitfit": "SGPT-125M-msmarco", - "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": "SGPT-1.3B-msmarco", - "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": "SGPT-2.7B-msmarco", - "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": "SGPT-5.8B-msmarco", - "sgpt-bloom-7b1-msmarco": "SGPT-BLOOM-7.1B-msmarco", - "SGPT-125M-weightedmean-nli-bitfit": "SGPT-125M-nli", - "SGPT-5.8B-weightedmean-nli-bitfit": "SGPT-5.8B-nli", - "sup-simcse-bert-base-uncased": "SimCSE-BERT-sup", - "contriever-base-msmarco": "Contriever", - "msmarco-bert-co-condensor": "coCondenser-msmarco", # They write it as coCondenser in the paper - "unsup-simcse-bert-base-uncased": "SimCSE-BERT-unsup", - "glove.6B.300d": "Glove", - "komninos": "Komninos", - "all-MiniLM-L6-v2": "MiniLM-L6", - "all-MiniLM-L12-v2": "MiniLM-L12", - "paraphrase-multilingual-MiniLM-L12-v2": "MiniLM-L12-multilingual", - "all-mpnet-base-v2": "MPNet", - "paraphrase-multilingual-mpnet-base-v2": "MPNet-multilingual", - "allenai-specter": "SPECTER", - "text-similarity-ada-001": "Ada Similarity", -} - -# Base from: -# https://coolors.co/palette/ff5400-ff6d00-ff8500-ff9100-ff9e00-00b4d8-0096c7-0077b6-023e8a-03045e -# Yellow tones from: -# https://coolors.co/palette/6ab6dc-49a6d4-2f94c6-277ba5-1f6284-e0b700-ffd20a-ffda33-ffe15c-ffe570 -# Green from: -# 
https://coolors.co/palette/f94144-f3722c-f8961e-f9844a-f9c74f-90be6d-43aa8b-4d908e-577590-277da1 -MODEL_TO_COLOR = { - "MiniLM": "#BAF19C",#"#017600", # Green - "MPNet": "#F94144",#"#007A7A", # Light Green - "GTR": "#FF5400",#"#221D91", # Blue 1 - "ST5": "#FF9E00",#"#86D4F1", # Blue 2 - "SGPT": "#00B4D8",#"#7B3FB9", # Purple - "SimCSE": "#F9C74F",#"#2070B4", # Blue 3 - "LaBSE": "#F9C74F",#"#2070B4", # Blue 3 - "SPECTER": "#E0B700", # Shade of #2070B4 - "Glove": "#023E8A",#"#9BC7DD", # Light Blue - "LASER2": "#03045E", # Grey -} - - -MULTILINGUAL_CLF = [ - "AmazonCounterfactualClassification", - "AmazonReviewsClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", -] - -MULTILINGUAL_STS = [ - "STS17", - "STS22", -] - -### LOGIC ### - -results_folder = sys.argv[1].strip("/") -all_results = {} - -for model_name in os.listdir(results_folder): - model_res_folder = os.path.join(results_folder, model_name) - if os.path.isdir(model_res_folder): - all_results.setdefault(model_name, {}) - for file_name in os.listdir(model_res_folder): - if not file_name.endswith(".json"): - print(f"Skipping non-json {file_name}") - continue - with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f: - results = json.load(f) - all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}} - - - -# Create a plot for each task with scaling of the model performances on this task -import matplotlib.pyplot as plt -import numpy as np - -fig, ax = plt.subplots(figsize=(64,12)) - -markers = ["x", "o", "v", "*", "p"] - -# Compute averages -scores = {} -for i, model in enumerate(BITEXT_MODELS): - if not(all_results.get(model, []).get("Tatoeba")): - continue - for lang, res in all_results[model]["Tatoeba"]["test"].items(): - if lang == "evaluation_time": - continue - scores.setdefault(lang, []) - scores[lang].append(res["f1"]) -# Average -scores = {k: 
np.mean(v) for k,v in scores.items()} -scores_sorted = sorted(scores.items(), key=lambda x: x[-1], reverse=True) -langs_sorted = [x[0] for x in scores_sorted] -global_idx = {lang: langs_sorted.index(lang) for lang in scores} - -for i, model in enumerate(BITEXT_MODELS): - scores = {} - - if not(all_results.get(model, []).get("Tatoeba")): - continue - - for lang, res in all_results[model]["Tatoeba"]["test"].items(): - if lang == "evaluation_time": - continue - scores[lang] = res["f1"] - - # Optionally sort by LaBSE scores - if i == 0: - assert model == "LaBSE" - scores_sorted = sorted(scores.items(), key=lambda x: x[-1], reverse=True) - langs_sorted = [x[0] for x in scores_sorted] - global_idx = {lang: langs_sorted.index(lang) for lang in scores} - - # Reverse is already accounted for in global_idx - scores_sorted = sorted(scores.items(), key=lambda x: global_idx[x[0]], reverse=False) - x_langs = [x[0] for x in scores_sorted] - y_scores = [x[1] for x in scores_sorted] - - model_name = MODEL_TO_NAME.get(model, model) - ax.plot(x_langs, y_scores, linewidth=6.0, label=model_name, marker=markers[i], color=MODEL_TO_COLOR.get(model_name.split("-")[0])) - -ax.set_ylabel("F1 score", fontsize=22) -ax.margins(x=0.01) # Reduce whitespace left & right - -plt.xticks(rotation=45, fontsize=20) #plt.xticks(rotation=90, ha='right') -plt.legend(fontsize=25) -plt.savefig('multilingual_tatoeba.pdf', dpi=300, bbox_inches='tight') - - -### CLASSIFICATION ### - -# Compute averages -scores = {} -for i, model in enumerate(BITEXT_MODELS): - for ds in MULTILINGUAL_CLF: - if not(all_results.get(model, []).get(ds)): - continue - for lang, res in all_results[model][ds]["test"].items(): - if lang == "evaluation_time": - continue - elif lang == "en-ext": - lang = "en" - - scores.setdefault(lang, []) - scores[lang].append(res["accuracy"]) -# Average -scores = {k: np.mean(v) for k,v in scores.items()} -scores_sorted = sorted(scores.items(), key=lambda x: x[-1], reverse=True) -langs_sorted = [x[0] for 
x in scores_sorted] -global_idx = {lang: langs_sorted.index(lang) for lang in scores} - - -fig, ax = plt.subplots(figsize=(32,8)) - - -for i, model in enumerate(BITEXT_MODELS): - scores = {} - for ds in MULTILINGUAL_CLF: - if not(all_results.get(model, []).get(ds)): - continue - for lang, res in all_results[model][ds]["test"].items(): - if lang == "evaluation_time": - continue - elif lang == "en-ext": - lang = "en" - - scores.setdefault(lang, []) - scores[lang].append(res["accuracy"]) - - # Average scores for langs - scores = {k: np.mean(v) for k,v in scores.items()} - - # Reverse is already accounted for in global_idx - scores_sorted = sorted(scores.items(), key=lambda x: global_idx[x[0]], reverse=False) - x_langs = [x[0] for x in scores_sorted] - y_scores = [x[1] for x in scores_sorted] - model_name = MODEL_TO_NAME.get(model, model) - ax.plot(x_langs, y_scores, linewidth=6.0, label=model_name, marker=markers[i], color=MODEL_TO_COLOR.get(model_name.split("-")[0])) - -ax.set_ylabel("Accuracy", fontsize=22) - -plt.xticks(rotation=45, fontsize=20) #plt.xticks(rotation=90, ha='right') -plt.legend(fontsize=25) - -plt.savefig('multilingual_clf.pdf', dpi=300, bbox_inches='tight') - - - -### STS ### - -# Compute averages -scores_multi = {} -scores_cross = {} - -for i, model in enumerate(BITEXT_MODELS): - for ds in MULTILINGUAL_STS: - if not(all_results.get(model, []).get(ds)): - continue - for lang, res in all_results[model][ds]["test"].items(): - if lang == "evaluation_time": - continue - multi = True - if "-" in lang: - l1, l2 = lang.split("-") - if l1 != l2: - multi = False - else: - lang = l1 - if multi: - scores_multi.setdefault(lang, []) - scores_multi[lang].append(res["cos_sim"]["spearman"]) - else: - scores_cross.setdefault(lang, []) - scores_cross[lang].append(res["cos_sim"]["spearman"]) - -# Average -scores = {k: np.mean(v) for k,v in scores_multi.items()} -scores_sorted = sorted(scores.items(), key=lambda x: x[-1], reverse=True) -langs_sorted = [x[0] for x in 
scores_sorted] -global_idx_multi = {lang: langs_sorted.index(lang) for lang in scores} - -scores = {k: np.mean(v) for k,v in scores_cross.items()} -scores_sorted = sorted(scores.items(), key=lambda x: x[-1], reverse=True) -langs_sorted = [x[0] for x in scores_sorted] -global_idx_cross = {lang: langs_sorted.index(lang) for lang in scores} - - - -fig, axes = plt.subplots(figsize=(32,8), ncols=2, nrows=1, sharey=True) - -ax_multi, ax_cross = axes - -for i, model in enumerate(BITEXT_MODELS): - scores_multi = {} - scores_cross = {} - for ds in MULTILINGUAL_STS: - if not(all_results.get(model, []).get(ds)): - continue - for lang, res in all_results[model][ds]["test"].items(): - if lang == "evaluation_time": - continue - multi = True - if "-" in lang: - l1, l2 = lang.split("-") - if l1 != l2: - multi = False - else: - lang = l1 - - if multi: - scores_multi.setdefault(lang, []) - scores_multi[lang].append(res["cos_sim"]["spearman"]) - else: - scores_cross.setdefault(lang, []) - scores_cross[lang].append(res["cos_sim"]["spearman"]) - - scores_multi = {k: np.mean(v) for k,v in scores_multi.items()} - scores_cross = {k: np.mean(v) for k,v in scores_cross.items()} - - scores_sorted_multi = sorted(scores_multi.items(), key=lambda x: global_idx_multi[x[0]], reverse=False) - scores_sorted_cross = sorted(scores_cross.items(), key=lambda x: global_idx_cross[x[0]], reverse=False) - - model_name = MODEL_TO_NAME.get(model, model) - model_color = MODEL_TO_COLOR.get(model_name.split("-")[0]) - - ax_multi.plot( - [x[0] for x in scores_sorted_multi], - [x[1] for x in scores_sorted_multi], - label=model_name, - marker=markers[i], - color=model_color, - linewidth=6.0, - ) - - ax_cross.plot( - [x[0] for x in scores_sorted_cross], - [x[1] for x in scores_sorted_cross], - label=model_name, - marker=markers[i], - color=model_color, - linewidth=6.0, - ) - -ax_multi.set_ylabel("Cos. Sim. 
Spearman Corr.", fontsize=22) - -ax_multi.tick_params(axis='both', which='minor', labelsize=20) -ax_multi.tick_params(axis='both', which='major', labelsize=20) -ax_cross.tick_params(axis='both', which='minor', labelsize=20, rotation=45) -ax_cross.tick_params(axis='both', which='major', labelsize=20, rotation=45) - -plt.savefig('multilingual_sts.pdf', dpi=300, bbox_inches='tight') diff --git a/plotstables/results_to_scale.py b/plotstables/results_to_scale.py deleted file mode 100644 index 36bf8013..00000000 --- a/plotstables/results_to_scale.py +++ /dev/null @@ -1,274 +0,0 @@ -""" -Creates scaling graphs -Usage: python results_to_scale.py results_folder_path -results_folder_path contains results of multiple models whose folders should be named after them -""" -import json -import os -import sys - -from mteb import MTEB -import numpy as np - -### GLOBAL VARIABLES ### - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - 
"HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", -] - -# Parameter counts in millions -MODELS = [ -# Doesnt add a lot of value to the figure -# [ -# ("MiniLM-L6", "all-MiniLM-L6-v2", 22.713216), # 22.7 M -# ("MiniLM-L12", "all-MiniLM-L12-v2", 33.360000), # 33.4 M -# ], - [ - ("GTR-Base", "gtr-t5-base", 110), - ("GTR-Large", "gtr-t5-large", 335), - ("GTR-XL", "gtr-t5-xl", 1240), - ("GTR-XXL", "gtr-t5-xxl", 4800), - ], - [ - ("ST5-Base", "sentence-t5-base", 110), - ("ST5-Large", "sentence-t5-large", 335), - ("ST5-XL", "sentence-t5-xl", 1240), - ("ST5-XXL", "sentence-t5-xxl", 4800), - ], - [ - ("SGPT-125M-msmarco", "SGPT-125M-weightedmean-msmarco-specb-bitfit", 125), - ("SGPT-1.3B-msmarco", "SGPT-1.3B-weightedmean-msmarco-specb-bitfit", 1300), - ("SGPT-2.7B-msmarco", "SGPT-2.7B-weightedmean-msmarco-specb-bitfit", 2700), - ("SGPT-5.8B-msmarco", "SGPT-5.8B-weightedmean-msmarco-specb-bitfit", 5800), - ], -] - -# todo: remove -lines = ["blue2", "blue", "purple"] -shades = ["lightblue2", "lightblue", "lightpurple"] -colors = { - "purple": "#7B3FB9", - "lightpurple": "#CBB3E3", - "blue": "#221D91", - "lightblue": "#B6B4DB", - "blue2": "#86D4F1", - "lightblue2": "#AAF2F2", -} - - -MODEL_TO_MARKER = { - "MiniLM": "o", - "GTR": "x", - "ST5": "*", - "SGPT": "v", -} - -# Base from: -# https://coolors.co/palette/ff5400-ff6d00-ff8500-ff9100-ff9e00-00b4d8-0096c7-0077b6-023e8a-03045e -# Yellow tones from: -# https://coolors.co/palette/6ab6dc-49a6d4-2f94c6-277ba5-1f6284-e0b700-ffd20a-ffda33-ffe15c-ffe570 -# Green from: -# https://coolors.co/palette/f94144-f3722c-f8961e-f9844a-f9c74f-90be6d-43aa8b-4d908e-577590-277da1 -MODEL_TO_COLOR = { - "MiniLM": "#BAF19C",#"#017600", # Green - "MPNet": "#F94144",#"#007A7A", # Light Green - "GTR": "#FF5400",#"#221D91", # Blue 1 - 
"ST5": "#FF9E00",#"#86D4F1", # Blue 2 - "SGPT": "#00B4D8",#"#7B3FB9", # Purple - "SimCSE": "#F9C74F",#"#2070B4", # Blue 3 - "LaBSE": "#F9C74F",#"#2070B4", # Blue 3 - "SPECTER": "#E0B700", # Shade of #2070B4 - "Glove": "#023E8A",#"#9BC7DD", # Light Blue - "LASER2": "#03045E", # Grey -} - - -TASK_LIST_NAMES = [ - ("Classification", TASK_LIST_CLASSIFICATION, ["en", "en-en"], "accuracy"), - ("Clustering", TASK_LIST_CLUSTERING, ["en", "en-en"], "v_measure"), - ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"], "ap"), - ("Reranking", TASK_LIST_RERANKING, ["en", "en-en"], "map"), - ("Retrieval", TASK_LIST_RETRIEVAL, ["en", "en-en"], "nDCG@10"), - ("STS", TASK_LIST_STS, ["en", "en-en"], "cos. sim. spearman corr."), -] - - -### LOGIC ### - -results_folder = sys.argv[1].strip("/") - -all_results = {} - -for model_name in os.listdir(results_folder): - model_res_folder = os.path.join(results_folder, model_name) - if os.path.isdir(model_res_folder): - all_results.setdefault(model_name, {}) - for file_name in os.listdir(model_res_folder): - if not file_name.endswith(".json"): - print(f"Skipping non-json {file_name}") - continue - with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f: - results = json.load(f) - all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}} - - -def get_row(dataset, model_name, limit_langs=[], skip_langs=[]): - # CQADupstackRetrieval uses the same metric as its subsets - tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks - assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1." - main_metric = tasks[0].description["main_score"] - test_result = all_results.get(model_name, {}). 
get(dataset, {}) - - # Dev / Val set is used for MSMARCO (See BEIR paper) - if "MSMARCO" in dataset: - test_result = ( - test_result.get("dev") if "dev" in test_result else test_result.get("validation") - ) - else: - test_result = test_result.get("test") - - for lang in tasks[0].description["eval_langs"]: - if (limit_langs and lang not in limit_langs) or (skip_langs and lang in skip_langs): - continue - elif test_result is None: - raise NotImplementedError(f"Got no test result {test_result} for ds: {dataset} model: {model_name}") - - test_result_lang = test_result.get(lang, test_result) - if main_metric == "cosine_spearman": - test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman") - elif main_metric == "ap": - test_result_lang = test_result_lang.get("cos_sim", {}).get("ap") - else: - test_result_lang = test_result_lang.get(main_metric) - - if test_result_lang is None: - raise NotImplementedError - - return test_result_lang - raise NotImplementedError - - -# Create a plot for each task with scaling of the model performances on this task -import matplotlib.pyplot as plt - -fig, axes = plt.subplots(figsize=(16, 20), facecolor='w', edgecolor='k', ncols=2, nrows=3, sharey=False) - - -# Create each boxplot -model_xticks_global = ['0.1B', '1B','2B','4B'] -model_xticks_num_global = [np.log10(100_000_000), np.log10(1_000_000_000), np.log10(2_000_000_000), np.log10(4_000_000_000)] - -for ax, (task_name, task_list, limit_langs, metric) in zip(axes.flatten(), TASK_LIST_NAMES): - for i, model_group in enumerate(MODELS): - model_xticks_num = [np.log10(x[-1] * 1_000_000) for x in model_group] - avg_scores = [] - std_scores = [] - for model in model_group: - model_name = model[0] - try: - model_task_results = [get_row(task, model[1], limit_langs=limit_langs) for task in task_list] - except: - model_task_results = [0.5] - - avg_scores.append(np.mean(np.array(model_task_results)).item()) - std_scores.append(np.std(np.array(model_task_results)).item()) - - ax.plot( - 
model_xticks_num, - avg_scores, - label=model_name.split("-")[0], - color=MODEL_TO_COLOR.get(model_name.split("-")[0]), - marker=MODEL_TO_MARKER.get(model_name.split("-")[0]) - ) - - # Shade doesn't look good, as std is too big - # ax.fill_between(model_xticks_num, [avg-std for avg, std in zip(avg_scores, std_scores)], [avg+std for avg, std in zip(avg_scores, std_scores)], color=colors.get(shades[i]), alpha=0.5) - - ax.set_ylabel(f"Average Performance ({metric})", fontsize=16) - ax.set_xlabel("Model Parameters (Billions)", fontsize=16) - ax.set_xticks(model_xticks_num_global, model_xticks_global) - ax.set_title(task_name, fontweight="bold", fontsize=20) - ax.grid(alpha=0.5) - -# Create deduplicated Global Legend -handles, labels = plt.gca().get_legend_handles_labels() -by_label = dict(zip(labels, handles)) -fig.legend( - by_label.values(), - by_label.keys(), - loc=(0.35, 0.94), # "upper center", - ncol=len(by_label), - frameon=False, - fontsize=15, -) - -plt.savefig('scale.pdf', dpi=300, bbox_inches='tight') diff --git a/plotstables/results_to_tex.py b/plotstables/results_to_tex.py deleted file mode 100644 index aea339b1..00000000 --- a/plotstables/results_to_tex.py +++ /dev/null @@ -1,308 +0,0 @@ -""" -Usage: python results_to_tex.py results_folder_path -results_folder_path contains results of multiple models whose folders should be named after them -""" -import json -import os -import sys - -from mteb import MTEB -import numpy as np - - -### GLOBAL VARIABLES ### - - -TASK_LIST_BITEXT = [ - "BUCC", - "Tatoeba", -] - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = 
[ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", -] - -TASK_LIST_SUMMARIZATION = [ - "SummEval", -] - -TASK_LIST = ( - TASK_LIST_BITEXT - + TASK_LIST_CLASSIFICATION - + TASK_LIST_CLUSTERING - + TASK_LIST_PAIR_CLASSIFICATION - + TASK_LIST_RERANKING - + TASK_LIST_RETRIEVAL - + TASK_LIST_STS - + TASK_LIST_SUMMARIZATION -) - -TASK_LIST_EN = ( - TASK_LIST_CLASSIFICATION - + TASK_LIST_CLUSTERING - + TASK_LIST_PAIR_CLASSIFICATION - + TASK_LIST_RERANKING - + TASK_LIST_RETRIEVAL - + TASK_LIST_STS - + TASK_LIST_SUMMARIZATION -) - -TASK_LIST_NAMES = [ - ("Classification", TASK_LIST_CLASSIFICATION, ["en", "en-en"]), - ("Clustering", TASK_LIST_CLUSTERING, ["en", "en-en"]), - ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"]), - ("Reranking", TASK_LIST_RERANKING, ["en", "en-en"]), - ("Retrieval", TASK_LIST_RETRIEVAL, ["en", "en-en"]), - ("STS", TASK_LIST_STS, ["en", "en-en"]), - ("all", TASK_LIST, ["en", "en-en"]), - ("BitextMining", TASK_LIST_BITEXT, []), -] - -BITEXT_MODELS = MULTILING_MODELS = [ - # "glove.6B.300d", - # "komninos", - "LASER2", - "LaBSE", - 
"paraphrase-multilingual-MiniLM-L12-v2", - "paraphrase-multilingual-mpnet-base-v2", - "sgpt-bloom-7b1-msmarco", - # "sgpt-bloom-1b3-nli", -] - - -SELFSUPERVISED_MODELS = [ - "glove.6B.300d", - "komninos", - "bert-base-uncased", - "unsup-simcse-bert-base-uncased", -] - -SUPERVISED_MODELS = [ - "sup-simcse-bert-base-uncased", - "msmarco-bert-co-condensor", - "contriever-base-msmarco", - "allenai-specter", - "LaBSE", - "LASER2", - "all-MiniLM-L6-v2", - "all-MiniLM-L12-v2", - "paraphrase-multilingual-MiniLM-L12-v2", - "all-mpnet-base-v2", - "paraphrase-multilingual-mpnet-base-v2", - "text-similarity-ada-001", - "SGPT-125M-weightedmean-nli-bitfit", - "SGPT-5.8B-weightedmean-nli-bitfit", - "SGPT-125M-weightedmean-msmarco-specb-bitfit", - "SGPT-1.3B-weightedmean-msmarco-specb-bitfit", - "SGPT-2.7B-weightedmean-msmarco-specb-bitfit", - "SGPT-5.8B-weightedmean-msmarco-specb-bitfit", - "sgpt-bloom-7b1-msmarco", - "gtr-t5-base", # 110M - "gtr-t5-large", - "gtr-t5-xl", - "gtr-t5-xxl", # 4.8B - "sentence-t5-base", # 110M - "sentence-t5-large", - "sentence-t5-xl", - "sentence-t5-xxl", # 4.8B -] - - -MODEL_TO_NAME = { - "bert-base-uncased": "BERT", - "gtr-t5-base": "GTR-Base", - "gtr-t5-large": "GTR-Large", - "gtr-t5-xl": "GTR-XL", - "gtr-t5-xxl": "GTR-XXL", - "sentence-t5-base": "ST5-Base", - "sentence-t5-large": "ST5-Large", - "sentence-t5-xl": "ST5-XL", - "sentence-t5-xxl": "ST5-XXL", - "SGPT-125M-weightedmean-msmarco-specb-bitfit": "SGPT-125M-msmarco", - "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": "SGPT-1.3B-msmarco", - "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": "SGPT-2.7B-msmarco", - "SGPT-5.8B-weightedmean-msmarco-specb-bitfit": "SGPT-5.8B-msmarco", - "sgpt-bloom-7b1-msmarco": "SGPT-BLOOM-7.1B-msmarco", - "SGPT-125M-weightedmean-nli-bitfit": "SGPT-125M-nli", - "SGPT-5.8B-weightedmean-nli-bitfit": "SGPT-5.8B-nli", - "sup-simcse-bert-base-uncased": "SimCSE-BERT-sup", - "contriever-base-msmarco": "Contriever", - "msmarco-bert-co-condensor": "coCondenser-msmarco", # They 
write it as coCondenser in the paper - "unsup-simcse-bert-base-uncased": "SimCSE-BERT-unsup", - "glove.6B.300d": "Glove", - "komninos": "Komninos", - "all-MiniLM-L6-v2": "MiniLM-L6", - "all-MiniLM-L12-v2": "MiniLM-L12", - "paraphrase-multilingual-MiniLM-L12-v2": "MiniLM-L12-multilingual", - "all-mpnet-base-v2": "MPNet", - "paraphrase-multilingual-mpnet-base-v2": "MPNet-multilingual", - "allenai-specter": "SPECTER", - "text-similarity-ada-001": "Ada Similarity", - "text-search-ada-query-001": "Ada Search Query" -} - - - -### LOGIC ### - -results_folder = sys.argv[1].strip("/") - -all_results = {} - -for model_name in os.listdir(results_folder): - model_res_folder = os.path.join(results_folder, model_name) - if os.path.isdir(model_res_folder): - all_results.setdefault(model_name, {}) - for file_name in os.listdir(model_res_folder): - if not file_name.endswith(".json"): - print(f"Skipping non-json {file_name}") - continue - with open(os.path.join(model_res_folder, file_name), "r", encoding="utf-8") as f: - results = json.load(f) - all_results[model_name] = {**all_results[model_name], **{file_name.replace(".json", ""): results}} - - -def get_rows(dataset, model_name, limit_langs=[], skip_langs=[]): - rows = [] - # CQADupstackRetrieval uses the same metric as its subsets - tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks - assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1." - main_metric = tasks[0].description["main_score"] - test_result = all_results.get(model_name, {}). 
get(dataset, {}) - - # Dev / Val set is used for MSMARCO (See BEIR paper) - if "MSMARCO" in dataset: - test_result = ( - test_result.get("dev") if "dev" in test_result else test_result.get("validation") - ) - else: - test_result = test_result.get("test") - - for lang in tasks[0].description["eval_langs"]: - if (limit_langs and lang not in limit_langs) or (skip_langs and lang in skip_langs): - continue - elif test_result is None: - rows.append([lang, main_metric, None]) - continue - - test_result_lang = test_result.get(lang, test_result) - if main_metric == "cosine_spearman": - test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman") - elif main_metric == "ap": - test_result_lang = test_result_lang.get("cos_sim", {}).get("ap") - else: - test_result_lang = test_result_lang.get(main_metric) - - if test_result_lang is None: - rows.append([lang, main_metric, None]) - continue - - rows.append([lang, main_metric, test_result_lang]) - return rows - - -def get_table(models, task_list, limit_langs=[], skip_langs=[], name="table"): - TABLE = "Dataset & Language & " + " & ".join([MODEL_TO_NAME.get(model, model) for model in models]) + " \\\\" + "\n" - scores_all = [] - for ds in task_list: - results = [get_rows(dataset=ds, model_name=model, limit_langs=limit_langs, skip_langs=skip_langs) for model in models] - assert all(len(sub) == len(results[0]) for sub in results) - for lang_idx in range(len(results[0])): - scores = [x[lang_idx][-1] for x in results] - scores_all.append(scores) - lang = results[0][lang_idx][0] - one_line = " & ".join([ds, lang] + [str(round(x*100, 2)) if x is not None else "" for x in scores]) - TABLE += one_line + " \\\\" + "\n" - - arr = np.array(scores_all, dtype=np.float32) - # Get an index of columns which has any NaN value - index = np.isnan(arr).any(axis=0) - # Delete columns (models) with any NaN value from 2D NumPy Array - arr = np.delete(arr, index, axis=1) - # Average - scores_avg = list(np.mean(arr, axis=0)) - # Insert empty 
string for NaN columns - for i, val in enumerate(index): - if val == True: - scores_avg.insert(i, "") - lang = "mix" if not(limit_langs) else limit_langs[0] - TABLE += " & ".join(["Average", lang] + [str(round(x*100, 2)) if x else "" for x in scores_avg]) + " \\\\" + "\n" - - with open(f"{name}.txt", "w") as f: - f.write(TABLE) - - -get_table(SELFSUPERVISED_MODELS + SUPERVISED_MODELS, TASK_LIST_EN, limit_langs=["en", "en-en",], name="all_en") -get_table(BITEXT_MODELS, TASK_LIST_BITEXT, limit_langs=[], name="bitext") -get_table(MULTILING_MODELS, TASK_LIST_CLASSIFICATION, limit_langs=[], skip_langs=["en", "en-en", "en-ext"], name="multilingclf") -get_table(MULTILING_MODELS, TASK_LIST_STS, limit_langs=[], skip_langs=["en", "en-en", "en-ext"], name="multilingsts") - diff --git a/plotstables/scale.pdf b/plotstables/scale.pdf deleted file mode 100644 index 0ce7ea0f..00000000 Binary files a/plotstables/scale.pdf and /dev/null differ diff --git a/plotstables/scale.png b/plotstables/scale.png deleted file mode 100644 index 170dc4dd..00000000 Binary files a/plotstables/scale.png and /dev/null differ diff --git a/plotstables/sim_data.csv b/plotstables/sim_data.csv deleted file mode 100644 index 13f03085..00000000 --- a/plotstables/sim_data.csv +++ /dev/null @@ -1,68 +0,0 @@ 
-,AmazonCounterfactualClassification,AmazonPolarityClassification,AmazonReviewsClassification,Banking77Classification,EmotionClassification,ImdbClassification,MassiveIntentClassification,MassiveScenarioClassification,MTOPDomainClassification,MTOPIntentClassification,ToxicConversationsClassification,TweetSentimentExtractionClassification,ArxivClusteringP2P,ArxivClusteringS2S,BiorxivClusteringP2P,BiorxivClusteringS2S,MedrxivClusteringP2P,MedrxivClusteringS2S,RedditClustering,RedditClusteringP2P,StackExchangeClustering,StackExchangeClusteringP2P,TwentyNewsgroupsClustering,SprintDuplicateQuestions,TwitterSemEval2015,TwitterURLCorpus,AskUbuntuDupQuestions,MindSmallReranking,SciDocsRR,StackOverflowDupQuestions,ArguAna,ClimateFEVER,CQADupstackAndroidRetrieval,CQADupstackEnglishRetrieval,CQADupstackGamingRetrieval,CQADupstackGisRetrieval,CQADupstackMathematicaRetrieval,CQADupstackPhysicsRetrieval,CQADupstackProgrammersRetrieval,CQADupstackStatsRetrieval,CQADupstackTexRetrieval,CQADupstackUnixRetrieval,CQADupstackWebmastersRetrieval,CQADupstackWordpressRetrieval,DBPedia,FEVER,FiQA2018,HotpotQA,MSMARCO,NFCorpus,NQ,QuoraRetrieval,SCIDOCS,SciFact,Touche2020,TRECCOVID,BIOSSES,SICK-R,STS12,STS13,STS14,STS15,STS16,STS17,STS22,STSBenchmark,SummEval 
-AmazonCounterfactualClassification,0.9999998807907104,0.9725366830825806,0.8517286777496338,0.901214599609375,0.8986697196960449,0.9143805503845215,0.915182888507843,0.915182888507843,0.9136463403701782,0.9136463403701782,0.9268499612808228,0.9379464387893677,0.9061737060546875,0.9226172566413879,0.8816869854927063,0.9111778140068054,0.8716050982475281,0.8851615786552429,0.9361067414283752,0.9444894790649414,0.9205214381217957,0.8694825768470764,0.927936851978302,0.7448422908782959,0.8848876357078552,0.9181852340698242,0.8771646618843079,0.8432382345199585,0.9121214151382446,0.8809256553649902,0.9206087589263916,0.8687456250190735,0.8753382563591003,0.9054211974143982,0.9050220847129822,0.8575048446655273,0.8769376277923584,0.8788719773292542,0.8834834098815918,0.8732110261917114,0.8720847964286804,0.8778753876686096,0.8800479173660278,0.8742043972015381,0.8960760831832886,0.8687456250190735,0.9194890260696411,0.8983748555183411,0.9003165364265442,0.8930765986442566,0.9119835495948792,0.9182446002960205,0.889691174030304,0.892997145652771,0.9215371608734131,0.8833832144737244,0.8625791668891907,0.8299649357795715,0.9155313968658447,0.9190161824226379,0.9440998435020447,0.9286789298057556,0.9331569671630859,0.908374547958374,0.8873857855796814,0.933322012424469,0.9260546565055847 
-AmazonPolarityClassification,0.9725366830825806,1.0000001192092896,0.8430818319320679,0.8914044499397278,0.8869525194168091,0.9380159974098206,0.9221842288970947,0.9221842288970947,0.9159311652183533,0.9159311652183533,0.9314469695091248,0.9369900822639465,0.9092541337013245,0.9261904954910278,0.8834359645843506,0.9139003753662109,0.8740972876548767,0.8870366811752319,0.9363611340522766,0.9457828998565674,0.9190340638160706,0.8677751421928406,0.9307129383087158,0.7418009638786316,0.8866813778877258,0.9222720861434937,0.8745728135108948,0.8573817014694214,0.9169661998748779,0.8784961700439453,0.911296546459198,0.8777851462364197,0.8737571239471436,0.9069114327430725,0.9039636254310608,0.8530020713806152,0.874477207660675,0.8754789233207703,0.8846838474273682,0.870514988899231,0.8698432445526123,0.8746495246887207,0.8826687932014465,0.8705847859382629,0.9033768773078918,0.8777851462364197,0.91923987865448,0.9041053652763367,0.9071121215820312,0.8932070136070251,0.9180147051811218,0.922761857509613,0.8992704749107361,0.8951342701911926,0.9281871914863586,0.8897658586502075,0.8574753999710083,0.8390056490898132,0.915518581867218,0.9148537516593933,0.93932044506073,0.9213941097259521,0.9270878434181213,0.904332160949707,0.8904899954795837,0.9300788640975952,0.924091637134552 
-AmazonReviewsClassification,0.8517286777496338,0.8430818319320679,1.0,0.8274011611938477,0.8377322554588318,0.8060175180435181,0.8927789330482483,0.8927789330482483,0.8721326589584351,0.8721326589584351,0.8651610612869263,0.8857579231262207,0.8268718719482422,0.8719925284385681,0.8097570538520813,0.8545474410057068,0.8083184361457825,0.8327248692512512,0.8771207332611084,0.8643344044685364,0.8922589421272278,0.7934136986732483,0.8788571953773499,0.6903976202011108,0.829818844795227,0.8436835408210754,0.8549239039421082,0.7972355484962463,0.8631322979927063,0.8426644802093506,0.8367676734924316,0.8256171345710754,0.7958477735519409,0.8322862386703491,0.8206374049186707,0.7885207533836365,0.7998864650726318,0.8036501407623291,0.8091654181480408,0.7987720966339111,0.8001028299331665,0.8078638315200806,0.8033185601234436,0.7957671284675598,0.857245147228241,0.8256171345710754,0.8700592517852783,0.861984133720398,0.8672728538513184,0.8301711678504944,0.8646405339241028,0.8830270767211914,0.8478741645812988,0.831039309501648,0.8493306040763855,0.8365864753723145,0.797872006893158,0.8078639507293701,0.8909081220626831,0.8852756023406982,0.8971998691558838,0.8883463144302368,0.8896051645278931,0.8645649552345276,0.9054290056228638,0.8723177313804626,0.8509836196899414 
-Banking77Classification,0.901214599609375,0.8914044499397278,0.8274011611938477,1.0000001192092896,0.8723608255386353,0.8470839262008667,0.9069743156433105,0.9069743156433105,0.9206961393356323,0.9206961393356323,0.8975388407707214,0.9144454002380371,0.8722931742668152,0.896972119808197,0.8474944233894348,0.8749076724052429,0.8394017219543457,0.8544522523880005,0.9231041669845581,0.925590991973877,0.9119146466255188,0.8638715744018555,0.9062290787696838,0.7831484079360962,0.8524944186210632,0.8785852193832397,0.8910298943519592,0.8121806979179382,0.8894634246826172,0.8714233040809631,0.868624746799469,0.8610327243804932,0.891751766204834,0.8907471895217896,0.9007715582847595,0.8569079041481018,0.8734966516494751,0.866807758808136,0.8690007328987122,0.8641847968101501,0.860200822353363,0.8753891587257385,0.8771096467971802,0.8702175617218018,0.8732821941375732,0.8610327243804932,0.9026527404785156,0.8878933191299438,0.9106489419937134,0.8546255826950073,0.8853597640991211,0.9191234707832336,0.85566246509552,0.8536851406097412,0.8839954733848572,0.8441707491874695,0.8158388733863831,0.8224376440048218,0.9016413688659668,0.8994392156600952,0.9188550710678101,0.903643786907196,0.9121543169021606,0.8826841711997986,0.8817184567451477,0.9049835801124573,0.8948301672935486 
-EmotionClassification,0.8986697196960449,0.8869525194168091,0.8377322554588318,0.8723608255386353,0.9999998807907104,0.8484991192817688,0.8875608444213867,0.8875608444213867,0.8829092979431152,0.8829092979431152,0.8891539573669434,0.9219067692756653,0.8317292332649231,0.8686325550079346,0.8193415999412537,0.8483346104621887,0.8102065920829773,0.8282783031463623,0.9020230770111084,0.9169633984565735,0.8777654767036438,0.8238407969474792,0.8720272779464722,0.7159684896469116,0.8473405838012695,0.8711422085762024,0.8389461040496826,0.8027688264846802,0.8528178930282593,0.8284692168235779,0.8480105400085449,0.8250551819801331,0.8178394436836243,0.8577408790588379,0.847434937953949,0.7987120747566223,0.8172357678413391,0.8205375075340271,0.8239988684654236,0.8105478286743164,0.8049406409263611,0.8208318948745728,0.8235769867897034,0.8167813420295715,0.8425841331481934,0.8250551819801331,0.8733782172203064,0.8516587615013123,0.8590372204780579,0.840248167514801,0.8614929914474487,0.8839017748832703,0.8242060542106628,0.8306145071983337,0.8763502836227417,0.8282589912414551,0.7966073751449585,0.8122089505195618,0.8802585005760193,0.8810560703277588,0.9074888229370117,0.8856732845306396,0.9061300754547119,0.8690868020057678,0.8543643951416016,0.8943052887916565,0.8744396567344666 
-ImdbClassification,0.9143805503845215,0.9380159974098206,0.8060175180435181,0.8470839262008667,0.8484991192817688,0.9999999403953552,0.8814461827278137,0.8814461827278137,0.8795197010040283,0.8795197010040283,0.8966728448867798,0.8972986340522766,0.8641327023506165,0.8861985206604004,0.8488188982009888,0.8755455613136292,0.8384004235267639,0.8527776598930359,0.8933579325675964,0.9051699042320251,0.8781872391700745,0.8293363451957703,0.8849860429763794,0.6948404908180237,0.8515918254852295,0.8860101699829102,0.8357095718383789,0.8353019952774048,0.877151370048523,0.8337087631225586,0.8854134678840637,0.839382529258728,0.8364291787147522,0.8700346946716309,0.8654309511184692,0.8066450357437134,0.8283482193946838,0.8346224427223206,0.8459446430206299,0.8239307999610901,0.8178578019142151,0.8369230031967163,0.8369060158729553,0.825982391834259,0.8585302829742432,0.839382529258728,0.8759719133377075,0.8769184350967407,0.8686230182647705,0.8685956597328186,0.8820010423660278,0.8861343860626221,0.8516706228256226,0.8627360463142395,0.8969246745109558,0.8567239046096802,0.8268139362335205,0.8363828063011169,0.8844971656799316,0.8787712454795837,0.9043587446212769,0.8874390721321106,0.8882074952125549,0.8814995884895325,0.8508477807044983,0.9050480723381042,0.9031330347061157 
-MassiveIntentClassification,0.915182888507843,0.9221842288970947,0.8927789330482483,0.9069743156433105,0.8875608444213867,0.8814461827278137,0.9999998807907104,0.9999998807907104,0.9750701189041138,0.9750701189041138,0.9595458507537842,0.9734818339347839,0.8967283368110657,0.9652600884437561,0.8725947141647339,0.9290119409561157,0.8716907501220703,0.9026644825935364,0.9533762335777283,0.9527404308319092,0.9526662826538086,0.9006844758987427,0.9625997543334961,0.7721297740936279,0.9069065451622009,0.9225729703903198,0.921728789806366,0.8853965401649475,0.9450154900550842,0.9193835258483887,0.9042443037033081,0.9111378788948059,0.8964847922325134,0.9208599328994751,0.9270387291908264,0.8749427199363708,0.8865760564804077,0.8897450566291809,0.8953050374984741,0.8811706304550171,0.8818027973175049,0.8984538316726685,0.897138774394989,0.8913282155990601,0.9272512793540955,0.9111378788948059,0.9463034868240356,0.9470813274383545,0.9404415488243103,0.8946558833122253,0.9522876143455505,0.9682824611663818,0.9068668484687805,0.8946090340614319,0.9345963597297668,0.8986713886260986,0.8533274531364441,0.8902477025985718,0.9739812016487122,0.9600082635879517,0.9729264974594116,0.9575598835945129,0.9471473097801208,0.9507492780685425,0.9367305040359497,0.9577158689498901,0.9361376762390137 
-MassiveScenarioClassification,0.915182888507843,0.9221842288970947,0.8927789330482483,0.9069743156433105,0.8875608444213867,0.8814461827278137,0.9999998807907104,0.9999998807907104,0.9750701189041138,0.9750701189041138,0.9595458507537842,0.9734818339347839,0.8967283368110657,0.9652600884437561,0.8725947141647339,0.9290119409561157,0.8716907501220703,0.9026644825935364,0.9533762335777283,0.9527404308319092,0.9526662826538086,0.9006844758987427,0.9625997543334961,0.7721297740936279,0.9069065451622009,0.9225729703903198,0.921728789806366,0.8853965401649475,0.9450154900550842,0.9193835258483887,0.9042443037033081,0.9111378788948059,0.8964847922325134,0.9208599328994751,0.9270387291908264,0.8749427199363708,0.8865760564804077,0.8897450566291809,0.8953050374984741,0.8811706304550171,0.8818027973175049,0.8984538316726685,0.897138774394989,0.8913282155990601,0.9272512793540955,0.9111378788948059,0.9463034868240356,0.9470813274383545,0.9404415488243103,0.8946558833122253,0.9522876143455505,0.9682824611663818,0.9068668484687805,0.8946090340614319,0.9345963597297668,0.8986713886260986,0.8533274531364441,0.8902477025985718,0.9739812016487122,0.9600082635879517,0.9729264974594116,0.9575598835945129,0.9471473097801208,0.9507492780685425,0.9367305040359497,0.9577158689498901,0.9361376762390137 
-MTOPDomainClassification,0.9136463403701782,0.9159311652183533,0.8721326589584351,0.9206961393356323,0.8829092979431152,0.8795197010040283,0.9750701189041138,0.9750701189041138,1.000000238418579,1.000000238418579,0.9477394223213196,0.9620302319526672,0.8916090726852417,0.9507513046264648,0.8692935109138489,0.9198922514915466,0.8673161864280701,0.8943023681640625,0.9499332308769226,0.9520930647850037,0.9415560960769653,0.8926861882209778,0.9499171376228333,0.7859740853309631,0.9024993777275085,0.9156029224395752,0.9076282382011414,0.8778815865516663,0.9327757358551025,0.9088919162750244,0.8972400426864624,0.8968261480331421,0.900844395160675,0.917822539806366,0.922167956829071,0.8686424493789673,0.8871181607246399,0.8848662972450256,0.8901574015617371,0.8792017698287964,0.8758128881454468,0.892038881778717,0.8904502391815186,0.8847219944000244,0.9200598001480103,0.8968261480331421,0.9321253299713135,0.9370198249816895,0.9469432234764099,0.8838071823120117,0.9368411898612976,0.956068754196167,0.8929500579833984,0.8861535787582397,0.9225219488143921,0.8865776658058167,0.8508256077766418,0.8823556900024414,0.9523103833198547,0.9431838989257812,0.960503101348877,0.9454217553138733,0.9361295700073242,0.9454723000526428,0.918997585773468,0.9517570734024048,0.932615339756012 
-MTOPIntentClassification,0.9136463403701782,0.9159311652183533,0.8721326589584351,0.9206961393356323,0.8829092979431152,0.8795197010040283,0.9750701189041138,0.9750701189041138,1.000000238418579,1.000000238418579,0.9477394223213196,0.9620302319526672,0.8916090726852417,0.9507513046264648,0.8692935109138489,0.9198922514915466,0.8673161864280701,0.8943023681640625,0.9499332308769226,0.9520930647850037,0.9415560960769653,0.8926861882209778,0.9499171376228333,0.7859740853309631,0.9024993777275085,0.9156029224395752,0.9076282382011414,0.8778815865516663,0.9327757358551025,0.9088919162750244,0.8972400426864624,0.8968261480331421,0.900844395160675,0.917822539806366,0.922167956829071,0.8686424493789673,0.8871181607246399,0.8848662972450256,0.8901574015617371,0.8792017698287964,0.8758128881454468,0.892038881778717,0.8904502391815186,0.8847219944000244,0.9200598001480103,0.8968261480331421,0.9321253299713135,0.9370198249816895,0.9469432234764099,0.8838071823120117,0.9368411898612976,0.956068754196167,0.8929500579833984,0.8861535787582397,0.9225219488143921,0.8865776658058167,0.8508256077766418,0.8823556900024414,0.9523103833198547,0.9431838989257812,0.960503101348877,0.9454217553138733,0.9361295700073242,0.9454723000526428,0.918997585773468,0.9517570734024048,0.932615339756012 
-ToxicConversationsClassification,0.9268499612808228,0.9314469695091248,0.8651610612869263,0.8975388407707214,0.8891539573669434,0.8966728448867798,0.9595458507537842,0.9595458507537842,0.9477394223213196,0.9477394223213196,1.0,0.9791338443756104,0.8923237323760986,0.9557365775108337,0.8709883689880371,0.9188441038131714,0.871146023273468,0.8937941193580627,0.9509134292602539,0.9578840732574463,0.939154326915741,0.8850978016853333,0.9539644122123718,0.7365511655807495,0.9154583811759949,0.9275434613227844,0.8940021991729736,0.88185054063797,0.9295150637626648,0.8942537307739258,0.9117645025253296,0.8989847898483276,0.8807247281074524,0.9173315763473511,0.9148895144462585,0.8601332306861877,0.8703606724739075,0.8878904581069946,0.8843247294425964,0.8718209862709045,0.8644421100616455,0.8797895312309265,0.8873119354248047,0.8789352178573608,0.9204174280166626,0.8989847898483276,0.9462225437164307,0.9388979077339172,0.9315915107727051,0.8950788378715515,0.9423636198043823,0.9756679534912109,0.8898159265518188,0.8889317512512207,0.9373823404312134,0.8933861255645752,0.8529271483421326,0.8919810056686401,0.9546875357627869,0.9443492889404297,0.968707799911499,0.9534297585487366,0.9462233185768127,0.9519502520561218,0.9268797636032104,0.9642019867897034,0.9413120746612549 
-TweetSentimentExtractionClassification,0.9379464387893677,0.9369900822639465,0.8857579231262207,0.9144454002380371,0.9219067692756653,0.8972986340522766,0.9734818339347839,0.9734818339347839,0.9620302319526672,0.9620302319526672,0.9791338443756104,0.9999998807907104,0.8941646814346313,0.9579667448997498,0.8712595701217651,0.9201945066452026,0.8699737787246704,0.89603590965271,0.9627991318702698,0.9685115814208984,0.9449712634086609,0.8842088580131531,0.957473874092102,0.7514926791191101,0.9206107258796692,0.9304682612419128,0.9066123366355896,0.8845974802970886,0.9322013854980469,0.902407705783844,0.9012815952301025,0.9066250324249268,0.8804208636283875,0.9191873669624329,0.9144876003265381,0.8603336215019226,0.8727058172225952,0.8835071921348572,0.8789454698562622,0.8683345913887024,0.8654515743255615,0.8802311420440674,0.8835926055908203,0.8761081099510193,0.9267283082008362,0.9066250324249268,0.9446496963500977,0.9437984228134155,0.9354615807533264,0.8943512439727783,0.9478681683540344,0.9679496884346008,0.8895792961120605,0.8876237869262695,0.9310782551765442,0.8951513767242432,0.8504247069358826,0.8916918635368347,0.9640517830848694,0.952302873134613,0.9752659201622009,0.960588812828064,0.9546923041343689,0.9575697183609009,0.9292759299278259,0.9668652415275574,0.9422594308853149 
-ArxivClusteringP2P,0.9061737060546875,0.9092541337013245,0.8268718719482422,0.8722931742668152,0.8317292332649231,0.8641327023506165,0.8967283368110657,0.8967283368110657,0.8916090726852417,0.8916090726852417,0.8923237323760986,0.8941646814346313,1.0000001192092896,0.9296634197235107,0.9502661228179932,0.9453719854354858,0.9198501706123352,0.9335336685180664,0.9047669172286987,0.9180348515510559,0.9226759076118469,0.8873042464256287,0.9214056134223938,0.7285578846931458,0.849682629108429,0.8937832117080688,0.8850733637809753,0.8498433828353882,0.942658007144928,0.8952110409736633,0.9187605381011963,0.8781493902206421,0.8702689409255981,0.9049964547157288,0.8932080864906311,0.8815957903862,0.9128819108009338,0.9258798360824585,0.9027954936027527,0.916979193687439,0.9049314856529236,0.8934173583984375,0.8883233666419983,0.8882037997245789,0.9017259478569031,0.8781493902206421,0.9082144498825073,0.9008392095565796,0.8916330933570862,0.9167720675468445,0.910786509513855,0.9078540205955505,0.9503505229949951,0.9326635599136353,0.9119592308998108,0.9338494539260864,0.9067458510398865,0.8065285086631775,0.8979881405830383,0.912732720375061,0.9206886291503906,0.9100639820098877,0.9006866812705994,0.8714514374732971,0.8883957266807556,0.8937532901763916,0.9103951454162598 
-ArxivClusteringS2S,0.9226172566413879,0.9261904954910278,0.8719925284385681,0.896972119808197,0.8686325550079346,0.8861985206604004,0.9652600884437561,0.9652600884437561,0.9507513046264648,0.9507513046264648,0.9557365775108337,0.9579667448997498,0.9296634197235107,1.0000001192092896,0.8961361050605774,0.9565531611442566,0.8940218091011047,0.9300719499588013,0.9504541754722595,0.9541244506835938,0.9593493342399597,0.9132787585258484,0.9759801030158997,0.7547072768211365,0.9124619960784912,0.9184166789054871,0.9116992354393005,0.8834333419799805,0.9691587686538696,0.9220894575119019,0.9125237464904785,0.9121813178062439,0.8931977152824402,0.9257218837738037,0.9272482991218567,0.888041615486145,0.9053133130073547,0.9213353395462036,0.9087139964103699,0.9069965481758118,0.8965097069740295,0.9025301933288574,0.9069535732269287,0.8937448859214783,0.9494740962982178,0.9121813178062439,0.947498619556427,0.956941545009613,0.9396511316299438,0.9095065593719482,0.9639177918434143,0.9652320742607117,0.9295390248298645,0.9176734685897827,0.9311457872390747,0.9181896448135376,0.8817151784896851,0.8841482996940613,0.9600266218185425,0.9491159915924072,0.9618067741394043,0.9499640464782715,0.9319314360618591,0.942611038684845,0.933143138885498,0.949581503868103,0.9360114932060242 
-BiorxivClusteringP2P,0.8816869854927063,0.8834359645843506,0.8097570538520813,0.8474944233894348,0.8193415999412537,0.8488188982009888,0.8725947141647339,0.8725947141647339,0.8692935109138489,0.8692935109138489,0.8709883689880371,0.8712595701217651,0.9502661228179932,0.8961361050605774,0.9999998211860657,0.9384621381759644,0.956779956817627,0.9311316013336182,0.8793036341667175,0.8974093794822693,0.8921758532524109,0.8562670350074768,0.8946423530578613,0.7087208032608032,0.8287373781204224,0.8686756491661072,0.8585236072540283,0.8195667266845703,0.9144576191902161,0.8649075031280518,0.9078131318092346,0.8496026396751404,0.8520771265029907,0.8786687850952148,0.8723767995834351,0.8544682264328003,0.8758416771888733,0.8830129504203796,0.8758763670921326,0.8930484056472778,0.8660279512405396,0.8672094345092773,0.8678568005561829,0.8641262650489807,0.8874456882476807,0.8496026396751404,0.8788062334060669,0.8783058524131775,0.8714765310287476,0.9372479319572449,0.8799590468406677,0.8842073082923889,0.9247989654541016,0.9538105130195618,0.8883203864097595,0.942850649356842,0.9347233772277832,0.7822513580322266,0.8725536465644836,0.8873900175094604,0.8985730409622192,0.8854580521583557,0.8810415267944336,0.8527474999427795,0.8675625324249268,0.8723320960998535,0.8993033170700073 
-BiorxivClusteringS2S,0.9111778140068054,0.9139003753662109,0.8545474410057068,0.8749076724052429,0.8483346104621887,0.8755455613136292,0.9290119409561157,0.9290119409561157,0.9198922514915466,0.9198922514915466,0.9188441038131714,0.9201945066452026,0.9453719854354858,0.9565531611442566,0.9384621381759644,0.9999999403953552,0.9279764294624329,0.9682707190513611,0.9273788332939148,0.9292674660682678,0.9397923350334167,0.9037898182868958,0.9507690668106079,0.7521305084228516,0.8774093389511108,0.9051491618156433,0.898175835609436,0.8788741827011108,0.9745520353317261,0.9197582602500916,0.9147593975067139,0.8964840173721313,0.8843629360198975,0.9122307896614075,0.9139900803565979,0.8944020867347717,0.913551926612854,0.9188753366470337,0.911128044128418,0.923565149307251,0.8992398977279663,0.9039843082427979,0.9041688442230225,0.8961097002029419,0.9335931539535522,0.8964840173721313,0.9220610857009888,0.9312097430229187,0.9096160531044006,0.9406470060348511,0.9374184012413025,0.930925726890564,0.9657474756240845,0.9617788791656494,0.915169358253479,0.957700252532959,0.9307554960250854,0.844598650932312,0.9283256530761719,0.932389497756958,0.9405863881111145,0.9268279671669006,0.9138600826263428,0.9094533324241638,0.9075512886047363,0.9221967458724976,0.921167254447937 
-MedrxivClusteringP2P,0.8716050982475281,0.8740972876548767,0.8083184361457825,0.8394017219543457,0.8102065920829773,0.8384004235267639,0.8716907501220703,0.8716907501220703,0.8673161864280701,0.8673161864280701,0.871146023273468,0.8699737787246704,0.9198501706123352,0.8940218091011047,0.956779956817627,0.9279764294624329,0.9999998807907104,0.9557642936706543,0.8812946081161499,0.8938334584236145,0.894402801990509,0.8485972285270691,0.9003008604049683,0.7113694548606873,0.8280568718910217,0.87420654296875,0.8530390858650208,0.8247097730636597,0.9096218347549438,0.8582576513290405,0.9103243947029114,0.8590527176856995,0.8470963835716248,0.8777068257331848,0.8736319541931152,0.8565325140953064,0.8669453859329224,0.870018720626831,0.8732233643531799,0.9020135402679443,0.8579853177070618,0.8595551252365112,0.8668990731239319,0.8557989597320557,0.8835738897323608,0.8590527176856995,0.8795516490936279,0.8794113397598267,0.8779413104057312,0.9487459659576416,0.8801149129867554,0.8837840557098389,0.9216246008872986,0.9501500725746155,0.888805091381073,0.9613710641860962,0.9220839142799377,0.7837725281715393,0.8748796582221985,0.8852849006652832,0.899524986743927,0.8803415298461914,0.8795533776283264,0.853192925453186,0.8731187582015991,0.8726744651794434,0.9043846130371094 
-MedrxivClusteringS2S,0.8851615786552429,0.8870366811752319,0.8327248692512512,0.8544522523880005,0.8282783031463623,0.8527776598930359,0.9026644825935364,0.9026644825935364,0.8943023681640625,0.8943023681640625,0.8937941193580627,0.89603590965271,0.9335336685180664,0.9300719499588013,0.9311316013336182,0.9682707190513611,0.9557642936706543,0.9999999403953552,0.9053035378456116,0.9091455340385437,0.9207715392112732,0.8806784749031067,0.9318292140960693,0.7369350790977478,0.8536560535430908,0.8926279544830322,0.8838703632354736,0.8627454042434692,0.953004002571106,0.8977246284484863,0.9049397110939026,0.884668231010437,0.8621524572372437,0.886515200138092,0.8903034329414368,0.8818444013595581,0.8924626708030701,0.8929807543754578,0.8892922401428223,0.9127675890922546,0.8801870942115784,0.8826228976249695,0.8879578709602356,0.8784561157226562,0.9113901853561401,0.884668231010437,0.9016450047492981,0.9183295369148254,0.8957593441009521,0.9393429756164551,0.9159761667251587,0.9054452776908875,0.9564609527587891,0.9464058876037598,0.896754264831543,0.9670438766479492,0.9164301753044128,0.8197333216667175,0.908024787902832,0.9073441624641418,0.9192919135093689,0.90470290184021,0.8936593532562256,0.8826584219932556,0.895809531211853,0.8965899348258972,0.9074312448501587 
-RedditClustering,0.9361067414283752,0.9363611340522766,0.8771207332611084,0.9231041669845581,0.9020230770111084,0.8933579325675964,0.9533762335777283,0.9533762335777283,0.9499332308769226,0.9499332308769226,0.9509134292602539,0.9627991318702698,0.9047669172286987,0.9504541754722595,0.8793036341667175,0.9273788332939148,0.8812946081161499,0.9053035378456116,1.0,0.9636914730072021,0.9537168145179749,0.8936053514480591,0.9547697305679321,0.771812379360199,0.9131519198417664,0.9342699646949768,0.902423083782196,0.8822137117385864,0.935879111289978,0.9017000794410706,0.9085865616798401,0.9140297770500183,0.8902676105499268,0.928565263748169,0.9274815917015076,0.8728464841842651,0.8922913074493408,0.8964251279830933,0.8974623084068298,0.8893818259239197,0.8851460814476013,0.8965474367141724,0.8993308544158936,0.8881715536117554,0.9286068081855774,0.9140297770500183,0.9438943862915039,0.9391739964485168,0.9369815587997437,0.9039009809494019,0.9450920224189758,0.959994375705719,0.9040453433990479,0.8978421688079834,0.9354337453842163,0.89899742603302,0.8582507967948914,0.8789204955101013,0.9507595896720886,0.9454846978187561,0.964044988155365,0.9484242796897888,0.9516837000846863,0.9390854835510254,0.9249169826507568,0.9586203694343567,0.9392504096031189 
-RedditClusteringP2P,0.9444894790649414,0.9457828998565674,0.8643344044685364,0.925590991973877,0.9169633984565735,0.9051699042320251,0.9527404308319092,0.9527404308319092,0.9520930647850037,0.9520930647850037,0.9578840732574463,0.9685115814208984,0.9180348515510559,0.9541244506835938,0.8974093794822693,0.9292674660682678,0.8938334584236145,0.9091455340385437,0.9636914730072021,1.0000001192092896,0.9489050507545471,0.9091170430183411,0.9504086375236511,0.7593399882316589,0.9181907773017883,0.9331505298614502,0.9009460210800171,0.8788705468177795,0.9390465617179871,0.902940571308136,0.9276478886604309,0.8984732627868652,0.9011673927307129,0.9325352311134338,0.9371761083602905,0.8817288279533386,0.8999888896942139,0.9109740257263184,0.9104718565940857,0.901867687702179,0.8874367475509644,0.9019376039505005,0.9086974263191223,0.8998266458511353,0.92087322473526,0.8984732627868652,0.9436620473861694,0.931955099105835,0.9319586157798767,0.9122331738471985,0.9413416981697083,0.9608737826347351,0.9078938364982605,0.9097794890403748,0.9485433101654053,0.9055797457695007,0.8730185031890869,0.8813309073448181,0.9435673952102661,0.9429996013641357,0.96527099609375,0.9469503164291382,0.9470174312591553,0.9395745992660522,0.9250169992446899,0.9560426473617554,0.9494283199310303 
-StackExchangeClustering,0.9205214381217957,0.9190340638160706,0.8922589421272278,0.9119146466255188,0.8777654767036438,0.8781872391700745,0.9526662826538086,0.9526662826538086,0.9415560960769653,0.9415560960769653,0.939154326915741,0.9449712634086609,0.9226759076118469,0.9593493342399597,0.8921758532524109,0.9397923350334167,0.894402801990509,0.9207715392112732,0.9537168145179749,0.9489050507545471,0.9999999403953552,0.9163016676902771,0.9632638096809387,0.7653152346611023,0.8863519430160522,0.9227038025856018,0.920473575592041,0.8657011389732361,0.9519332051277161,0.9323137402534485,0.9170532822608948,0.9021029472351074,0.9093854427337646,0.9599207639694214,0.9356912970542908,0.9072699546813965,0.9264189600944519,0.9299076795578003,0.9368252158164978,0.9241059422492981,0.9255838990211487,0.9274458289146423,0.9290803074836731,0.9175654649734497,0.9289190769195557,0.9021029472351074,0.9526970386505127,0.9371076226234436,0.934260904788971,0.9154017567634583,0.9463439583778381,0.967335045337677,0.9253087043762207,0.9159756898880005,0.9310891032218933,0.9119280576705933,0.876883327960968,0.8612088561058044,0.9496986269950867,0.9500508308410645,0.9619574546813965,0.9489233493804932,0.9536707401275635,0.9245097041130066,0.9384429454803467,0.9446290731430054,0.9257869124412537 
-StackExchangeClusteringP2P,0.8694825768470764,0.8677751421928406,0.7934136986732483,0.8638715744018555,0.8238407969474792,0.8293363451957703,0.9006844758987427,0.9006844758987427,0.8926861882209778,0.8926861882209778,0.8850978016853333,0.8842088580131531,0.8873042464256287,0.9132787585258484,0.8562670350074768,0.9037898182868958,0.8485972285270691,0.8806784749031067,0.8936053514480591,0.9091170430183411,0.9163016676902771,0.9999999403953552,0.9050739407539368,0.7441107630729675,0.8436607122421265,0.8557848334312439,0.8838653564453125,0.8219260573387146,0.9194225668907166,0.918297290802002,0.8716145753860474,0.8466858863830566,0.9168010950088501,0.9102783799171448,0.9504308104515076,0.9322190880775452,0.939926028251648,0.9218308925628662,0.9459747672080994,0.9272336959838867,0.9189807772636414,0.9332571029663086,0.9323667287826538,0.9188860058784485,0.865909218788147,0.8466858863830566,0.9015153050422668,0.8845800161361694,0.8738407492637634,0.865176260471344,0.8900197744369507,0.9089018702507019,0.899509608745575,0.8790194392204285,0.876009464263916,0.8632569313049316,0.8407849073410034,0.8076320886611938,0.8901703953742981,0.8968315720558167,0.9036004543304443,0.8906463384628296,0.8955281376838684,0.8716039061546326,0.8718999028205872,0.8934082984924316,0.87904953956604 
-TwentyNewsgroupsClustering,0.927936851978302,0.9307129383087158,0.8788571953773499,0.9062290787696838,0.8720272779464722,0.8849860429763794,0.9625997543334961,0.9625997543334961,0.9499171376228333,0.9499171376228333,0.9539644122123718,0.957473874092102,0.9214056134223938,0.9759801030158997,0.8946423530578613,0.9507690668106079,0.9003008604049683,0.9318292140960693,0.9547697305679321,0.9504086375236511,0.9632638096809387,0.9050739407539368,1.0000001192092896,0.7655467987060547,0.9043189883232117,0.9265701770782471,0.9229085445404053,0.8874714374542236,0.962175726890564,0.9225630164146423,0.9156261682510376,0.9241911172866821,0.8967640995979309,0.9317466616630554,0.9298926591873169,0.888175904750824,0.901106059551239,0.9096159934997559,0.9153035879135132,0.9074497222900391,0.8974940180778503,0.9126835465431213,0.9110020995140076,0.8972079753875732,0.9430718421936035,0.9241911172866821,0.9498501420021057,0.9545242190361023,0.943306565284729,0.9156935811042786,0.9650863409042358,0.9623216986656189,0.9296791553497314,0.9186417460441589,0.9339285492897034,0.9249359369277954,0.8814013004302979,0.879280149936676,0.9567474722862244,0.9498928785324097,0.9652311205863953,0.9521862268447876,0.9399745464324951,0.9386094212532043,0.9347309470176697,0.9504886865615845,0.9389493465423584 
-SprintDuplicateQuestions,0.7448422908782959,0.7418009638786316,0.6903976202011108,0.7831484079360962,0.7159684896469116,0.6948404908180237,0.7721297740936279,0.7721297740936279,0.7859740853309631,0.7859740853309631,0.7365511655807495,0.7514926791191101,0.7285578846931458,0.7547072768211365,0.7087208032608032,0.7521305084228516,0.7113694548606873,0.7369350790977478,0.771812379360199,0.7593399882316589,0.7653152346611023,0.7441107630729675,0.7655467987060547,1.0000001192092896,0.7108128070831299,0.7352480292320251,0.7720503807067871,0.6729485392570496,0.7625035047531128,0.7492141723632812,0.7167574167251587,0.7223891615867615,0.7940370440483093,0.7370885610580444,0.7504194378852844,0.7442988157272339,0.7660002708435059,0.7272855639457703,0.7467317581176758,0.7388317584991455,0.7514438033103943,0.7605757713317871,0.7364763021469116,0.7487673163414001,0.7269722819328308,0.7223891615867615,0.7529628872871399,0.7362186312675476,0.7674270272254944,0.7220373749732971,0.7399367094039917,0.7632731795310974,0.740402340888977,0.7349553108215332,0.7392125129699707,0.7177295684814453,0.712036669254303,0.6801100373268127,0.7566519975662231,0.7461093068122864,0.7562764286994934,0.74090975522995,0.7517213821411133,0.7253054976463318,0.7493941187858582,0.7513360977172852,0.7284443974494934 
-TwitterSemEval2015,0.8848876357078552,0.8866813778877258,0.829818844795227,0.8524944186210632,0.8473405838012695,0.8515918254852295,0.9069065451622009,0.9069065451622009,0.9024993777275085,0.9024993777275085,0.9154583811759949,0.9206107258796692,0.849682629108429,0.9124619960784912,0.8287373781204224,0.8774093389511108,0.8280568718910217,0.8536560535430908,0.9131519198417664,0.9181907773017883,0.8863519430160522,0.8436607122421265,0.9043189883232117,0.7108128070831299,1.0,0.8765551447868347,0.8490984439849854,0.8362776637077332,0.8876252174377441,0.8500367999076843,0.8637065887451172,0.8542394042015076,0.8377655744552612,0.8568944334983826,0.8744352459907532,0.8095758557319641,0.8217236399650574,0.8345741033554077,0.8340392112731934,0.8295093774795532,0.8197506666183472,0.8345680236816406,0.8336871266365051,0.8268483281135559,0.8812722563743591,0.8542394042015076,0.8879868388175964,0.8985650539398193,0.8808208107948303,0.8472391366958618,0.8976626992225647,0.9045380353927612,0.8541281223297119,0.8465986251831055,0.8941338062286377,0.8519248962402344,0.8185410499572754,0.8878549337387085,0.9094604253768921,0.8995444178581238,0.9149953126907349,0.9032992720603943,0.8763034343719482,0.9204296469688416,0.8792893886566162,0.9244780540466309,0.920987069606781 
-TwitterURLCorpus,0.9181852340698242,0.9222720861434937,0.8436835408210754,0.8785852193832397,0.8711422085762024,0.8860101699829102,0.9225729703903198,0.9225729703903198,0.9156029224395752,0.9156029224395752,0.9275434613227844,0.9304682612419128,0.8937832117080688,0.9184166789054871,0.8686756491661072,0.9051491618156433,0.87420654296875,0.8926279544830322,0.9342699646949768,0.9331505298614502,0.9227038025856018,0.8557848334312439,0.9265701770782471,0.7352480292320251,0.8765551447868347,0.9999999403953552,0.8714816570281982,0.8772052526473999,0.9152761697769165,0.8724372982978821,0.9144514799118042,0.8842594623565674,0.8573910593986511,0.9045571088790894,0.8917332291603088,0.8425986766815186,0.8575678467750549,0.8722454309463501,0.8732454776763916,0.8640527129173279,0.8527172803878784,0.8612546324729919,0.87152498960495,0.8667789697647095,0.8917800784111023,0.8842594623565674,0.9199483394622803,0.9096399545669556,0.9038676619529724,0.8938794136047363,0.9211317300796509,0.9303578734397888,0.8930266499519348,0.8879072666168213,0.9263610243797302,0.8916494846343994,0.8485643863677979,0.8542071580886841,0.9259677529335022,0.9181943535804749,0.9424399137496948,0.9249746799468994,0.9261443614959717,0.9087364673614502,0.9015825986862183,0.9283378720283508,0.926887035369873 
-AskUbuntuDupQuestions,0.8771646618843079,0.8745728135108948,0.8549239039421082,0.8910298943519592,0.8389461040496826,0.8357095718383789,0.921728789806366,0.921728789806366,0.9076282382011414,0.9076282382011414,0.8940021991729736,0.9066123366355896,0.8850733637809753,0.9116992354393005,0.8585236072540283,0.898175835609436,0.8530390858650208,0.8838703632354736,0.902423083782196,0.9009460210800171,0.920473575592041,0.8838653564453125,0.9229085445404053,0.7720503807067871,0.8490984439849854,0.8714816570281982,0.9999999403953552,0.8271192312240601,0.9121510982513428,0.9237239956855774,0.874069333076477,0.8626706600189209,0.9023654460906982,0.8748583793640137,0.8971635103225708,0.8783930540084839,0.8864251375198364,0.8661698698997498,0.8830022215843201,0.8678272366523743,0.880608856678009,0.93036949634552,0.8850497603416443,0.8874524235725403,0.8765844106674194,0.8626706600189209,0.9054697155952454,0.8964870572090149,0.888830304145813,0.8649072647094727,0.8988544940948486,0.9126819968223572,0.8830411434173584,0.864525556564331,0.8846127390861511,0.8762941360473633,0.8330298662185669,0.8284757733345032,0.914065420627594,0.9060060381889343,0.9224478602409363,0.9142858386039734,0.9095703363418579,0.8892186880111694,0.8974651098251343,0.901841938495636,0.8834519982337952 
-MindSmallReranking,0.8432382345199585,0.8573817014694214,0.7972355484962463,0.8121806979179382,0.8027688264846802,0.8353019952774048,0.8853965401649475,0.8853965401649475,0.8778815865516663,0.8778815865516663,0.88185054063797,0.8845974802970886,0.8498433828353882,0.8834333419799805,0.8195667266845703,0.8788741827011108,0.8247097730636597,0.8627454042434692,0.8822137117385864,0.8788705468177795,0.8657011389732361,0.8219260573387146,0.8874714374542236,0.6729485392570496,0.8362776637077332,0.8772052526473999,0.8271192312240601,0.9999998807907104,0.8903162479400635,0.8392180800437927,0.8564772009849548,0.8417803645133972,0.8068230152130127,0.8346021175384521,0.8371538519859314,0.7987034916877747,0.8081290125846863,0.8156949281692505,0.8110246062278748,0.8064138293266296,0.8085086941719055,0.8037967681884766,0.8270236253738403,0.810706615447998,0.8491460084915161,0.8417803645133972,0.8699422478675842,0.8755338788032532,0.8483620285987854,0.8392335772514343,0.8892091512680054,0.8765057325363159,0.8636199235916138,0.8338578343391418,0.8729779720306396,0.8616792559623718,0.802665114402771,0.8343176245689392,0.873137354850769,0.8696905374526978,0.889416515827179,0.875885009765625,0.8529878854751587,0.8793433308601379,0.8625198602676392,0.8832800984382629,0.8852307796478271 
-SciDocsRR,0.9121214151382446,0.9169661998748779,0.8631322979927063,0.8894634246826172,0.8528178930282593,0.877151370048523,0.9450154900550842,0.9450154900550842,0.9327757358551025,0.9327757358551025,0.9295150637626648,0.9322013854980469,0.942658007144928,0.9691587686538696,0.9144576191902161,0.9745520353317261,0.9096218347549438,0.953004002571106,0.935879111289978,0.9390465617179871,0.9519332051277161,0.9194225668907166,0.962175726890564,0.7625035047531128,0.8876252174377441,0.9152761697769165,0.9121510982513428,0.8903162479400635,0.9999999403953552,0.9336423277854919,0.9188738465309143,0.8973691463470459,0.8964974880218506,0.9219396710395813,0.9212622046470642,0.9038124680519104,0.9215748310089111,0.9176410436630249,0.9275285005569458,0.9317771792411804,0.9046880602836609,0.9125511646270752,0.9187279343605042,0.907027006149292,0.932131826877594,0.8973691463470459,0.9359960556030273,0.9427277445793152,0.9200493097305298,0.918515145778656,0.94994056224823,0.943608820438385,0.9736051559448242,0.9353674650192261,0.9235967397689819,0.9372914433479309,0.8973731994628906,0.8585872650146484,0.9384756684303284,0.9367377161979675,0.9479785561561584,0.9355614185333252,0.9222978949546814,0.9234777092933655,0.9208407998085022,0.9328756928443909,0.9246395826339722 
-StackOverflowDupQuestions,0.8809256553649902,0.8784961700439453,0.8426644802093506,0.8714233040809631,0.8284692168235779,0.8337087631225586,0.9193835258483887,0.9193835258483887,0.9088919162750244,0.9088919162750244,0.8942537307739258,0.902407705783844,0.8952110409736633,0.9220894575119019,0.8649075031280518,0.9197582602500916,0.8582576513290405,0.8977246284484863,0.9017000794410706,0.902940571308136,0.9323137402534485,0.918297290802002,0.9225630164146423,0.7492141723632812,0.8500367999076843,0.8724372982978821,0.9237239956855774,0.8392180800437927,0.9336423277854919,1.0,0.8813484311103821,0.862632155418396,0.8961885571479797,0.8927103877067566,0.9056882858276367,0.9124735593795776,0.9236457943916321,0.8843101859092712,0.9159318804740906,0.9021614193916321,0.9091837406158447,0.9131677150726318,0.9141953587532043,0.9168320894241333,0.8864024877548218,0.862632155418396,0.9124810099601746,0.9039829969406128,0.8921213150024414,0.8695310950279236,0.9073175191879272,0.9127946496009827,0.9117957949638367,0.8792681694030762,0.8867591619491577,0.8783340454101562,0.8480639457702637,0.82960045337677,0.9093490242958069,0.9070543050765991,0.9182902574539185,0.9101777076721191,0.9040267467498779,0.8930013179779053,0.8990249037742615,0.9047304391860962,0.8860877752304077 
-ArguAna,0.9206087589263916,0.911296546459198,0.8367676734924316,0.868624746799469,0.8480105400085449,0.8854134678840637,0.9042443037033081,0.9042443037033081,0.8972400426864624,0.8972400426864624,0.9117645025253296,0.9012815952301025,0.9187605381011963,0.9125237464904785,0.9078131318092346,0.9147593975067139,0.9103243947029114,0.9049397110939026,0.9085865616798401,0.9276478886604309,0.9170532822608948,0.8716145753860474,0.9156261682510376,0.7167574167251587,0.8637065887451172,0.9144514799118042,0.874069333076477,0.8564772009849548,0.9188738465309143,0.8813484311103821,1.0,0.868762731552124,0.8723280429840088,0.9121270179748535,0.900233805179596,0.8606018424034119,0.8785495758056641,0.8957704305648804,0.9052194356918335,0.8878010511398315,0.8700098991394043,0.8799136877059937,0.8878820538520813,0.8797154426574707,0.8886457085609436,0.868762731552124,0.920708417892456,0.8948706984519958,0.8937736749649048,0.9279579520225525,0.9106296896934509,0.9145981669425964,0.9124789237976074,0.9248058795928955,0.9558223485946655,0.9113011956214905,0.8957175016403198,0.8236397504806519,0.914850115776062,0.9224849343299866,0.9421572685241699,0.9227895736694336,0.9244217872619629,0.8932615518569946,0.8931587934494019,0.9195448756217957,0.9351081848144531 
-ClimateFEVER,0.8687456250190735,0.8777851462364197,0.8256171345710754,0.8610327243804932,0.8250551819801331,0.839382529258728,0.9111378788948059,0.9111378788948059,0.8968261480331421,0.8968261480331421,0.8989847898483276,0.9066250324249268,0.8781493902206421,0.9121813178062439,0.8496026396751404,0.8964840173721313,0.8590527176856995,0.884668231010437,0.9140297770500183,0.8984732627868652,0.9021029472351074,0.8466858863830566,0.9241911172866821,0.7223891615867615,0.8542394042015076,0.8842594623565674,0.8626706600189209,0.8417803645133972,0.8973691463470459,0.862632155418396,0.868762731552124,1.0000001192092896,0.8460091352462769,0.877137303352356,0.8844062089920044,0.8418534994125366,0.8496251106262207,0.8617082834243774,0.8505048155784607,0.8545312881469727,0.8441368937492371,0.8518548607826233,0.8488019108772278,0.8423066735267639,0.9115601778030396,1.0000001192092896,0.8865174651145935,0.9264897108078003,0.9099120497703552,0.8735563158988953,0.9296450614929199,0.9116727113723755,0.8757598400115967,0.872587263584137,0.8834298253059387,0.8787876963615417,0.8280983567237854,0.8355134129524231,0.9138962626457214,0.9049146771430969,0.9181155562400818,0.9026291370391846,0.8828028440475464,0.8914071917533875,0.8870561718940735,0.8974177241325378,0.9090380072593689 
-CQADupstackAndroidRetrieval,0.8753382563591003,0.8737571239471436,0.7958477735519409,0.891751766204834,0.8178394436836243,0.8364291787147522,0.8964847922325134,0.8964847922325134,0.900844395160675,0.900844395160675,0.8807247281074524,0.8804208636283875,0.8702689409255981,0.8931977152824402,0.8520771265029907,0.8843629360198975,0.8470963835716248,0.8621524572372437,0.8902676105499268,0.9011673927307129,0.9093854427337646,0.9168010950088501,0.8967640995979309,0.7940370440483093,0.8377655744552612,0.8573910593986511,0.9023654460906982,0.8068230152130127,0.8964974880218506,0.8961885571479797,0.8723280429840088,0.8460091352462769,1.0000001192092896,0.9084342122077942,0.9370324015617371,0.9095420837402344,0.9166147112846375,0.9050965905189514,0.9206304550170898,0.9030284285545349,0.8972444534301758,0.9373556971549988,0.9290414452552795,0.9209067821502686,0.8635401725769043,0.8460091352462769,0.8912644982337952,0.8783244490623474,0.8861086964607239,0.8625217080116272,0.8842442631721497,0.9079585671424866,0.8729583621025085,0.8696499466896057,0.8786553740501404,0.8585020303726196,0.8368518948554993,0.8128756284713745,0.8859356641769409,0.8857338428497314,0.9007682204246521,0.8860815763473511,0.8972189426422119,0.8669255375862122,0.8668273091316223,0.8936954140663147,0.882964015007019 
-CQADupstackEnglishRetrieval,0.9054211974143982,0.9069114327430725,0.8322862386703491,0.8907471895217896,0.8577408790588379,0.8700346946716309,0.9208599328994751,0.9208599328994751,0.917822539806366,0.917822539806366,0.9173315763473511,0.9191873669624329,0.9049964547157288,0.9257218837738037,0.8786687850952148,0.9122307896614075,0.8777068257331848,0.886515200138092,0.928565263748169,0.9325352311134338,0.9599207639694214,0.9102783799171448,0.9317466616630554,0.7370885610580444,0.8568944334983826,0.9045571088790894,0.8748583793640137,0.8346021175384521,0.9219396710395813,0.8927103877067566,0.9121270179748535,0.877137303352356,0.9084342122077942,1.0,0.9400819540023804,0.9009470343589783,0.9214543104171753,0.9414180517196655,0.9488763213157654,0.9316865801811218,0.9194899201393127,0.9315387010574341,0.9308363199234009,0.917028546333313,0.9058995842933655,0.877137303352356,0.9196183085441589,0.9052009582519531,0.9050914645195007,0.9017252922058105,0.9156660437583923,0.9440099596977234,0.9012807607650757,0.9038532376289368,0.9157229065895081,0.8888267874717712,0.8651591539382935,0.8234075903892517,0.924311637878418,0.9411693215370178,0.943746030330658,0.9186076521873474,0.9344898462295532,0.8920850157737732,0.8855765461921692,0.9203011989593506,0.9117947220802307 
-CQADupstackGamingRetrieval,0.9050220847129822,0.9039636254310608,0.8206374049186707,0.9007715582847595,0.847434937953949,0.8654309511184692,0.9270387291908264,0.9270387291908264,0.922167956829071,0.922167956829071,0.9148895144462585,0.9144876003265381,0.8932080864906311,0.9272482991218567,0.8723767995834351,0.9139900803565979,0.8736319541931152,0.8903034329414368,0.9274815917015076,0.9371761083602905,0.9356912970542908,0.9504308104515076,0.9298926591873169,0.7504194378852844,0.8744352459907532,0.8917332291603088,0.8971635103225708,0.8371538519859314,0.9212622046470642,0.9056882858276367,0.900233805179596,0.8844062089920044,0.9370324015617371,0.9400819540023804,0.9999998807907104,0.9216479063034058,0.9330098628997803,0.9342014789581299,0.9393653869628906,0.9243125319480896,0.9149504899978638,0.940504789352417,0.9380438923835754,0.9254974722862244,0.8909529447555542,0.8844062089920044,0.9199414849281311,0.9049500823020935,0.9040770530700684,0.889488935470581,0.9149694442749023,0.9382474422454834,0.8936588168144226,0.8950048685073853,0.9083070158958435,0.882118821144104,0.858853280544281,0.8333576321601868,0.9167808890342712,0.9232589602470398,0.9353626370429993,0.9158080816268921,0.9224990010261536,0.9030084609985352,0.8916919231414795,0.9242655038833618,0.9156969785690308 
-CQADupstackGisRetrieval,0.8575048446655273,0.8530020713806152,0.7885207533836365,0.8569079041481018,0.7987120747566223,0.8066450357437134,0.8749427199363708,0.8749427199363708,0.8686424493789673,0.8686424493789673,0.8601332306861877,0.8603336215019226,0.8815957903862,0.888041615486145,0.8544682264328003,0.8944020867347717,0.8565325140953064,0.8818444013595581,0.8728464841842651,0.8817288279533386,0.9072699546813965,0.9322190880775452,0.888175904750824,0.7442988157272339,0.8095758557319641,0.8425986766815186,0.8783930540084839,0.7987034916877747,0.9038124680519104,0.9124735593795776,0.8606018424034119,0.8418534994125366,0.9095420837402344,0.9009470343589783,0.9216479063034058,0.9999998807907104,0.9448469281196594,0.903142511844635,0.9301380515098572,0.9305801391601562,0.9221766591072083,0.9310668110847473,0.9317100048065186,0.918504536151886,0.8589087724685669,0.8418534994125366,0.8800798058509827,0.8755719661712646,0.8662577867507935,0.854289710521698,0.8728598952293396,0.8888506889343262,0.8968186974525452,0.8719730377197266,0.8571727871894836,0.8625155687332153,0.8343672752380371,0.7701677680015564,0.8710922002792358,0.8752908706665039,0.8871220350265503,0.8685832023620605,0.8740650415420532,0.8476601839065552,0.8595708608627319,0.8642983436584473,0.8621535301208496 
-CQADupstackMathematicaRetrieval,0.8769376277923584,0.874477207660675,0.7998864650726318,0.8734966516494751,0.8172357678413391,0.8283482193946838,0.8865760564804077,0.8865760564804077,0.8871181607246399,0.8871181607246399,0.8703606724739075,0.8727058172225952,0.9128819108009338,0.9053133130073547,0.8758416771888733,0.913551926612854,0.8669453859329224,0.8924626708030701,0.8922913074493408,0.8999888896942139,0.9264189600944519,0.939926028251648,0.901106059551239,0.7660002708435059,0.8217236399650574,0.8575678467750549,0.8864251375198364,0.8081290125846863,0.9215748310089111,0.9236457943916321,0.8785495758056641,0.8496251106262207,0.9166147112846375,0.9214543104171753,0.9330098628997803,0.9448469281196594,1.0,0.9277931451797485,0.9406094551086426,0.9559803009033203,0.9574756026268005,0.9464672207832336,0.9315505027770996,0.9326183199882507,0.8738807439804077,0.8496251106262207,0.8993229269981384,0.8777073621749878,0.8827200531959534,0.8768882155418396,0.8850938081741333,0.9020552039146423,0.9163784980773926,0.8997095227241516,0.8748701810836792,0.8762643337249756,0.8645408153533936,0.7892798185348511,0.8838201761245728,0.889945387840271,0.8978254199028015,0.8780002593994141,0.8911046385765076,0.8559651970863342,0.8719625473022461,0.8818624019622803,0.8755237460136414 
-CQADupstackPhysicsRetrieval,0.8788719773292542,0.8754789233207703,0.8036501407623291,0.866807758808136,0.8205375075340271,0.8346224427223206,0.8897450566291809,0.8897450566291809,0.8848662972450256,0.8848662972450256,0.8878904581069946,0.8835071921348572,0.9258798360824585,0.9213353395462036,0.8830129504203796,0.9188753366470337,0.870018720626831,0.8929807543754578,0.8964251279830933,0.9109740257263184,0.9299076795578003,0.9218308925628662,0.9096159934997559,0.7272855639457703,0.8345741033554077,0.8722454309463501,0.8661698698997498,0.8156949281692505,0.9176410436630249,0.8843101859092712,0.8957704305648804,0.8617082834243774,0.9050965905189514,0.9414180517196655,0.9342014789581299,0.903142511844635,0.9277931451797485,0.9999999403953552,0.9381371736526489,0.9415716528892517,0.91279536485672,0.9261923432350159,0.9212412238121033,0.9054916501045227,0.8781377077102661,0.8617082834243774,0.8984333276748657,0.8824734091758728,0.8800548315048218,0.8915558457374573,0.8959563970565796,0.922303318977356,0.9024848937988281,0.9060347676277161,0.8913036584854126,0.8834498524665833,0.8713915348052979,0.8070156574249268,0.892284631729126,0.9096480011940002,0.9132447838783264,0.903823971748352,0.899546205997467,0.8698975443840027,0.869685173034668,0.8923594355583191,0.8838998079299927 
-CQADupstackProgrammersRetrieval,0.8834834098815918,0.8846838474273682,0.8091654181480408,0.8690007328987122,0.8239988684654236,0.8459446430206299,0.8953050374984741,0.8953050374984741,0.8901574015617371,0.8901574015617371,0.8843247294425964,0.8789454698562622,0.9027954936027527,0.9087139964103699,0.8758763670921326,0.911128044128418,0.8732233643531799,0.8892922401428223,0.8974623084068298,0.9104718565940857,0.9368252158164978,0.9459747672080994,0.9153035879135132,0.7467317581176758,0.8340392112731934,0.8732454776763916,0.8830022215843201,0.8110246062278748,0.9275285005569458,0.9159318804740906,0.9052194356918335,0.8505048155784607,0.9206304550170898,0.9488763213157654,0.9393653869628906,0.9301380515098572,0.9406094551086426,0.9381371736526489,1.0000001192092896,0.9467190504074097,0.9259558320045471,0.9530035257339478,0.9550228714942932,0.9337886571884155,0.8744832277297974,0.8505048155784607,0.9110053181648254,0.8822770714759827,0.8804248571395874,0.8899571895599365,0.8902618885040283,0.9181737303733826,0.9190900325775146,0.9017314910888672,0.8959858417510986,0.8842517137527466,0.8579108119010925,0.8032951951026917,0.8920109868049622,0.9053783416748047,0.9127124547958374,0.89433753490448,0.9096778035163879,0.8691129088401794,0.8748772144317627,0.8964371681213379,0.8863435387611389 
-CQADupstackStatsRetrieval,0.8732110261917114,0.870514988899231,0.7987720966339111,0.8641847968101501,0.8105478286743164,0.8239307999610901,0.8811706304550171,0.8811706304550171,0.8792017698287964,0.8792017698287964,0.8718209862709045,0.8683345913887024,0.916979193687439,0.9069965481758118,0.8930484056472778,0.923565149307251,0.9020135402679443,0.9127675890922546,0.8893818259239197,0.901867687702179,0.9241059422492981,0.9272336959838867,0.9074497222900391,0.7388317584991455,0.8295093774795532,0.8640527129173279,0.8678272366523743,0.8064138293266296,0.9317771792411804,0.9021614193916321,0.8878010511398315,0.8545312881469727,0.9030284285545349,0.9316865801811218,0.9243125319480896,0.9305801391601562,0.9559803009033203,0.9415716528892517,0.9467190504074097,1.0000001192092896,0.9336675405502319,0.9365205764770508,0.930739164352417,0.9157169461250305,0.8788878917694092,0.8545312881469727,0.897214412689209,0.8763482570648193,0.8824243545532227,0.895236074924469,0.8826169371604919,0.9051744341850281,0.9343346357345581,0.9186573028564453,0.8812727332115173,0.8962944149971008,0.8794823884963989,0.7893253564834595,0.8797302842140198,0.8905830383300781,0.8992865085601807,0.878086507320404,0.8895502090454102,0.8537505865097046,0.8672860264778137,0.879453182220459,0.8792035579681396 
-CQADupstackTexRetrieval,0.8720847964286804,0.8698432445526123,0.8001028299331665,0.860200822353363,0.8049406409263611,0.8178578019142151,0.8818027973175049,0.8818027973175049,0.8758128881454468,0.8758128881454468,0.8644421100616455,0.8654515743255615,0.9049314856529236,0.8965097069740295,0.8660279512405396,0.8992398977279663,0.8579853177070618,0.8801870942115784,0.8851460814476013,0.8874367475509644,0.9255838990211487,0.9189807772636414,0.8974940180778503,0.7514438033103943,0.8197506666183472,0.8527172803878784,0.880608856678009,0.8085086941719055,0.9046880602836609,0.9091837406158447,0.8700098991394043,0.8441368937492371,0.8972444534301758,0.9194899201393127,0.9149504899978638,0.9221766591072083,0.9574756026268005,0.91279536485672,0.9259558320045471,0.9336675405502319,1.0,0.9387102723121643,0.9309241771697998,0.9312326312065125,0.8696514964103699,0.8441368937492371,0.8951022624969482,0.8696152567863464,0.8712650537490845,0.8649015426635742,0.8828864097595215,0.8967395424842834,0.8998434543609619,0.8823222517967224,0.876192569732666,0.874040961265564,0.8481307625770569,0.7818848490715027,0.8804553747177124,0.8856875896453857,0.8912297487258911,0.8708868026733398,0.885320246219635,0.843300998210907,0.8693545460700989,0.8727013468742371,0.8655053973197937 
-CQADupstackUnixRetrieval,0.8778753876686096,0.8746495246887207,0.8078638315200806,0.8753891587257385,0.8208318948745728,0.8369230031967163,0.8984538316726685,0.8984538316726685,0.892038881778717,0.892038881778717,0.8797895312309265,0.8802311420440674,0.8934173583984375,0.9025301933288574,0.8672094345092773,0.9039843082427979,0.8595551252365112,0.8826228976249695,0.8965474367141724,0.9019376039505005,0.9274458289146423,0.9332571029663086,0.9126835465431213,0.7605757713317871,0.8345680236816406,0.8612546324729919,0.93036949634552,0.8037967681884766,0.9125511646270752,0.9131677150726318,0.8799136877059937,0.8518548607826233,0.9373556971549988,0.9315387010574341,0.940504789352417,0.9310668110847473,0.9464672207832336,0.9261923432350159,0.9530035257339478,0.9365205764770508,0.9387102723121643,0.9999998211860657,0.9425334334373474,0.9361576437950134,0.8726716041564941,0.8518548607826233,0.9011945724487305,0.8829306364059448,0.8788676857948303,0.8737623691558838,0.8897589445114136,0.9121820330619812,0.8979055881500244,0.8855134844779968,0.8814226984977722,0.8759041428565979,0.8541761040687561,0.8000640273094177,0.8939105272293091,0.8999499082565308,0.9092024564743042,0.8901516199111938,0.9038639068603516,0.8625014424324036,0.8721633553504944,0.8905507326126099,0.873734712600708 
-CQADupstackWebmastersRetrieval,0.8800479173660278,0.8826687932014465,0.8033185601234436,0.8771096467971802,0.8235769867897034,0.8369060158729553,0.897138774394989,0.897138774394989,0.8904502391815186,0.8904502391815186,0.8873119354248047,0.8835926055908203,0.8883233666419983,0.9069535732269287,0.8678568005561829,0.9041688442230225,0.8668990731239319,0.8879578709602356,0.8993308544158936,0.9086974263191223,0.9290803074836731,0.9323667287826538,0.9110020995140076,0.7364763021469116,0.8336871266365051,0.87152498960495,0.8850497603416443,0.8270236253738403,0.9187279343605042,0.9141953587532043,0.8878820538520813,0.8488019108772278,0.9290414452552795,0.9308363199234009,0.9380438923835754,0.9317100048065186,0.9315505027770996,0.9212412238121033,0.9550228714942932,0.930739164352417,0.9309241771697998,0.9425334334373474,0.9999998807907104,0.9648351073265076,0.8810744285583496,0.8488019108772278,0.9122025966644287,0.8880552053451538,0.8899712562561035,0.8789516091346741,0.8950729966163635,0.9161275029182434,0.9036228060722351,0.8879486918449402,0.888551652431488,0.8810858726501465,0.8505773544311523,0.7996203303337097,0.8922863602638245,0.89543616771698,0.9074068665504456,0.8887396454811096,0.9075014591217041,0.8670238256454468,0.8819707036018372,0.8923331499099731,0.8873686790466309 
-CQADupstackWordpressRetrieval,0.8742043972015381,0.8705847859382629,0.7957671284675598,0.8702175617218018,0.8167813420295715,0.825982391834259,0.8913282155990601,0.8913282155990601,0.8847219944000244,0.8847219944000244,0.8789352178573608,0.8761081099510193,0.8882037997245789,0.8937448859214783,0.8641262650489807,0.8961097002029419,0.8557989597320557,0.8784561157226562,0.8881715536117554,0.8998266458511353,0.9175654649734497,0.9188860058784485,0.8972079753875732,0.7487673163414001,0.8268483281135559,0.8667789697647095,0.8874524235725403,0.810706615447998,0.907027006149292,0.9168320894241333,0.8797154426574707,0.8423066735267639,0.9209067821502686,0.917028546333313,0.9254974722862244,0.918504536151886,0.9326183199882507,0.9054916501045227,0.9337886571884155,0.9157169461250305,0.9312326312065125,0.9361576437950134,0.9648351073265076,1.0000001192092896,0.8684195876121521,0.8423066735267639,0.9004048705101013,0.8746203780174255,0.8731535077095032,0.8685926198959351,0.8860312104225159,0.9065831899642944,0.8904937505722046,0.8772619366645813,0.8826008439064026,0.8709307909011841,0.8430542945861816,0.793935239315033,0.8826149106025696,0.8886222243309021,0.8997173309326172,0.8829312920570374,0.8971396088600159,0.8602017760276794,0.8727341890335083,0.886046290397644,0.8786991834640503 
-DBPedia,0.8960760831832886,0.9033768773078918,0.857245147228241,0.8732821941375732,0.8425841331481934,0.8585302829742432,0.9272512793540955,0.9272512793540955,0.9200598001480103,0.9200598001480103,0.9204174280166626,0.9267283082008362,0.9017259478569031,0.9494740962982178,0.8874456882476807,0.9335931539535522,0.8835738897323608,0.9113901853561401,0.9286068081855774,0.92087322473526,0.9289190769195557,0.865909218788147,0.9430718421936035,0.7269722819328308,0.8812722563743591,0.8917800784111023,0.8765844106674194,0.8491460084915161,0.932131826877594,0.8864024877548218,0.8886457085609436,0.9115601778030396,0.8635401725769043,0.9058995842933655,0.8909529447555542,0.8589087724685669,0.8738807439804077,0.8781377077102661,0.8744832277297974,0.8788878917694092,0.8696514964103699,0.8726716041564941,0.8810744285583496,0.8684195876121521,1.0000001192092896,0.9115601778030396,0.9128028154373169,0.9553118348121643,0.9282314777374268,0.9043858051300049,0.9467520117759705,0.9322096705436707,0.908551037311554,0.9075112342834473,0.9047530293464661,0.908862829208374,0.8717136979103088,0.84193354845047,0.9324358105659485,0.9196749925613403,0.9341995120048523,0.9155308604240417,0.9073795676231384,0.9020201563835144,0.9053149819374084,0.9130927324295044,0.9138416051864624 
-FEVER,0.8687456250190735,0.8777851462364197,0.8256171345710754,0.8610327243804932,0.8250551819801331,0.839382529258728,0.9111378788948059,0.9111378788948059,0.8968261480331421,0.8968261480331421,0.8989847898483276,0.9066250324249268,0.8781493902206421,0.9121813178062439,0.8496026396751404,0.8964840173721313,0.8590527176856995,0.884668231010437,0.9140297770500183,0.8984732627868652,0.9021029472351074,0.8466858863830566,0.9241911172866821,0.7223891615867615,0.8542394042015076,0.8842594623565674,0.8626706600189209,0.8417803645133972,0.8973691463470459,0.862632155418396,0.868762731552124,1.0000001192092896,0.8460091352462769,0.877137303352356,0.8844062089920044,0.8418534994125366,0.8496251106262207,0.8617082834243774,0.8505048155784607,0.8545312881469727,0.8441368937492371,0.8518548607826233,0.8488019108772278,0.8423066735267639,0.9115601778030396,1.0000001192092896,0.8865174651145935,0.9264897108078003,0.9099120497703552,0.8735563158988953,0.9296450614929199,0.9116727113723755,0.8757598400115967,0.872587263584137,0.8834298253059387,0.8787876963615417,0.8280983567237854,0.8355134129524231,0.9138962626457214,0.9049146771430969,0.9181155562400818,0.9026291370391846,0.8828028440475464,0.8914071917533875,0.8870561718940735,0.8974177241325378,0.9090380072593689 
-FiQA2018,0.9194890260696411,0.91923987865448,0.8700592517852783,0.9026527404785156,0.8733782172203064,0.8759719133377075,0.9463034868240356,0.9463034868240356,0.9321253299713135,0.9321253299713135,0.9462225437164307,0.9446496963500977,0.9082144498825073,0.947498619556427,0.8788062334060669,0.9220610857009888,0.8795516490936279,0.9016450047492981,0.9438943862915039,0.9436620473861694,0.9526970386505127,0.9015153050422668,0.9498501420021057,0.7529628872871399,0.8879868388175964,0.9199483394622803,0.9054697155952454,0.8699422478675842,0.9359960556030273,0.9124810099601746,0.920708417892456,0.8865174651145935,0.8912644982337952,0.9196183085441589,0.9199414849281311,0.8800798058509827,0.8993229269981384,0.8984333276748657,0.9110053181648254,0.897214412689209,0.8951022624969482,0.9011945724487305,0.9122025966644287,0.9004048705101013,0.9128028154373169,0.8865174651145935,1.0,0.9253550171852112,0.9273801445960999,0.8983956575393677,0.9334579706192017,0.9525591731071472,0.9080317616462708,0.8984257578849792,0.932041347026825,0.8989304900169373,0.8627818822860718,0.852931559085846,0.9391583204269409,0.9346805214881897,0.9548579454421997,0.9363479018211365,0.9465791583061218,0.9146230816841125,0.9739243984222412,0.9389200806617737,0.9216462969779968 
-HotpotQA,0.8983748555183411,0.9041053652763367,0.861984133720398,0.8878933191299438,0.8516587615013123,0.8769184350967407,0.9470813274383545,0.9470813274383545,0.9370198249816895,0.9370198249816895,0.9388979077339172,0.9437984228134155,0.9008392095565796,0.956941545009613,0.8783058524131775,0.9312097430229187,0.8794113397598267,0.9183295369148254,0.9391739964485168,0.931955099105835,0.9371076226234436,0.8845800161361694,0.9545242190361023,0.7362186312675476,0.8985650539398193,0.9096399545669556,0.8964870572090149,0.8755338788032532,0.9427277445793152,0.9039829969406128,0.8948706984519958,0.9264897108078003,0.8783244490623474,0.9052009582519531,0.9049500823020935,0.8755719661712646,0.8777073621749878,0.8824734091758728,0.8822770714759827,0.8763482570648193,0.8696152567863464,0.8829306364059448,0.8880552053451538,0.8746203780174255,0.9553118348121643,0.9264897108078003,0.9253550171852112,0.9999999403953552,0.9338226914405823,0.8949758410453796,0.9652247428894043,0.9462618827819824,0.9095838069915771,0.8932918310165405,0.9121882319450378,0.9082680940628052,0.8506495952606201,0.8844336271286011,0.950919508934021,0.933860719203949,0.9507559537887573,0.9376870393753052,0.9176111817359924,0.9365106821060181,0.9216817021369934,0.9391409754753113,0.9349841475486755 
-MSMARCO,0.9003165364265442,0.9071121215820312,0.8672728538513184,0.9106489419937134,0.8590372204780579,0.8686230182647705,0.9404415488243103,0.9404415488243103,0.9469432234764099,0.9469432234764099,0.9315915107727051,0.9354615807533264,0.8916330933570862,0.9396511316299438,0.8714765310287476,0.9096160531044006,0.8779413104057312,0.8957593441009521,0.9369815587997437,0.9319586157798767,0.934260904788971,0.8738407492637634,0.943306565284729,0.7674270272254944,0.8808208107948303,0.9038676619529724,0.888830304145813,0.8483620285987854,0.9200493097305298,0.8921213150024414,0.8937736749649048,0.9099120497703552,0.8861086964607239,0.9050914645195007,0.9040770530700684,0.8662577867507935,0.8827200531959534,0.8800548315048218,0.8804248571395874,0.8824243545532227,0.8712650537490845,0.8788676857948303,0.8899712562561035,0.8731535077095032,0.9282314777374268,0.9099120497703552,0.9273801445960999,0.9338226914405823,0.9999999403953552,0.8933760523796082,0.9218379259109497,0.9464716911315918,0.896026611328125,0.8949978947639465,0.9115307927131653,0.890006422996521,0.8536749482154846,0.8565343618392944,0.9360795021057129,0.923584520816803,0.9442033767700195,0.9270609021186829,0.9261489510536194,0.9158554077148438,0.922019362449646,0.9279426336288452,0.9159611463546753 
-NFCorpus,0.8930765986442566,0.8932070136070251,0.8301711678504944,0.8546255826950073,0.840248167514801,0.8685956597328186,0.8946558833122253,0.8946558833122253,0.8838071823120117,0.8838071823120117,0.8950788378715515,0.8943512439727783,0.9167720675468445,0.9095065593719482,0.9372479319572449,0.9406470060348511,0.9487459659576416,0.9393429756164551,0.9039009809494019,0.9122331738471985,0.9154017567634583,0.865176260471344,0.9156935811042786,0.7220373749732971,0.8472391366958618,0.8938794136047363,0.8649072647094727,0.8392335772514343,0.918515145778656,0.8695310950279236,0.9279579520225525,0.8735563158988953,0.8625217080116272,0.9017252922058105,0.889488935470581,0.854289710521698,0.8768882155418396,0.8915558457374573,0.8899571895599365,0.895236074924469,0.8649015426635742,0.8737623691558838,0.8789516091346741,0.8685926198959351,0.9043858051300049,0.8735563158988953,0.8983956575393677,0.8949758410453796,0.8933760523796082,0.9999999403953552,0.9013606905937195,0.9077708125114441,0.9226983189582825,0.9740851521492004,0.9084823131561279,0.9550403952598572,0.9347903728485107,0.8200613260269165,0.9014897346496582,0.9092106819152832,0.921718180179596,0.9010535478591919,0.9073488116264343,0.877545177936554,0.8832513093948364,0.9017101526260376,0.9184467196464539 
-NQ,0.9119835495948792,0.9180147051811218,0.8646405339241028,0.8853597640991211,0.8614929914474487,0.8820010423660278,0.9522876143455505,0.9522876143455505,0.9368411898612976,0.9368411898612976,0.9423636198043823,0.9478681683540344,0.910786509513855,0.9639177918434143,0.8799590468406677,0.9374184012413025,0.8801149129867554,0.9159761667251587,0.9450920224189758,0.9413416981697083,0.9463439583778381,0.8900197744369507,0.9650863409042358,0.7399367094039917,0.8976626992225647,0.9211317300796509,0.8988544940948486,0.8892091512680054,0.94994056224823,0.9073175191879272,0.9106296896934509,0.9296450614929199,0.8842442631721497,0.9156660437583923,0.9149694442749023,0.8728598952293396,0.8850938081741333,0.8959563970565796,0.8902618885040283,0.8826169371604919,0.8828864097595215,0.8897589445114136,0.8950729966163635,0.8860312104225159,0.9467520117759705,0.9296450614929199,0.9334579706192017,0.9652247428894043,0.9218379259109497,0.9013606905937195,1.0,0.9495320916175842,0.9153639674186707,0.8995381593704224,0.9279997944831848,0.9119896292686462,0.8595485091209412,0.876109778881073,0.9519914984703064,0.9385321140289307,0.9532462358474731,0.9365876317024231,0.9189303517341614,0.931150496006012,0.9250473976135254,0.9387162327766418,0.9347662329673767 
-QuoraRetrieval,0.9182446002960205,0.922761857509613,0.8830270767211914,0.9191234707832336,0.8839017748832703,0.8861343860626221,0.9682824611663818,0.9682824611663818,0.956068754196167,0.956068754196167,0.9756679534912109,0.9679496884346008,0.9078540205955505,0.9652320742607117,0.8842073082923889,0.930925726890564,0.8837840557098389,0.9054452776908875,0.959994375705719,0.9608737826347351,0.967335045337677,0.9089018702507019,0.9623216986656189,0.7632731795310974,0.9045380353927612,0.9303578734397888,0.9126819968223572,0.8765057325363159,0.943608820438385,0.9127946496009827,0.9145981669425964,0.9116727113723755,0.9079585671424866,0.9440099596977234,0.9382474422454834,0.8888506889343262,0.9020552039146423,0.922303318977356,0.9181737303733826,0.9051744341850281,0.8967395424842834,0.9121820330619812,0.9161275029182434,0.9065831899642944,0.9322096705436707,0.9116727113723755,0.9525591731071472,0.9462618827819824,0.9464716911315918,0.9077708125114441,0.9495320916175842,1.0,0.9058340191841125,0.905327320098877,0.9388357400894165,0.9015847444534302,0.8601654171943665,0.8889256119728088,0.9628604054450989,0.9531316161155701,0.9722914695739746,0.9573034644126892,0.9578181505203247,0.9456358551979065,0.9389108419418335,0.9596331715583801,0.9374577403068542 
-SCIDOCS,0.889691174030304,0.8992704749107361,0.8478741645812988,0.85566246509552,0.8242060542106628,0.8516706228256226,0.9068668484687805,0.9068668484687805,0.8929500579833984,0.8929500579833984,0.8898159265518188,0.8895792961120605,0.9503505229949951,0.9295390248298645,0.9247989654541016,0.9657474756240845,0.9216246008872986,0.9564609527587891,0.9040453433990479,0.9078938364982605,0.9253087043762207,0.899509608745575,0.9296791553497314,0.740402340888977,0.8541281223297119,0.8930266499519348,0.8830411434173584,0.8636199235916138,0.9736051559448242,0.9117957949638367,0.9124789237976074,0.8757598400115967,0.8729583621025085,0.9012807607650757,0.8936588168144226,0.8968186974525452,0.9163784980773926,0.9024848937988281,0.9190900325775146,0.9343346357345581,0.8998434543609619,0.8979055881500244,0.9036228060722351,0.8904937505722046,0.908551037311554,0.8757598400115967,0.9080317616462708,0.9095838069915771,0.896026611328125,0.9226983189582825,0.9153639674186707,0.9058340191841125,1.0,0.9472094178199768,0.9022283554077148,0.9487838745117188,0.9116275906562805,0.8131711483001709,0.9032444953918457,0.9103025197982788,0.9158501029014587,0.8992156982421875,0.8955920934677124,0.8796117305755615,0.89765465259552,0.8925430774688721,0.9049012660980225 
-SciFact,0.892997145652771,0.8951342701911926,0.831039309501648,0.8536851406097412,0.8306145071983337,0.8627360463142395,0.8946090340614319,0.8946090340614319,0.8861535787582397,0.8861535787582397,0.8889317512512207,0.8876237869262695,0.9326635599136353,0.9176734685897827,0.9538105130195618,0.9617788791656494,0.9501500725746155,0.9464058876037598,0.8978421688079834,0.9097794890403748,0.9159756898880005,0.8790194392204285,0.9186417460441589,0.7349553108215332,0.8465986251831055,0.8879072666168213,0.864525556564331,0.8338578343391418,0.9353674650192261,0.8792681694030762,0.9248058795928955,0.872587263584137,0.8696499466896057,0.9038532376289368,0.8950048685073853,0.8719730377197266,0.8997095227241516,0.9060347676277161,0.9017314910888672,0.9186573028564453,0.8823222517967224,0.8855134844779968,0.8879486918449402,0.8772619366645813,0.9075112342834473,0.872587263584137,0.8984257578849792,0.8932918310165405,0.8949978947639465,0.9740851521492004,0.8995381593704224,0.905327320098877,0.9472094178199768,0.9999998807907104,0.9017122983932495,0.9554412961006165,0.968241274356842,0.8046634197235107,0.8989851474761963,0.907640814781189,0.9171600937843323,0.8982743620872498,0.8993921875953674,0.8695809245109558,0.8843533992767334,0.8911031484603882,0.9126039147377014 
-Touche2020,0.9215371608734131,0.9281871914863586,0.8493306040763855,0.8839954733848572,0.8763502836227417,0.8969246745109558,0.9345963597297668,0.9345963597297668,0.9225219488143921,0.9225219488143921,0.9373823404312134,0.9310782551765442,0.9119592308998108,0.9311457872390747,0.8883203864097595,0.915169358253479,0.888805091381073,0.896754264831543,0.9354337453842163,0.9485433101654053,0.9310891032218933,0.876009464263916,0.9339285492897034,0.7392125129699707,0.8941338062286377,0.9263610243797302,0.8846127390861511,0.8729779720306396,0.9235967397689819,0.8867591619491577,0.9558223485946655,0.8834298253059387,0.8786553740501404,0.9157229065895081,0.9083070158958435,0.8571727871894836,0.8748701810836792,0.8913036584854126,0.8959858417510986,0.8812727332115173,0.876192569732666,0.8814226984977722,0.888551652431488,0.8826008439064026,0.9047530293464661,0.8834298253059387,0.932041347026825,0.9121882319450378,0.9115307927131653,0.9084823131561279,0.9279997944831848,0.9388357400894165,0.9022283554077148,0.9017122983932495,1.000000238418579,0.9011086225509644,0.8702623248100281,0.8584712743759155,0.9317410588264465,0.9341952204704285,0.9546455144882202,0.9370003938674927,0.9341966509819031,0.9161267876625061,0.9047480821609497,0.9399805665016174,0.9332844614982605 
-TRECCOVID,0.8833832144737244,0.8897658586502075,0.8365864753723145,0.8441707491874695,0.8282589912414551,0.8567239046096802,0.8986713886260986,0.8986713886260986,0.8865776658058167,0.8865776658058167,0.8933861255645752,0.8951513767242432,0.9338494539260864,0.9181896448135376,0.942850649356842,0.957700252532959,0.9613710641860962,0.9670438766479492,0.89899742603302,0.9055797457695007,0.9119280576705933,0.8632569313049316,0.9249359369277954,0.7177295684814453,0.8519248962402344,0.8916494846343994,0.8762941360473633,0.8616792559623718,0.9372914433479309,0.8783340454101562,0.9113011956214905,0.8787876963615417,0.8585020303726196,0.8888267874717712,0.882118821144104,0.8625155687332153,0.8762643337249756,0.8834498524665833,0.8842517137527466,0.8962944149971008,0.874040961265564,0.8759041428565979,0.8810858726501465,0.8709307909011841,0.908862829208374,0.8787876963615417,0.8989304900169373,0.9082680940628052,0.890006422996521,0.9550403952598572,0.9119896292686462,0.9015847444534302,0.9487838745117188,0.9554412961006165,0.9011086225509644,1.0000001192092896,0.9231286644935608,0.8148664236068726,0.8992998003959656,0.9077374935150146,0.9175513982772827,0.9003238081932068,0.8937297463417053,0.8788232803344727,0.8935161828994751,0.8917135000228882,0.9123128652572632 
-BIOSSES,0.8625791668891907,0.8574753999710083,0.797872006893158,0.8158388733863831,0.7966073751449585,0.8268139362335205,0.8533274531364441,0.8533274531364441,0.8508256077766418,0.8508256077766418,0.8529271483421326,0.8504247069358826,0.9067458510398865,0.8817151784896851,0.9347233772277832,0.9307554960250854,0.9220839142799377,0.9164301753044128,0.8582507967948914,0.8730185031890869,0.876883327960968,0.8407849073410034,0.8814013004302979,0.712036669254303,0.8185410499572754,0.8485643863677979,0.8330298662185669,0.802665114402771,0.8973731994628906,0.8480639457702637,0.8957175016403198,0.8280983567237854,0.8368518948554993,0.8651591539382935,0.858853280544281,0.8343672752380371,0.8645408153533936,0.8713915348052979,0.8579108119010925,0.8794823884963989,0.8481307625770569,0.8541761040687561,0.8505773544311523,0.8430542945861816,0.8717136979103088,0.8280983567237854,0.8627818822860718,0.8506495952606201,0.8536749482154846,0.9347903728485107,0.8595485091209412,0.8601654171943665,0.9116275906562805,0.968241274356842,0.8702623248100281,0.9231286644935608,1.0000001192092896,0.7632718682289124,0.8584350943565369,0.8718514442443848,0.8818195462226868,0.8666396141052246,0.8593403100967407,0.8286240100860596,0.848635196685791,0.8512697219848633,0.8796348571777344 
-SICK-R,0.8299649357795715,0.8390056490898132,0.8078639507293701,0.8224376440048218,0.8122089505195618,0.8363828063011169,0.8902477025985718,0.8902477025985718,0.8823556900024414,0.8823556900024414,0.8919810056686401,0.8916918635368347,0.8065285086631775,0.8841482996940613,0.7822513580322266,0.844598650932312,0.7837725281715393,0.8197333216667175,0.8789204955101013,0.8813309073448181,0.8612088561058044,0.8076320886611938,0.879280149936676,0.6801100373268127,0.8878549337387085,0.8542071580886841,0.8284757733345032,0.8343176245689392,0.8585872650146484,0.82960045337677,0.8236397504806519,0.8355134129524231,0.8128756284713745,0.8234075903892517,0.8333576321601868,0.7701677680015564,0.7892798185348511,0.8070156574249268,0.8032951951026917,0.7893253564834595,0.7818848490715027,0.8000640273094177,0.7996203303337097,0.793935239315033,0.84193354845047,0.8355134129524231,0.852931559085846,0.8844336271286011,0.8565343618392944,0.8200613260269165,0.876109778881073,0.8889256119728088,0.8131711483001709,0.8046634197235107,0.8584712743759155,0.8148664236068726,0.7632718682289124,1.0,0.9038611650466919,0.8643434643745422,0.8853167295455933,0.885067880153656,0.8466845154762268,0.9286790490150452,0.8497613668441772,0.9365238547325134,0.8939717411994934 
-STS12,0.9155313968658447,0.915518581867218,0.8909081220626831,0.9016413688659668,0.8802585005760193,0.8844971656799316,0.9739812016487122,0.9739812016487122,0.9523103833198547,0.9523103833198547,0.9546875357627869,0.9640517830848694,0.8979881405830383,0.9600266218185425,0.8725536465644836,0.9283256530761719,0.8748796582221985,0.908024787902832,0.9507595896720886,0.9435673952102661,0.9496986269950867,0.8901703953742981,0.9567474722862244,0.7566519975662231,0.9094604253768921,0.9259677529335022,0.914065420627594,0.873137354850769,0.9384756684303284,0.9093490242958069,0.914850115776062,0.9138962626457214,0.8859356641769409,0.924311637878418,0.9167808890342712,0.8710922002792358,0.8838201761245728,0.892284631729126,0.8920109868049622,0.8797302842140198,0.8804553747177124,0.8939105272293091,0.8922863602638245,0.8826149106025696,0.9324358105659485,0.9138962626457214,0.9391583204269409,0.950919508934021,0.9360795021057129,0.9014897346496582,0.9519914984703064,0.9628604054450989,0.9032444953918457,0.8989851474761963,0.9317410588264465,0.8992998003959656,0.8584350943565369,0.9038611650466919,0.9999998807907104,0.9665259122848511,0.9781090617179871,0.9599546194076538,0.949619710445404,0.9488744735717773,0.929383397102356,0.9622442126274109,0.9399190545082092 
-STS13,0.9190161824226379,0.9148537516593933,0.8852756023406982,0.8994392156600952,0.8810560703277588,0.8787712454795837,0.9600082635879517,0.9600082635879517,0.9431838989257812,0.9431838989257812,0.9443492889404297,0.952302873134613,0.912732720375061,0.9491159915924072,0.8873900175094604,0.932389497756958,0.8852849006652832,0.9073441624641418,0.9454846978187561,0.9429996013641357,0.9500508308410645,0.8968315720558167,0.9498928785324097,0.7461093068122864,0.8995444178581238,0.9181943535804749,0.9060060381889343,0.8696905374526978,0.9367377161979675,0.9070543050765991,0.9224849343299866,0.9049146771430969,0.8857338428497314,0.9411693215370178,0.9232589602470398,0.8752908706665039,0.889945387840271,0.9096480011940002,0.9053783416748047,0.8905830383300781,0.8856875896453857,0.8999499082565308,0.89543616771698,0.8886222243309021,0.9196749925613403,0.9049146771430969,0.9346805214881897,0.933860719203949,0.923584520816803,0.9092106819152832,0.9385321140289307,0.9531316161155701,0.9103025197982788,0.907640814781189,0.9341952204704285,0.9077374935150146,0.8718514442443848,0.8643434643745422,0.9665259122848511,1.0000001192092896,0.9773168563842773,0.9567533135414124,0.944054901599884,0.934114933013916,0.9183123111724854,0.9466428756713867,0.9373435378074646 
-STS14,0.9440998435020447,0.93932044506073,0.8971998691558838,0.9188550710678101,0.9074888229370117,0.9043587446212769,0.9729264974594116,0.9729264974594116,0.960503101348877,0.960503101348877,0.968707799911499,0.9752659201622009,0.9206886291503906,0.9618067741394043,0.8985730409622192,0.9405863881111145,0.899524986743927,0.9192919135093689,0.964044988155365,0.96527099609375,0.9619574546813965,0.9036004543304443,0.9652311205863953,0.7562764286994934,0.9149953126907349,0.9424399137496948,0.9224478602409363,0.889416515827179,0.9479785561561584,0.9182902574539185,0.9421572685241699,0.9181155562400818,0.9007682204246521,0.943746030330658,0.9353626370429993,0.8871220350265503,0.8978254199028015,0.9132447838783264,0.9127124547958374,0.8992865085601807,0.8912297487258911,0.9092024564743042,0.9074068665504456,0.8997173309326172,0.9341995120048523,0.9181155562400818,0.9548579454421997,0.9507559537887573,0.9442033767700195,0.921718180179596,0.9532462358474731,0.9722914695739746,0.9158501029014587,0.9171600937843323,0.9546455144882202,0.9175513982772827,0.8818195462226868,0.8853167295455933,0.9781090617179871,0.9773168563842773,0.9999998807907104,0.9775272607803345,0.9703587293624878,0.9547243118286133,0.9364654421806335,0.9683411121368408,0.9549727439880371 
-STS15,0.9286789298057556,0.9213941097259521,0.8883463144302368,0.903643786907196,0.8856732845306396,0.8874390721321106,0.9575598835945129,0.9575598835945129,0.9454217553138733,0.9454217553138733,0.9534297585487366,0.960588812828064,0.9100639820098877,0.9499640464782715,0.8854580521583557,0.9268279671669006,0.8803415298461914,0.90470290184021,0.9484242796897888,0.9469503164291382,0.9489233493804932,0.8906463384628296,0.9521862268447876,0.74090975522995,0.9032992720603943,0.9249746799468994,0.9142858386039734,0.875885009765625,0.9355614185333252,0.9101777076721191,0.9227895736694336,0.9026291370391846,0.8860815763473511,0.9186076521873474,0.9158080816268921,0.8685832023620605,0.8780002593994141,0.903823971748352,0.89433753490448,0.878086507320404,0.8708868026733398,0.8901516199111938,0.8887396454811096,0.8829312920570374,0.9155308604240417,0.9026291370391846,0.9363479018211365,0.9376870393753052,0.9270609021186829,0.9010535478591919,0.9365876317024231,0.9573034644126892,0.8992156982421875,0.8982743620872498,0.9370003938674927,0.9003238081932068,0.8666396141052246,0.885067880153656,0.9599546194076538,0.9567533135414124,0.9775272607803345,1.0000001192092896,0.9542839527130127,0.9541418552398682,0.9206722378730774,0.9615832567214966,0.9395099878311157 
-STS16,0.9331569671630859,0.9270878434181213,0.8896051645278931,0.9121543169021606,0.9061300754547119,0.8882074952125549,0.9471473097801208,0.9471473097801208,0.9361295700073242,0.9361295700073242,0.9462233185768127,0.9546923041343689,0.9006866812705994,0.9319314360618591,0.8810415267944336,0.9138600826263428,0.8795533776283264,0.8936593532562256,0.9516837000846863,0.9470174312591553,0.9536707401275635,0.8955281376838684,0.9399745464324951,0.7517213821411133,0.8763034343719482,0.9261443614959717,0.9095703363418579,0.8529878854751587,0.9222978949546814,0.9040267467498779,0.9244217872619629,0.8828028440475464,0.8972189426422119,0.9344898462295532,0.9224990010261536,0.8740650415420532,0.8911046385765076,0.899546205997467,0.9096778035163879,0.8895502090454102,0.885320246219635,0.9038639068603516,0.9075014591217041,0.8971396088600159,0.9073795676231384,0.8828028440475464,0.9465791583061218,0.9176111817359924,0.9261489510536194,0.9073488116264343,0.9189303517341614,0.9578181505203247,0.8955920934677124,0.8993921875953674,0.9341966509819031,0.8937297463417053,0.8593403100967407,0.8466845154762268,0.949619710445404,0.944054901599884,0.9703587293624878,0.9542839527130127,0.9999997019767761,0.9166147112846375,0.9188820123672485,0.9595922827720642,0.9186667799949646 
-STS17,0.908374547958374,0.904332160949707,0.8645649552345276,0.8826841711997986,0.8690868020057678,0.8814995884895325,0.9507492780685425,0.9507492780685425,0.9454723000526428,0.9454723000526428,0.9519502520561218,0.9575697183609009,0.8714514374732971,0.942611038684845,0.8527474999427795,0.9094533324241638,0.853192925453186,0.8826584219932556,0.9390854835510254,0.9395745992660522,0.9245097041130066,0.8716039061546326,0.9386094212532043,0.7253054976463318,0.9204296469688416,0.9087364673614502,0.8892186880111694,0.8793433308601379,0.9234777092933655,0.8930013179779053,0.8932615518569946,0.8914071917533875,0.8669255375862122,0.8920850157737732,0.9030084609985352,0.8476601839065552,0.8559651970863342,0.8698975443840027,0.8691129088401794,0.8537505865097046,0.843300998210907,0.8625014424324036,0.8670238256454468,0.8602017760276794,0.9020201563835144,0.8914071917533875,0.9146230816841125,0.9365106821060181,0.9158554077148438,0.877545177936554,0.931150496006012,0.9456358551979065,0.8796117305755615,0.8695809245109558,0.9161267876625061,0.8788232803344727,0.8286240100860596,0.9286790490150452,0.9488744735717773,0.934114933013916,0.9547243118286133,0.9541418552398682,0.9166147112846375,0.9999997615814209,0.9029736518859863,0.9728071689605713,0.9457882046699524 
-STS22,0.8873857855796814,0.8904899954795837,0.9054290056228638,0.8817184567451477,0.8543643951416016,0.8508477807044983,0.9367305040359497,0.9367305040359497,0.918997585773468,0.918997585773468,0.9268797636032104,0.9292759299278259,0.8883957266807556,0.933143138885498,0.8675625324249268,0.9075512886047363,0.8731187582015991,0.895809531211853,0.9249169826507568,0.9250169992446899,0.9384429454803467,0.8718999028205872,0.9347309470176697,0.7493941187858582,0.8792893886566162,0.9015825986862183,0.8974651098251343,0.8625198602676392,0.9208407998085022,0.8990249037742615,0.8931587934494019,0.8870561718940735,0.8668273091316223,0.8855765461921692,0.8916919231414795,0.8595708608627319,0.8719625473022461,0.869685173034668,0.8748772144317627,0.8672860264778137,0.8693545460700989,0.8721633553504944,0.8819707036018372,0.8727341890335083,0.9053149819374084,0.8870561718940735,0.9739243984222412,0.9216817021369934,0.922019362449646,0.8832513093948364,0.9250473976135254,0.9389108419418335,0.89765465259552,0.8843533992767334,0.9047480821609497,0.8935161828994751,0.848635196685791,0.8497613668441772,0.929383397102356,0.9183123111724854,0.9364654421806335,0.9206722378730774,0.9188820123672485,0.9029736518859863,1.0,0.9147948026657104,0.9118971228599548 
-STSBenchmark,0.933322012424469,0.9300788640975952,0.8723177313804626,0.9049835801124573,0.8943052887916565,0.9050480723381042,0.9577158689498901,0.9577158689498901,0.9517570734024048,0.9517570734024048,0.9642019867897034,0.9668652415275574,0.8937532901763916,0.949581503868103,0.8723320960998535,0.9221967458724976,0.8726744651794434,0.8965899348258972,0.9586203694343567,0.9560426473617554,0.9446290731430054,0.8934082984924316,0.9504886865615845,0.7513360977172852,0.9244780540466309,0.9283378720283508,0.901841938495636,0.8832800984382629,0.9328756928443909,0.9047304391860962,0.9195448756217957,0.8974177241325378,0.8936954140663147,0.9203011989593506,0.9242655038833618,0.8642983436584473,0.8818624019622803,0.8923594355583191,0.8964371681213379,0.879453182220459,0.8727013468742371,0.8905507326126099,0.8923331499099731,0.886046290397644,0.9130927324295044,0.8974177241325378,0.9389200806617737,0.9391409754753113,0.9279426336288452,0.9017101526260376,0.9387162327766418,0.9596331715583801,0.8925430774688721,0.8911031484603882,0.9399805665016174,0.8917135000228882,0.8512697219848633,0.9365238547325134,0.9622442126274109,0.9466428756713867,0.9683411121368408,0.9615832567214966,0.9595922827720642,0.9728071689605713,0.9147948026657104,0.9999998807907104,0.9490109086036682 
-SummEval,0.9260546565055847,0.924091637134552,0.8509836196899414,0.8948301672935486,0.8744396567344666,0.9031330347061157,0.9361376762390137,0.9361376762390137,0.932615339756012,0.932615339756012,0.9413120746612549,0.9422594308853149,0.9103951454162598,0.9360114932060242,0.8993033170700073,0.921167254447937,0.9043846130371094,0.9074312448501587,0.9392504096031189,0.9494283199310303,0.9257869124412537,0.87904953956604,0.9389493465423584,0.7284443974494934,0.920987069606781,0.926887035369873,0.8834519982337952,0.8852307796478271,0.9246395826339722,0.8860877752304077,0.9351081848144531,0.9090380072593689,0.882964015007019,0.9117947220802307,0.9156969785690308,0.8621535301208496,0.8755237460136414,0.8838998079299927,0.8863435387611389,0.8792035579681396,0.8655053973197937,0.873734712600708,0.8873686790466309,0.8786991834640503,0.9138416051864624,0.9090380072593689,0.9216462969779968,0.9349841475486755,0.9159611463546753,0.9184467196464539,0.9347662329673767,0.9374577403068542,0.9049012660980225,0.9126039147377014,0.9332844614982605,0.9123128652572632,0.8796348571777344,0.8939717411994934,0.9399190545082092,0.9373435378074646,0.9549727439880371,0.9395099878311157,0.9186667799949646,0.9457882046699524,0.9118971228599548,0.9490109086036682,1.0 diff --git a/plotstables/thumbnail.png b/plotstables/thumbnail.png deleted file mode 100644 index 27bc78c8..00000000 Binary files a/plotstables/thumbnail.png and /dev/null differ diff --git a/plotstables/thumbnail_v1.drawio b/plotstables/thumbnail_v1.drawio deleted file mode 100644 index b025fbca..00000000 --- a/plotstables/thumbnail_v1.drawio +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/results_to_csv.bash b/results_to_csv.bash deleted file mode 100644 index 17ac0355..00000000 --- a/results_to_csv.bash +++ /dev/null @@ -1,31 +0,0 @@ - -results=( -LASER2 -SGPT-125M-weightedmean-msmarco-specb-bitfit -SGPT-125M-weightedmean-msmarco-specb-bitfit-doc -SGPT-125M-weightedmean-msmarco-specb-bitfit-que 
-SGPT-125M-weightedmean-nli-bitfit -SGPT-5.8B-weightedmean-msmarco-specb-bitfit -SGPT-5.8B-weightedmean-nli-bitfit -all-MiniLM-L6-v2 -all-mpnet-base-v2 -bert-base-uncased -contriever-base-msmarco -glove.6B.300d -gtr-t5-base -gtr-t5-xxl -komninos -msmarco-bert-co-condensor -sentence-t5-base -sentence-t5-xxl -sgpt-bloom-1b3-nli -sgpt-bloom-7b1-msmarco -sup-simcse-bert-base-uncased -unsup-simcse-bert-base-uncased -) - -for i in "${results[@]}" -do - echo "$i" - python results_to_csv.py results/$i -done diff --git a/results_to_csv.py b/results_to_csv.py deleted file mode 100644 index 993dbbc6..00000000 --- a/results_to_csv.py +++ /dev/null @@ -1,212 +0,0 @@ -""" -Usage: python results_to_csv.py results_folder_path -Make sure the final directory results_folder_path is the name of your model -""" -import csv -import json -import os -import sys - -from mteb import MTEB - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - 
"HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", -] - -TASK_LIST_SUMMARIZATION = [ - "SummEval", -] - -TASK_LIST_BITEXTMINING = [ - "BUCC", - "Tatoeba", -] - -TASK_LIST = ( - TASK_LIST_BITEXTMINING - + TASK_LIST_CLASSIFICATION - + TASK_LIST_CLUSTERING - + TASK_LIST_PAIR_CLASSIFICATION - + TASK_LIST_RERANKING - + TASK_LIST_RETRIEVAL - + TASK_LIST_STS - + TASK_LIST_SUMMARIZATION -) - -TASK_LIST_NAMES = [ - ("Classification", TASK_LIST_CLASSIFICATION, ["en", "en-en"]), - ("Clustering", TASK_LIST_CLUSTERING, ["en", "en-en"]), - ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"]), - ("Reranking", TASK_LIST_RERANKING, ["en", "en-en"]), - ("Retrieval", TASK_LIST_RETRIEVAL, ["en", "en-en"]), - ("STS", TASK_LIST_STS, ["en", "en-en"]), - ("all", TASK_LIST, ["en", "en-en"]), - ("BitextMining", TASK_LIST_BITEXTMINING, []), -] - -results_folder = sys.argv[1] -results_folder = results_folder.strip("/") -model_name = results_folder.split("/")[-1] -print(f"Using model name {model_name}") - -all_results = {} - -for file_name in os.listdir(results_folder): - if not file_name.endswith(".json"): - print(f"Skipping non-json {file_name}") - continue - with open(os.path.join(results_folder, file_name), "r", encoding="utf-8") as f: - results = json.load(f) - all_results = {**all_results, **{file_name.replace(".json", ""): results}} - -csv_file = f"{results_folder}_results.csv" -print(f"Converting {results_folder} to {csv_file}") - -NOT_FOUND = [] - - -def get_rows(task, dataset, limit_langs=[]): - rows = [] - # CQADupstackRetrieval uses the same metric as its subsets - tasks = MTEB(tasks=[dataset.replace("CQADupstackRetrieval", "CQADupstackTexRetrieval")]).tasks - assert len(tasks) == 1, f"Found {len(tasks)} for {dataset}. Expected 1." 
- main_metric = tasks[0].description["main_score"] - test_result = all_results.get(dataset, {}) - - # Dev / Val set is used for MSMARCO (See BEIR paper) - if "MSMARCO" in dataset: - test_result = ( - test_result.get("dev") if "dev" in test_result else test_result.get("validation") - ) - else: - test_result = test_result.get("test") - if test_result is None: - print(f"{dataset} - test set not found") - NOT_FOUND.append(dataset) - return [[model_name, task, dataset, "", main_metric, ""]] - - for lang in tasks[0].description["eval_langs"]: - if limit_langs and lang not in limit_langs: - continue - test_result_lang = test_result.get(lang, test_result) - if main_metric == "cosine_spearman": - test_result_lang = test_result_lang.get("cos_sim", {}).get("spearman") - elif main_metric == "ap": - test_result_lang = test_result_lang.get("cos_sim", {}).get("ap") - else: - test_result_lang = test_result_lang.get(main_metric) - - if test_result_lang is None: - print(f"{lang} & {main_metric} not found for task {dataset}.") - rows.append([model_name, task, dataset, lang, main_metric, ""]) - rows.append([model_name, task, dataset, lang, main_metric, test_result_lang]) - return rows - - -with open(csv_file, "w", encoding="utf-8") as f: - writer = csv.writer(f) - writer.writerow(["model", "task", "dataset", "language", "metric", "value"]) - for task, dataset_list in [ - ("BitextMining", TASK_LIST_BITEXTMINING), - ("Classification", TASK_LIST_CLASSIFICATION), - ("Clustering", TASK_LIST_CLUSTERING), - ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION), - ("Reranking", TASK_LIST_RERANKING), - ("Retrieval", TASK_LIST_RETRIEVAL), - ("STS", TASK_LIST_STS), - ("Summarization", TASK_LIST_SUMMARIZATION), - ]: - for dataset in dataset_list: - writer.writerows(get_rows(task, dataset)) - - # Add average scores - for task, dataset_list, limit_langs in [ - ("BitextMining", TASK_LIST_BITEXTMINING, []), - ("Classification", TASK_LIST_CLASSIFICATION, ["en", "en-en"]), - ("Clustering", 
TASK_LIST_CLUSTERING, ["en", "en-en"]), - ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION, ["en", "en-en"]), - ("Reranking", TASK_LIST_RERANKING, ["en", "en-en"]), - ("Retrieval", TASK_LIST_RETRIEVAL, ["en", "en-en"]), - ("STS", TASK_LIST_STS, ["en", "en-en"]), - ("all", TASK_LIST, ["en", "en-en"]), - ]: - if all([x in all_results for x in dataset_list]): - rows = [y for x in dataset_list for y in get_rows(task, x, limit_langs=limit_langs)] - try: - avg = sum([float(x[-1]) for x in rows]) / len(rows) - except: - continue - metric = "multiple" if task == "all" else rows[-1][-2] - writer.writerow([model_name, task, "average", "en", metric, avg]) - -if NOT_FOUND: - print("Not found: " + "'" + "','".join(NOT_FOUND) + "'", len(NOT_FOUND)) diff --git a/run_array.py b/run_array.py deleted file mode 100644 index ac172e82..00000000 --- a/run_array.py +++ /dev/null @@ -1,247 +0,0 @@ -import argparse -import logging -import os -from typing import Dict, List, Union - -logging.basicConfig(level=logging.INFO) - -logger = logging.getLogger("main") - -os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models" -os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets" -os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules" -os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics" -os.environ["TOKENIZERS_PARALLELISM"] = "false" - -from mteb import MTEB -import numpy as np -from sentence_transformers import SentenceTransformer -from torch import Tensor -import torch.multiprocessing as mp - - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - 
"MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackAndroidRetrieval", - "CQADupstackEnglishRetrieval", - "CQADupstackGamingRetrieval", - "CQADupstackGisRetrieval", - "CQADupstackMathematicaRetrieval", - "CQADupstackPhysicsRetrieval", - "CQADupstackProgrammersRetrieval", - "CQADupstackStatsRetrieval", - "CQADupstackTexRetrieval", - "CQADupstackUnixRetrieval", - "CQADupstackWebmastersRetrieval", - "CQADupstackWordpressRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", - "SummEval", -] - -TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS - - -class SentenceTransformerSpecb: - # Requires: - # https://github.com/Muennighoff/sentence-transformers/tree/sgpt_poolings_specb - # pip install git+https://github.com/Muennighoff/sentence-transformers.git@sgpt_poolings_specb - def __init__(self, model): - self.model = SentenceTransformer(model) - self.sep = " " - tokens = ["[SOS]", "{SOS}"] - 
self.model._first_module().tokenizer.add_tokens(tokens, special_tokens=True) - self.model._first_module().auto_model.resize_token_embeddings(len(self.model._first_module().tokenizer)) - # Will be replaced with the rep tokens in the model ones - # The problem is we don't know if a text is query or document when tokenizing in the Transformer.py module, - # so we use the SOS tokens as an identifier if we have a query or document at hand & then replace them - # If we would directly use the brackets here, they may become part of another token - self.model._first_module().bos_spec_token_q = self.model._first_module().tokenizer.encode("[SOS]", add_special_tokens=False)[0] - self.model._first_module().bos_spec_token_d = self.model._first_module().tokenizer.encode("{SOS}", add_special_tokens=False)[0] - self.model._first_module().bos_spec_token_q_rep = self.model._first_module().tokenizer.encode("[", add_special_tokens=False)[0] - self.model._first_module().eos_spec_token_q = self.model._first_module().tokenizer.encode("]", add_special_tokens=False)[0] - self.model._first_module().bos_spec_token_d_rep = self.model._first_module().tokenizer.encode("{", add_special_tokens=False)[0] - self.model._first_module().eos_spec_token_d = self.model._first_module().tokenizer.encode("}", add_special_tokens=False)[0] - self.model._first_module().replace_bos = True - - def encode(self, sentences, **kwargs): - """Returns a list of embeddings for the given sentences. 
- Args: - sentences (`List[str]`): List of sentences to encode - batch_size (`int`): Batch size for the encoding - - Returns: - `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences - """ - # Add specb query token - sentences = ["[SOS]" + sent for sent in sentences] - return self.model.encode(sentences, **kwargs) - - def encode_queries(self, queries: List[str], batch_size: int = 16, **kwargs) -> Union[List[Tensor], np.ndarray, Tensor]: - # Will be replaced with [ in the models tokenization - # If we would put [ here, there is a risk of it getting chained with a different token when encoding - queries = ["[SOS]" + q for q in queries] - return self.model.encode(queries, batch_size=batch_size, **kwargs) - - def encode_corpus(self, corpus: List[Dict[str, str]], batch_size: int = 8, **kwargs) -> Union[List[Tensor], np.ndarray, Tensor]: - # Will be replaced with { in the models tokenization - # If we would put { here, there is a risk of it getting chained with a different token when encoding - sentences = [("{SOS}" + doc["title"] + self.sep + doc["text"]).strip() if "title" in doc else "{SOS}" + doc["text"].strip() for doc in corpus] - return self.model.encode(sentences, batch_size=batch_size, **kwargs) - - def encode_corpus_parallel( - self, corpus: List[Dict[str, str]], pool: Dict[str, object], batch_size: int, chunk_id: int, **kwargs - ): - if type(corpus) is dict: - sentences = [ - ("{SOS}" + corpus["title"][i] + self.sep + corpus["text"][i]).strip() - if "title" in corpus - else "{SOS}" + corpus["text"][i].strip() - for i in range(len(corpus["text"])) - ] - else: - sentences = [ - ("{SOS}" + doc["title"] + self.sep + doc["text"]).strip() if "title" in doc else "{SOS}" + doc["text"].strip() - for doc in corpus - ] - - if chunk_id is not None and chunk_id >= len(pool["processes"]): - output_queue = pool["output"] - output_queue.get() - - input_queue = pool["input"] - input_queue.put([chunk_id, batch_size, sentences]) - - - def 
start_multi_process_pool(self, target_devices: List[str] = None) -> Dict[str, object]: - logger.info("Start multi-process pool on devices: {}".format(", ".join(map(str, target_devices)))) - - ctx = mp.get_context("spawn") - input_queue = ctx.Queue() - output_queue = ctx.Queue() - processes = [] - - for process_id, device_name in enumerate(target_devices): - p = ctx.Process( - target=SentenceTransformer._encode_multi_process_worker, - args=(process_id, device_name, self.model, input_queue, output_queue), - daemon=True, - ) - p.start() - processes.append(p) - - return {"input": input_queue, "output": output_queue, "processes": processes} - - def stop_multi_process_pool(self, pool: Dict[str, object]): - output_queue = pool["output"] - [output_queue.get() for _ in range(len(pool["processes"]))] - return self.model.stop_multi_process_pool(pool) - - -def parse_args(): - # Parse command line arguments - parser = argparse.ArgumentParser() - parser.add_argument("--startid", type=int) - parser.add_argument("--endid", type=int) - parser.add_argument("--addspecbdoc", action='store_true') - parser.add_argument("--addspecbquery", action='store_true') - parser.add_argument("--modelpath", type=str, default="/gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-base") - parser.add_argument("--lang", type=str, default="en") - parser.add_argument("--taskname", type=str, default=None) - parser.add_argument("--batchsize", type=int, default=128) - args = parser.parse_args() - return args - -def main(args): - - if args.addspecbdoc or args.addspecbquery: - model = SentenceTransformerSpecb(args.modelpath) # Only used for SGPT-msmarco models - else: - model = SentenceTransformer(args.modelpath) - - if args.taskname is not None: - task = args.taskname - model_name = args.modelpath.split("/")[-1].split("_")[-1] - eval_splits = ["validation"] if task == "MSMARCO" else ["test"] - evaluation = MTEB(tasks=[task], task_langs=[args.lang]) - evaluation.run(model, 
output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits) - exit() - - for task in TASK_LIST[args.startid:args.endid]: - print("Running task: ", task) - eval_splits = ["validation"] if task == "MSMARCO" else ["test"] - model_name = args.modelpath.split("/")[-1].split("_")[-1] - evaluation = MTEB(tasks=[task], task_langs=[args.lang]) - evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits) - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/run_array_laser.py b/run_array_laser.py deleted file mode 100644 index a3f61208..00000000 --- a/run_array_laser.py +++ /dev/null @@ -1,178 +0,0 @@ -""" -See https://github.com/facebookresearch/LASER/issues/211 -""" - -import argparse -import logging -import os - -import numpy as np -import subprocess - -logging.basicConfig(level=logging.INFO) - -os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models" -os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets" -os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules" -os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics" -os.environ["TOKENIZERS_PARALLELISM"] = "false" - -from mteb import MTEB - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - 
"RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackAndroidRetrieval", - "CQADupstackEnglishRetrieval", - "CQADupstackGamingRetrieval", - "CQADupstackGisRetrieval", - "CQADupstackMathematicaRetrieval", - "CQADupstackPhysicsRetrieval", - "CQADupstackProgrammersRetrieval", - "CQADupstackStatsRetrieval", - "CQADupstackTexRetrieval", - "CQADupstackUnixRetrieval", - "CQADupstackWebmastersRetrieval", - "CQADupstackWordpressRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", - "SummEval", -] - -TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS - -### Setup prior to running ### -#with open("LASER_script.sh", "w") as f: -# f.write("LASER=/content/LASER ./LASER/tasks/embed/embed.sh tmp.txt tmp.bin") -# Run `chmod u+rx LASER_script.sh` to give permissions -# !chmod u+rx LASER_script.sh - -class LASER(): - def encode(self, sentences, batch_size=32, **kwargs): - """ - Returns a list of embeddings for the given sentences. 
- Args: - sentences (`List[str]`): List of sentences to encode - batch_size (`int`): Batch size for the encoding - - Returns: - `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences - """ - if os.path.exists("tmp.txt"): - os.remove("tmp.txt") - if os.path.exists("tmp.bin"): - os.remove("tmp.bin") - - # LASER expects one text per line, so we need to replace newlines - sentences = [s.replace("\n", " ") for s in sentences] - with open("tmp.txt", "w") as f: - f.write("\n".join(sentences)) - - print(len(sentences)) - rc = subprocess.call("./LASER_script.sh", shell=True) - - dim = 1024 - X = np.fromfile("tmp.bin", dtype=np.float32, count=-1) - X.resize(X.shape[0] // dim, dim) - print(X.shape) - return X - -def parse_args(): - # Parse command line arguments - parser = argparse.ArgumentParser() - parser.add_argument("--startid", type=int) - parser.add_argument("--endid", type=int) - parser.add_argument("--lang", type=str, default="en") - parser.add_argument("--taskname", type=str, default=None) - parser.add_argument("--batchsize", type=int, default=128) - args = parser.parse_args() - return args - -def main(args): - - model = LASER() - model_name = "LASER2" - - if args.taskname is not None: - task = args.taskname - eval_splits = ["validation"] if task == "MSMARCO" else ["test"] - evaluation = MTEB(tasks=[task], task_langs=[args.lang]) - evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits) - exit() - - for task in TASK_LIST[args.startid:args.endid]: - print("Running task: ", task) - eval_splits = ["validation"] if task == "MSMARCO" else ["test"] - evaluation = MTEB(tasks=[task], task_langs=[args.lang]) - evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits) - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/run_array_openai.py b/run_array_openai.py deleted file mode 100644 index c3a95d55..00000000 --- 
a/run_array_openai.py +++ /dev/null @@ -1,220 +0,0 @@ -""" -openai==0.11.4 -""" -import argparse -import logging -import os -import pathlib -import pickle - -import openai -from transformers import GPT2TokenizerFast - -logging.basicConfig(level=logging.INFO) - -os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models" -os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets" -os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules" -os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics" -os.environ["TOKENIZERS_PARALLELISM"] = "false" - -API_KEY = "YOUR_KEY" - -from mteb import MTEB - - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackAndroidRetrieval", - "CQADupstackEnglishRetrieval", - "CQADupstackGamingRetrieval", - "CQADupstackGisRetrieval", - "CQADupstackMathematicaRetrieval", - "CQADupstackPhysicsRetrieval", 
- "CQADupstackProgrammersRetrieval", - "CQADupstackStatsRetrieval", - "CQADupstackTexRetrieval", - "CQADupstackUnixRetrieval", - "CQADupstackWebmastersRetrieval", - "CQADupstackWordpressRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", - "SummEval", -] - -TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS -class OpenAIEmbedder: - """ - Benchmark OpenAIs embeddings endpoint on USEB. - """ - def __init__(self, engine, task_name=None, batch_size=32, save_emb=False, **kwargs): - self.engine = engine - self.max_token_len = 2046 # 2048 - 2 special tokens - self.batch_size = batch_size - self.save_emb = False # Problematic as the filenames end up being the same - self.base_path = f"embeddings/{engine.split('/')[-1]}/" - self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2") - self.task_name = task_name - - if save_emb: - assert self.task_name is not None - - pathlib.Path(self.base_path).mkdir(parents=True, exist_ok=True) - - def encode(self, - sentences, - decode=True, - idx=None, - **kwargs - ): - - openai.api_key = API_KEY - - fin_embeddings = [] - - embedding_path = f"{self.base_path}/{self.task_name}_{sentences[0][:5]}_{sentences[-1][-5:]}.pickle" - if sentences and os.path.exists(embedding_path): - loaded = pickle.load(open(embedding_path, "rb")) - fin_embeddings = loaded["fin_embeddings"] - else: - for i in range(0, len(sentences), self.batch_size): - batch = sentences[i : i + self.batch_size] - - all_tokens = [] - used_indices = [] - for j, txt in enumerate(batch): - tokens = self.tokenizer.encode(txt, add_special_tokens=False) - token_len = len(tokens) - if token_len == 0: - raise 
ValueError("Empty items should be cleaned prior to running") - if token_len > self.max_token_len: - tokens = tokens[:self.max_token_len] - # For some characters the API raises weird errors, e.g. input=[[126]] - if decode: - tokens = self.tokenizer.decode(tokens) - all_tokens.append(tokens) - used_indices.append(j) - - out = [[]] * len(batch) - if all_tokens: - response = openai.Engine(id=self.engine).embeddings(input=all_tokens) - assert len(response["data"]) == len( - all_tokens - ), f"Sent {len(all_tokens)}, got {len(response['data'])}" - - for data in response["data"]: - idx = data["index"] - # OpenAI seems to return them ordered, but to be save use the index and insert - idx = used_indices[idx] - embedding = data["embedding"] - out[idx] = embedding - - fin_embeddings.extend(out) - # Save embeddings - if fin_embeddings and self.save_emb: - dump = { - "fin_embeddings": fin_embeddings, - } - pickle.dump(dump, open(embedding_path, "wb")) - - assert len(sentences) == len(fin_embeddings) - return fin_embeddings - - -def parse_args(): - # Parse command line arguments - parser = argparse.ArgumentParser() - parser.add_argument("--startid", type=int) - parser.add_argument("--endid", type=int) - parser.add_argument("--engine", type=str, default="text-similarity-ada-001") - parser.add_argument("--lang", type=str, default="en") - parser.add_argument("--taskname", type=str, default=None) - parser.add_argument("--batchsize", type=int, default=2048) - args = parser.parse_args() - return args - -def main(args): - - # Different batch size than the arg - # The below is used to send X embeddings to the API - # The CLI arg is how much will be saved / pickle file - - for task in TASK_LIST[args.startid:args.endid]: - print("Running task: ", task) - model = OpenAIEmbedder(args.engine, task_name=task, batchsize=256, save_emb=True) - eval_splits = ["validation"] if task == "MSMARCO" else ["test"] - model_name = args.engine.split("/")[-1].split("_")[-1] - evaluation = MTEB(tasks=[task], 
task_langs=[args.lang]) - evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits) - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/run_array_openaiv2.py b/run_array_openaiv2.py deleted file mode 100644 index d29c6cc4..00000000 --- a/run_array_openaiv2.py +++ /dev/null @@ -1,226 +0,0 @@ -""" -openai==0.26.4 -tiktoken==0.2.0 -""" -import argparse -import logging -import os -import pathlib -import pickle - -from mteb import MTEB -import openai -import tiktoken -from transformers import GPT2TokenizerFast - -logging.basicConfig(level=logging.INFO) - -os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models" -os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets" -os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules" -os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics" -os.environ["TOKENIZERS_PARALLELISM"] = "false" - -API_KEY = "YOUR_KEY" - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - 
"AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackAndroidRetrieval", - "CQADupstackEnglishRetrieval", - "CQADupstackGamingRetrieval", - "CQADupstackGisRetrieval", - "CQADupstackMathematicaRetrieval", - "CQADupstackPhysicsRetrieval", - "CQADupstackProgrammersRetrieval", - "CQADupstackStatsRetrieval", - "CQADupstackTexRetrieval", - "CQADupstackUnixRetrieval", - "CQADupstackWebmastersRetrieval", - "CQADupstackWordpressRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", - "SummEval", -] - -TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS - -class OpenAIEmbedder: - """ - Benchmark OpenAIs embeddings endpoint. 
- """ - def __init__(self, engine, task_name=None, batch_size=32, save_emb=False, **kwargs): - self.engine = engine - self.max_token_len = 8191 - self.batch_size = batch_size - self.save_emb = save_emb # Problematic as the filenames may end up being the same - self.base_path = f"embeddings/{engine.split('/')[-1]}/" - # self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2") - self.tokenizer = tiktoken.encoding_for_model(engine) - self.task_name = task_name - - if save_emb: - assert self.task_name is not None - - pathlib.Path(self.base_path).mkdir(parents=True, exist_ok=True) - - def encode(self, - sentences, - decode=True, - idx=None, - **kwargs - ): - - openai.api_key = API_KEY - - fin_embeddings = [] - - embedding_path = f"{self.base_path}/{self.task_name}_{sentences[0][:10]}_{sentences[-1][-10:]}.pickle" - if sentences and os.path.exists(embedding_path): - loaded = pickle.load(open(embedding_path, "rb")) - fin_embeddings = loaded["fin_embeddings"] - else: - for i in range(0, len(sentences), self.batch_size): - batch = sentences[i : i + self.batch_size] - - all_tokens = [] - used_indices = [] - for j, txt in enumerate(batch): - # tokens = self.tokenizer.encode(txt, add_special_tokens=False) - if not(txt): - print("Detected empty item, which is not allowed by the OpenAI API - Replacing with empty space") - txt = " " - tokens = self.tokenizer.encode(txt) - token_len = len(tokens) - if token_len > self.max_token_len: - tokens = tokens[:self.max_token_len] - # For some characters the API raises weird errors, e.g. 
input=[[126]] - if decode: - tokens = self.tokenizer.decode(tokens) - all_tokens.append(tokens) - used_indices.append(j) - - out = [[]] * len(batch) - if all_tokens: - response = openai.Embedding.create(input=all_tokens, model=self.engine) - # May want to sleep here to avoid getting too many requests error - # time.sleep(1) - assert len(response["data"]) == len( - all_tokens - ), f"Sent {len(all_tokens)}, got {len(response['data'])}" - - for data in response["data"]: - idx = data["index"] - # OpenAI seems to return them ordered, but to be save use the index and insert - idx = used_indices[idx] - embedding = data["embedding"] - out[idx] = embedding - - fin_embeddings.extend(out) - # Save embeddings - if fin_embeddings and self.save_emb: - dump = { - "fin_embeddings": fin_embeddings, - } - pickle.dump(dump, open(embedding_path, "wb")) - - assert len(sentences) == len(fin_embeddings) - return fin_embeddings - - -def parse_args(): - # Parse command line arguments - parser = argparse.ArgumentParser() - parser.add_argument("--startid", type=int) - parser.add_argument("--endid", type=int) - parser.add_argument("--engine", type=str, default="text-embedding-ada-002") - parser.add_argument("--lang", type=str, default="en") - parser.add_argument("--taskname", type=str, default=None) - parser.add_argument("--batchsize", type=int, default=2048) - args = parser.parse_args() - return args - -def main(args): - - # There are two different batch sizes - # OpenAIEmbedder(...) batch size arg is used to send X embeddings to the API - # evaluation.run(...) 
batch size arg is how much will be saved / pickle file (as it's the total sent to the embed function) - - for task in TASK_LIST[args.startid:args.endid]: - print("Running task: ", task) - model = OpenAIEmbedder(args.engine, task_name=task, batch_size=args.batchsize, save_emb=True) - eval_splits = ["validation"] if task == "MSMARCO" else ["test"] - model_name = args.engine.split("/")[-1].split("_")[-1] - evaluation = MTEB(tasks=[task], task_langs=[args.lang]) - evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits, corpus_chunk_size=10000) - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/run_array_sgpt.py b/run_array_sgpt.py deleted file mode 100644 index f95da0a6..00000000 --- a/run_array_sgpt.py +++ /dev/null @@ -1,247 +0,0 @@ -import argparse -import logging -import os -from typing import Dict, List, Union - -from mteb import MTEB -import numpy as np -from sentence_transformers import SentenceTransformer -import torch.multiprocessing as mp -from torch import Tensor - -logging.basicConfig(level=logging.INFO) - -logger = logging.getLogger("main") - -os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models" -os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets" -os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules" -os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics" -os.environ["TOKENIZERS_PARALLELISM"] = "false" - - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - 
"TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackAndroidRetrieval", - "CQADupstackEnglishRetrieval", - "CQADupstackGamingRetrieval", - "CQADupstackGisRetrieval", - "CQADupstackMathematicaRetrieval", - "CQADupstackPhysicsRetrieval", - "CQADupstackProgrammersRetrieval", - "CQADupstackStatsRetrieval", - "CQADupstackTexRetrieval", - "CQADupstackUnixRetrieval", - "CQADupstackWebmastersRetrieval", - "CQADupstackWordpressRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", - "SummEval", -] - -TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS - - -class SentenceTransformerSpecb(SentenceTransformer): - # Requires: - # https://github.com/Muennighoff/sentence-transformers/tree/sgpt_poolings_specb - # pip install git+https://github.com/Muennighoff/sentence-transformers.git@sgpt_poolings_specb - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - tokens = ["[SOS]", "{SOS}"] - self.sep = " " - self._first_module().tokenizer.add_tokens(tokens, special_tokens=True) - 
self._first_module().auto_model.resize_token_embeddings(len(self._first_module().tokenizer)) - # Will be replaced with the rep tokens in the model ones - # The problem is we don't know if a text is query or document when tokenizing in the Transformer.py module, - # so we use the SOS tokens as an identifier if we have a query or document at hand & then replace them - # If we would directly use the brackets here, they may become part of another token - self._first_module().bos_spec_token_q = self._first_module().tokenizer.encode("[SOS]", add_special_tokens=False)[0] - self._first_module().bos_spec_token_d = self._first_module().tokenizer.encode("{SOS}", add_special_tokens=False)[0] - self._first_module().bos_spec_token_q_rep = self._first_module().tokenizer.encode("[", add_special_tokens=False)[0] - self._first_module().eos_spec_token_q = self._first_module().tokenizer.encode("]", add_special_tokens=False)[0] - self._first_module().bos_spec_token_d_rep = self._first_module().tokenizer.encode("{", add_special_tokens=False)[0] - self._first_module().eos_spec_token_d = self._first_module().tokenizer.encode("}", add_special_tokens=False)[0] - self._first_module().replace_bos = True - - def encode(self, sentences, **kwargs): - """Returns a list of embeddings for the given sentences. 
- Args: - sentences (`List[str]`): List of sentences to encode - batch_size (`int`): Batch size for the encoding - - Returns: - `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences - """ - # Add specb query token - sentences = ["[SOS]" + sent for sent in sentences] - return super().encode(sentences, **kwargs) - - def encode_queries(self, queries: List[str], batch_size: int = 16, **kwargs) -> Union[List[Tensor], np.ndarray, Tensor]: - # Will be replaced with [ in the models tokenization - # If we would put [ here, there is a risk of it getting chained with a different token when encoding - queries = ["[SOS]" + q for q in queries] - return super().encode(queries, batch_size=batch_size, **kwargs) - - def encode_corpus(self, corpus: List[Dict[str, str]], batch_size: int = 8, **kwargs) -> Union[List[Tensor], np.ndarray, Tensor]: - # Will be replaced with { in the models tokenization - # If we would put { here, there is a risk of it getting chained with a different token when encoding - sentences = [("{SOS}" + doc["title"] + self.sep + doc["text"]).strip() if "title" in doc else "{SOS}" + doc["text"].strip() for doc in corpus] - return super().encode(sentences, batch_size=batch_size, **kwargs) - - def encode_corpus_parallel( - self, corpus: List[Dict[str, str]], pool: Dict[str, object], batch_size: int, chunk_id: int, **kwargs - ): - if type(corpus) is dict: - sentences = [ - ("{SOS}" + corpus["title"][i] + self.sep + corpus["text"][i]).strip() - if "title" in corpus - else "{SOS}" + corpus["text"][i].strip() - for i in range(len(corpus["text"])) - ] - else: - sentences = [ - ("{SOS}" + doc["title"] + self.sep + doc["text"]).strip() if "title" in doc else "{SOS}" + doc["text"].strip() - for doc in corpus - ] - - if chunk_id is not None and chunk_id >= len(pool["processes"]): - output_queue = pool["output"] - output_queue.get() - - input_queue = pool["input"] - input_queue.put([chunk_id, batch_size, sentences]) - - - def 
start_multi_process_pool(self, target_devices: List[str] = None) -> Dict[str, object]: - logger.info("Start multi-process pool on devices: {}".format(", ".join(map(str, target_devices)))) - - ctx = mp.get_context("spawn") - input_queue = ctx.Queue() - output_queue = ctx.Queue() - processes = [] - - for process_id, device_name in enumerate(target_devices): - p = ctx.Process( - target=SentenceTransformer._encode_multi_process_worker, - args=(process_id, device_name, self.model, input_queue, output_queue), - daemon=True, - ) - p.start() - processes.append(p) - - return {"input": input_queue, "output": output_queue, "processes": processes} - - def stop_multi_process_pool(self, pool: Dict[str, object]): - output_queue = pool["output"] - [output_queue.get() for _ in range(len(pool["processes"]))] - return self.model.stop_multi_process_pool(pool) - - -def parse_args(): - # Parse command line arguments - parser = argparse.ArgumentParser() - parser.add_argument("--startid", type=int) - parser.add_argument("--endid", type=int) - parser.add_argument("--addspecbdoc", action='store_true') - parser.add_argument("--addspecbquery", action='store_true') - parser.add_argument("--modelpath", type=str, default="/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-msmarco-specb-bitfit") - parser.add_argument("--lang", type=str, default="en") - parser.add_argument("--taskname", type=str, default=None) - parser.add_argument("--batchsize", type=int, default=128) - args = parser.parse_args() - return args - -def main(args): - - if args.addspecbdoc or args.addspecbquery: - model = SentenceTransformerSpecb(args.modelpath) # Only used for SGPT-msmarco models - else: - model = SentenceTransformer(args.modelpath) - - if args.taskname is not None: - task = args.taskname - model_name = args.modelpath.split("/")[-1].split("_")[-1] - eval_splits = ["validation"] if task == "MSMARCO" else ["test"] - evaluation = MTEB(tasks=[task], task_langs=[args.lang]) - evaluation.run(model, 
output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits) - exit() - - for task in TASK_LIST[args.startid:args.endid]: - print("Running task: ", task) - eval_splits = ["validation"] if task == "MSMARCO" else ["test"] - model_name = args.modelpath.split("/")[-1].split("_")[-1] - evaluation = MTEB(tasks=[task], task_langs=[args.lang]) - evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits) - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/run_array_simcse.py b/run_array_simcse.py deleted file mode 100644 index cc3eefd8..00000000 --- a/run_array_simcse.py +++ /dev/null @@ -1,173 +0,0 @@ -import argparse -import logging -import os - -logging.basicConfig(level=logging.INFO) - -os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models" -os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets" -os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules" -os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics" -os.environ["TOKENIZERS_PARALLELISM"] = "false" - -import numpy as np -from mteb import MTEB -from transformers import AutoModel, AutoTokenizer -import torch - - -TASK_LIST_CLASSIFICATION = [ - "AmazonCounterfactualClassification", - "AmazonPolarityClassification", - "AmazonReviewsClassification", - "Banking77Classification", - "EmotionClassification", - "ImdbClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", -] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - 
"RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackAndroidRetrieval", - "CQADupstackEnglishRetrieval", - "CQADupstackGamingRetrieval", - "CQADupstackGisRetrieval", - "CQADupstackMathematicaRetrieval", - "CQADupstackPhysicsRetrieval", - "CQADupstackProgrammersRetrieval", - "CQADupstackStatsRetrieval", - "CQADupstackTexRetrieval", - "CQADupstackUnixRetrieval", - "CQADupstackWebmastersRetrieval", - "CQADupstackWordpressRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17", - "STS22", - "STSBenchmark", - "SummEval", -] - -TASK_LIST = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS - - -class SimCSEWrapper: - def __init__(self, modelpath="princeton-nlp/sup-simcse-bert-base-uncased"): - self.device = "cuda" if torch.cuda.is_available() else "cpu" - self.tokenizer = AutoTokenizer.from_pretrained(modelpath) - self.model = AutoModel.from_pretrained(modelpath).to(self.device) - self.model.eval() - - def encode(self, sentences, batch_size=32, **kwargs): - """ Returns a list of embeddings for the given sentences. 
- Args: - sentences (`List[str]`): List of sentences to encode - batch_size (`int`): Batch size for the encoding - - Returns: - `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences - """ - all_embeddings = [] - length_sorted_idx = np.argsort([len(sen) for sen in sentences]) - sentences_sorted = [sentences[idx] for idx in length_sorted_idx] - - for start_index in range(0, len(sentences), batch_size): - sentences_batch = sentences_sorted[start_index:start_index+batch_size] - inputs = self.tokenizer(sentences_batch, padding=True, truncation=True, return_tensors="pt") - inputs = {k: v.to(self.device) for k,v in inputs.items()} - # Get the embeddings - with torch.no_grad(): - embeddings = self.model(**inputs, output_hidden_states=True, return_dict=True).pooler_output - all_embeddings.extend(embeddings.cpu().numpy()) - all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)] - return all_embeddings - -def parse_args(): - # Parse command line arguments - parser = argparse.ArgumentParser() - parser.add_argument("--startid", type=int) - parser.add_argument("--endid", type=int) - parser.add_argument("--modelpath", type=str, default="/gpfswork/rech/six/commun/models/princeton-nlp/sup-simcse-bert-base-uncased") - parser.add_argument("--lang", type=str, default="en") - parser.add_argument("--taskname", type=str, default=None) - parser.add_argument("--batchsize", type=int, default=128) - args = parser.parse_args() - return args - -def main(args): - - model = SimCSEWrapper(args.modelpath) - - if args.taskname is not None: - task = args.taskname - eval_splits = ["validation"] if task == "MSMARCO" else ["test"] - model_name = args.modelpath.split("/")[-1].split("_")[-1] - evaluation = MTEB(tasks=[task], task_langs=[args.lang], eval_splits=eval_splits) - evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize) - exit() - - for task in TASK_LIST[args.startid:args.endid]: - print("Running task: ", task) 
- eval_splits = ["validation"] if task == "MSMARCO" else ["test"] - model_name = args.modelpath.split("/")[-1].split("_")[-1] - evaluation = MTEB(tasks=[task], task_langs=[args.lang]) - evaluation.run(model, output_folder=f"results/{model_name}", batch_size=args.batchsize, eval_splits=eval_splits) - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/run_benchmark.py b/run_benchmark.py deleted file mode 100644 index c333ee4c..00000000 --- a/run_benchmark.py +++ /dev/null @@ -1,201 +0,0 @@ -import argparse -import logging -import json -import os -import subprocess -import time - -logging.basicConfig(level=logging.INFO) - -os.environ["HF_DATASETS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_OFFLINE"]="1" # 1 for offline -os.environ["TRANSFORMERS_CACHE"]="/gpfswork/rech/six/commun/models" -os.environ["HF_DATASETS_CACHE"]="/gpfswork/rech/six/commun/datasets" -os.environ["HF_MODULES_CACHE"]="/gpfswork/rech/six/commun/modules" -os.environ["HF_METRICS_CACHE"]="/gpfswork/rech/six/commun/metrics" -os.environ["TOKENIZERS_PARALLELISM"] = "false" - -import numpy as np -from mteb import MTEB -from sentence_transformers import SentenceTransformer -import torch -from transformers import AutoModel, AutoTokenizer - - -MODELS = [ - "LASER2", - "/gpfswork/rech/six/commun/models/sentence-transformers_average_word_embeddings_komninos", - "/gpfswork/rech/six/commun/models/sentence-transformers_average_word_embeddings_glove.6B.300d", - "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit", - "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-msmarco-specb-bitfit", - "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-nli-bitfit", - "/gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-msmarco-specb-bitfit", - "/gpfswork/rech/six/commun/models/bigscience_sgpt-bloom-7b1-msmarco", - "/gpfswork/rech/six/commun/models/bigscience-catalogue-lm-data_sgpt-bloom-1b3-nli", - 
"/gpfswork/rech/six/commun/models/sentence-transformers_all-MiniLM-L6-v2", - "/gpfswork/rech/six/commun/models/sentence-transformers_all-mpnet-base-v2", - "/gpfswork/rech/six/commun/models/sentence-transformers_paraphrase-multilingual-mpnet-base-v2", - "/gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-base", - "/gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-xxl", - "/gpfswork/rech/six/commun/models/sentence-transformers_gtr-t5-base", - "/gpfswork/rech/six/commun/models/sentence-transformers_gtr-t5-xxl", - "/gpfswork/rech/six/commun/models/nthakur_contriever-base-msmarco", - "/gpfswork/rech/six/commun/models/sentence-transformers_msmarco-bert-co-condensor", - "/gpfswork/rech/six/commun/models/bert-base-uncased", - "/gpfswork/rech/six/commun/models/princeton-nlp_sup-simcse-bert-base-uncased", - "/gpfswork/rech/six/commun/models/princeton-nlp_unsup-simcse-bert-base-uncased", - "/gpfswork/rech/six/commun/models/sentence-transformers_LaBSE", -] - -MODELS = [ - "/gpfswork/rech/six/commun/models/sentence-transformers_all-MiniLM-L12-v2", - "/gpfswork/rech/six/commun/models/sentence-transformers_allenai-specter", -] - -TASKS = [ - "STS15", -] - -class SentenceTransformerSpecb(SentenceTransformer): - # Requires: - # https://github.com/Muennighoff/sentence-transformers/tree/sgpt_poolings_specb - # pip install git+https://github.com/Muennighoff/sentence-transformers.git@sgpt_poolings_specb - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - tokens = ["[SOS]", "{SOS}"] - self._first_module().tokenizer.add_tokens(tokens, special_tokens=True) - self._first_module().auto_model.resize_token_embeddings(len(self._first_module().tokenizer)) - # Will be replaced with the rep tokens in the model ones - # The problem is we don't know if a text is query or document when tokenizing in the Transformer.py module, - # so we use the SOS tokens as an identifier if we have a query or document at hand & then replace them - # If we 
would directly use the brackets here, they may become part of another token - self._first_module().bos_spec_token_q = self._first_module().tokenizer.encode("[SOS]", add_special_tokens=False)[0] - self._first_module().bos_spec_token_d = self._first_module().tokenizer.encode("{SOS}", add_special_tokens=False)[0] - self._first_module().bos_spec_token_q_rep = self._first_module().tokenizer.encode("[", add_special_tokens=False)[0] - self._first_module().eos_spec_token_q = self._first_module().tokenizer.encode("]", add_special_tokens=False)[0] - self._first_module().bos_spec_token_d_rep = self._first_module().tokenizer.encode("{", add_special_tokens=False)[0] - self._first_module().eos_spec_token_d = self._first_module().tokenizer.encode("}", add_special_tokens=False)[0] - self._first_module().replace_bos = True - - def encode(self, sentences, **kwargs): - """Returns a list of embeddings for the given sentences. - Args: - sentences (`List[str]`): List of sentences to encode - batch_size (`int`): Batch size for the encoding - - Returns: - `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences - """ - # Add specb query token - sentences = ["[SOS]" + sent for sent in sentences] - return super().encode(sentences, **kwargs) - -class SimCSEWrapper: - def __init__(self, modelpath="princeton-nlp/sup-simcse-bert-base-uncased"): - self.device = "cuda" if torch.cuda.is_available() else "cpu" - self.tokenizer = AutoTokenizer.from_pretrained(modelpath) - self.model = AutoModel.from_pretrained(modelpath).to(self.device) - self.model.eval() - - def encode(self, sentences, batch_size=32, **kwargs): - """ Returns a list of embeddings for the given sentences. 
- Args: - sentences (`List[str]`): List of sentences to encode - batch_size (`int`): Batch size for the encoding - - Returns: - `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences - """ - all_embeddings = [] - length_sorted_idx = np.argsort([len(sen) for sen in sentences]) - sentences_sorted = [sentences[idx] for idx in length_sorted_idx] - - for start_index in range(0, len(sentences), batch_size): - sentences_batch = sentences_sorted[start_index:start_index+batch_size] - inputs = self.tokenizer(sentences_batch, padding=True, truncation=True, return_tensors="pt") - inputs = {k: v.to(self.device) for k,v in inputs.items()} - # Get the embeddings - with torch.no_grad(): - embeddings = self.model(**inputs, output_hidden_states=True, return_dict=True).pooler_output - all_embeddings.extend(embeddings.cpu().numpy()) - all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)] - return all_embeddings - -class LASER(): - def encode(self, sentences, batch_size=32, **kwargs): - """ - Returns a list of embeddings for the given sentences. 
- Args: - sentences (`List[str]`): List of sentences to encode - batch_size (`int`): Batch size for the encoding - - Returns: - `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences - """ - if os.path.exists("tmp.txt"): - os.remove("tmp.txt") - if os.path.exists("tmp.bin"): - os.remove("tmp.bin") - - # LASER expects one text per line, so we need to replace newlines - sentences = [s.replace("\n", " ") for s in sentences] - with open("tmp.txt", "w") as f: - f.write("\n".join(sentences)) - - rc = subprocess.call("/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/mteb/LASER/LASER_script.sh", shell=True) - - dim = 1024 - X = np.fromfile("tmp.bin", dtype=np.float32, count=-1) - X.resize(X.shape[0] // dim, dim) - print(X.shape) - return X - - -def parse_args(): - # Parse command line arguments - parser = argparse.ArgumentParser() - parser.add_argument("--lang", type=str, default="en") - parser.add_argument("--batchsize", type=int, default=32) - args = parser.parse_args() - return args - -def main(args): - - out = {} - for model_name in MODELS: - if ("sgpt" in model_name.lower()) and ("msmarco" in model_name.lower()): - model = SentenceTransformerSpecb(model_name) # Only used for SGPT-msmarco models - elif "simcse" in model_name.lower(): - model = SimCSEWrapper(model_name) - elif "LASER2" == model_name: - model = LASER() - else: - model = SentenceTransformer(model_name) - - evaluation = MTEB(tasks=TASKS, task_langs=[args.lang]) - model_name = model_name.split("/")[-1].split("_")[-1] - for task, task_name in zip(evaluation.tasks, TASKS): - task.load_data() - - # Encode all with the same batch size for a fair comparison of speed / sentence - data = task.dataset["test"]["sentence1"] + task.dataset["test"]["sentence2"] - data_len = len(data) - # Warmup run to build py caches etc - embeddings = np.asarray(model.encode(data, batch_size=args.batchsize)) - tick = time.time() - embeddings = np.asarray(model.encode(data, 
batch_size=args.batchsize)) - tock = time.time() - - out.setdefault(model_name, {}) - out[model_name].setdefault(task_name, {}) - out[model_name][task_name]["speed_ms"] = ((tock - tick) / data_len) * 1000 - out[model_name][task_name]["embedding_size_kb"] = embeddings.nbytes / data_len / 1000 - - # Overwrite every iteration for intermed results - with open("benchmark.json", "w") as f: - json.dump(out, f) - - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/script_mteb_french/README.md b/script_mteb_french/README.md deleted file mode 100644 index 03e6d2cf..00000000 --- a/script_mteb_french/README.md +++ /dev/null @@ -1,70 +0,0 @@ -# Scripts to run the French MTEB benchmark - -This folder contains the scripts used to generate the French tab results on the [MTEB](https://github.com/embeddings-benchmark/mteb) benchmark. - -Below are instructions to run the main scripts. - -## Benchmark - -### Running on host using venv - -* Navigate to the repository root folder -* Create your virtual env: - -```bash -python3 -m venv .venv -``` -* Activate it and install the requirements: -```bash -source .venv/bin/activate -pip install -r requirements.txt -``` -* Run the benchmark: -```bash -cd script_mteb_french -python run_benchmark.py -``` - -By default the benchmark runs on sentence_transformer models but you can specify the type with the argument `--model_type`: -```bash -# default ['sentence_transformer'] -python run_benchmark.py -# choosing other type ['voyage_ai'] -python run_benchmark.py --model_type voyage_ai -# running on two types ['voyage_ai', 'sentence_transformer'] -python run_benchmark.py --model_type voyage_ai sentence_transformer -``` - -You can also run the benchmark on one model only by specifying `--model_name`: -```bash -# default ['sentence_transformer'] -> all models of this type -python run_benchmark.py -# running on one model 'camembert-base' -python run_benchmark.py --model_type sentence_transformer --model_name "xlm-roberta-base" 
-``` -Note that the `model_name` should be included in models of specified `model_type`. - -You can run the benchmark on one task type in ["all", "classification", "clustering", "reranking", "retrieval", "pair_classification", "sts", "summarization", "bitextmining"], default is set to "all" and will run all tasks : -```bash -# running 'sentence_transformer' models on 'classification' task -python run_benchmark.py --model_type sentence_transformer --task_type classification -``` - -## Running using Docker - -* Navigate to the repository root folder -* Build the docker image: -```bash -docker build -t mtebscripts_image . -``` -* Run the benchmark in the container as follows: -``` -docker run -v $(pwd):/mtebscripts mtebscripts_image sh -c "cd script_mteb_french && python run_benchmark.py" -``` -If you want to use the gpu, make sure to add the `--gpus` option to your run command, or `--runtime=nvidia` if you are using an older version of docker. - -Note: Because the volume is shared between the host and the container, the results will be available in the host at the end. - -## Models' characteristics - -Additionnaly, you can find a script `get_model_specs.py` to compute models' characteristics (size, number of params, embeddings dimension). You can run it similarly to the benchmark by substituting `run_benchmark.py` with `get_model_specs.py`. 
diff --git a/script_mteb_french/results_analysis/__init__.py b/script_mteb_french/results_analysis/__init__.py deleted file mode 100644 index 11e8bfb7..00000000 --- a/script_mteb_french/results_analysis/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .results_parser import ResultsParser \ No newline at end of file diff --git a/slurmscripts/run_array_8a100_st5xxl.slurm b/slurmscripts/run_array_8a100_st5xxl.slurm deleted file mode 100644 index a6b37b60..00000000 --- a/slurmscripts/run_array_8a100_st5xxl.slurm +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-a100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:8 -#SBATCH --cpus-per-task=64 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@a100 -#SBATCH --reservation=hug -#SBATCH --constraint=a100 -#SBATCH --partition=gpu_p5 - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-xxl diff --git a/slurmscripts/run_array_a100_100_gtrxxl.slurm b/slurmscripts/run_array_a100_100_gtrxxl.slurm deleted file mode 100644 index 02a73c0c..00000000 --- a/slurmscripts/run_array_a100_100_gtrxxl.slurm +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-a100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=8 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not 
logical -#SBATCH --time 100:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@a100 -#SBATCH --reservation=hug -#SBATCH --constraint=a100 -#SBATCH --partition=gpu_p5 -#SBATCH --qos=qos_gpu-gc # up to 100h - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_gtr-t5-xxl diff --git a/slurmscripts/run_array_a100_100_sgpt5b8_asym.slurm b/slurmscripts/run_array_a100_100_sgpt5b8_asym.slurm deleted file mode 100644 index faa0fe98..00000000 --- a/slurmscripts/run_array_a100_100_sgpt5b8_asym.slurm +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-a100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=8 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time 100:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@a100 -#SBATCH --reservation=hug -#SBATCH --constraint=a100 -#SBATCH --partition=gpu_p5 -#SBATCH --qos=qos_gpu-gc # up to 100h - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath 
/gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-msmarco-specb-bitfit \ - --addspecbquery diff --git a/slurmscripts/run_array_a100_100_sgpt5b8_asym_specb.slurm b/slurmscripts/run_array_a100_100_sgpt5b8_asym_specb.slurm deleted file mode 100644 index 1c3dd343..00000000 --- a/slurmscripts/run_array_a100_100_sgpt5b8_asym_specb.slurm +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-a100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=8 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time 80:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@a100 -#SBATCH --reservation=hug -#SBATCH --constraint=a100 -#SBATCH --partition=gpu_p5 -#SBATCH --qos=qos_gpu-gc # up to 100h - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array_sgpt.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-msmarco-specb-bitfit \ - --addspecbquery diff --git a/slurmscripts/run_array_a100_100_sgpt5b8_sym.slurm b/slurmscripts/run_array_a100_100_sgpt5b8_sym.slurm deleted file mode 100644 index b667f34d..00000000 --- a/slurmscripts/run_array_a100_100_sgpt5b8_sym.slurm +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-a100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=8 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time 100:00:00 # maximum execution time 
(HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@a100 -#SBATCH --reservation=hug -#SBATCH --constraint=a100 -#SBATCH --partition=gpu_p5 -#SBATCH --qos=qos_gpu-gc # up to 100h - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-nli-bitfit diff --git a/slurmscripts/run_array_a100_100_st5xxl.slurm b/slurmscripts/run_array_a100_100_st5xxl.slurm deleted file mode 100644 index 5982a42b..00000000 --- a/slurmscripts/run_array_a100_100_st5xxl.slurm +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-a100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=8 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time 100:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@a100 -#SBATCH --reservation=hug -#SBATCH --constraint=a100 -#SBATCH --partition=gpu_p5 -#SBATCH --qos=qos_gpu-gc # up to 100h - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-xxl diff --git a/slurmscripts/run_array_a100_sgpt5b8_sym.slurm 
b/slurmscripts/run_array_a100_sgpt5b8_sym.slurm deleted file mode 100644 index 98bfb93f..00000000 --- a/slurmscripts/run_array_a100_sgpt5b8_sym.slurm +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-a100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=8 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@a100 -#SBATCH --reservation=hug -#SBATCH --constraint=a100 -#SBATCH --partition=gpu_p5 - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-5.8B-weightedmean-nli-bitfit diff --git a/slurmscripts/run_array_a100_st5xxl.slurm b/slurmscripts/run_array_a100_st5xxl.slurm deleted file mode 100644 index d8edab6e..00000000 --- a/slurmscripts/run_array_a100_st5xxl.slurm +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-a100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=8 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@a100 -#SBATCH --reservation=hug -#SBATCH --constraint=a100 -#SBATCH --partition=gpu_p5 - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd 
/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-xxl diff --git a/slurmscripts/run_array_ada.slurm b/slurmscripts/run_array_ada.slurm deleted file mode 100644 index 2581afe9..00000000 --- a/slurmscripts/run_array_ada.slurm +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=ada -#SBATCH --partition=prepost -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node! -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@cpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/mteb - -NUM_TASKS_PER_JOB=10 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python run_array_openai.py \ - --startid $TASK_START \ - --endid $TASK_END diff --git a/slurmscripts/run_array_v10032_bert.slurm b/slurmscripts/run_array_v10032_bert.slurm deleted file mode 100644 index 46a2c11f..00000000 --- a/slurmscripts/run_array_v10032_bert.slurm +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH 
--account=six@gpu -#SBATCH -C v100-32g - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/bert-base-uncased diff --git a/slurmscripts/run_array_v10032_cocondensor.slurm b/slurmscripts/run_array_v10032_cocondensor.slurm deleted file mode 100644 index a08fbc51..00000000 --- a/slurmscripts/run_array_v10032_cocondensor.slurm +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --qos=qos_gpu-t3 -#SBATCH -C v100-32g -#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_msmarco-bert-co-condensor diff --git a/slurmscripts/run_array_v10032_contriever.slurm b/slurmscripts/run_array_v10032_contriever.slurm deleted file mode 100644 index 26d5045b..00000000 --- a/slurmscripts/run_array_v10032_contriever.slurm +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH 
--ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --qos=qos_gpu-t3 -#SBATCH -C v100-32g -#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/nthakur_contriever-base-msmarco diff --git a/slurmscripts/run_array_v10032_gtr.slurm b/slurmscripts/run_array_v10032_gtr.slurm deleted file mode 100644 index 2cd303ff..00000000 --- a/slurmscripts/run_array_v10032_gtr.slurm +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH -C v100-32g -#SBATCH --time 10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_gtr-t5-base diff --git 
a/slurmscripts/run_array_v10032_st5.slurm b/slurmscripts/run_array_v10032_st5.slurm deleted file mode 100644 index 6c476c45..00000000 --- a/slurmscripts/run_array_v10032_st5.slurm +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:4 -#SBATCH --cpus-per-task=40 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --qos=qos_gpu-t3 -#SBATCH -C v100-32g -#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS=19 -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-base diff --git a/slurmscripts/run_array_v100_glove.slurm b/slurmscripts/run_array_v100_glove.slurm deleted file mode 100644 index 4ed2a08a..00000000 --- a/slurmscripts/run_array_v100_glove.slurm +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr 
$SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_average_word_embeddings_glove.6B.300d diff --git a/slurmscripts/run_array_v100_internet_sgpt125m_sym.slurm b/slurmscripts/run_array_v100_internet_sgpt125m_sym.slurm deleted file mode 100644 index 8050ee48..00000000 --- a/slurmscripts/run_array_v100_internet_sgpt125m_sym.slurm +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-mteb-125M # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@cpu -#SBATCH --partition=prepost - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS=54 -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-nli-bitfit diff --git a/slurmscripts/run_array_v100_komninos.slurm b/slurmscripts/run_array_v100_komninos.slurm deleted file mode 100644 index f3f0753f..00000000 --- a/slurmscripts/run_array_v100_komninos.slurm +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum 
execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_average_word_embeddings_komninos diff --git a/slurmscripts/run_array_v100_labse.slurm b/slurmscripts/run_array_v100_labse.slurm deleted file mode 100644 index 2a9a9a34..00000000 --- a/slurmscripts/run_array_v100_labse.slurm +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100-labse # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu -#SBATCH -C v100-32g - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --taskname BUCC \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_LaBSE diff --git a/slurmscripts/run_array_v100_laser.slurm b/slurmscripts/run_array_v100_laser.slurm deleted file mode 100644 index 608ca482..00000000 --- a/slurmscripts/run_array_v100_laser.slurm +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -#SBATCH 
--job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array_laser.py \ - --startid $TASK_START \ - --endid $TASK_END diff --git a/slurmscripts/run_array_v100_minilm.slurm b/slurmscripts/run_array_v100_minilm.slurm deleted file mode 100644 index 6b600bf1..00000000 --- a/slurmscripts/run_array_v100_minilm.slurm +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_all-MiniLM-L6-v2 diff --git a/slurmscripts/run_array_v100_mpnet.slurm b/slurmscripts/run_array_v100_mpnet.slurm deleted file mode 
100644 index 1b779454..00000000 --- a/slurmscripts/run_array_v100_mpnet.slurm +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_all-mpnet-base-v2 diff --git a/slurmscripts/run_array_v100_multimini.slurm b/slurmscripts/run_array_v100_multimini.slurm deleted file mode 100644 index 7044300a..00000000 --- a/slurmscripts/run_array_v100_multimini.slurm +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100-multimini # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu -#SBATCH -C v100-32g - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python 
mteb/run_array.py \ - --taskname BUCC \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2 diff --git a/slurmscripts/run_array_v100_multimpnet.slurm b/slurmscripts/run_array_v100_multimpnet.slurm deleted file mode 100644 index 0e1098fb..00000000 --- a/slurmscripts/run_array_v100_multimpnet.slurm +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100-multimpnet # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu -#SBATCH -C v100-32g - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --taskname BUCC \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_paraphrase-multilingual-mpnet-base-v2 diff --git a/slurmscripts/run_array_v100_sgpt125m_asym.slurm b/slurmscripts/run_array_v100_sgpt125m_asym.slurm deleted file mode 100644 index b6352cb9..00000000 --- a/slurmscripts/run_array_v100_sgpt125m_asym.slurm +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file 
name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS=20 -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array_sgpt.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/Muennighoff_SGPT-125M-weightedmean-msmarco-specb-bitfit \ - --addspecbquery diff --git a/slurmscripts/run_array_v100_sgpt1b3_sym.slurm b/slurmscripts/run_array_v100_sgpt1b3_sym.slurm deleted file mode 100644 index a1726be7..00000000 --- a/slurmscripts/run_array_v100_sgpt1b3_sym.slurm +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu -#SBATCH -C v100-32g - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --lang zh \ - --modelpath /gpfswork/rech/six/commun/models/bigscience-catalogue-lm-data_sgpt-nli-bloom-1b3 diff --git a/slurmscripts/run_array_v100_simcsesup.slurm b/slurmscripts/run_array_v100_simcsesup.slurm deleted file mode 100644 index c260c907..00000000 --- a/slurmscripts/run_array_v100_simcsesup.slurm +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -#SBATCH 
--job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array_simcse.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/princeton-nlp_sup-simcse-bert-base-uncased - -echo "END TIME: $(date)" diff --git a/slurmscripts/run_array_v100_simcseunsup.slurm b/slurmscripts/run_array_v100_simcseunsup.slurm deleted file mode 100644 index 023af0b2..00000000 --- a/slurmscripts/run_array_v100_simcseunsup.slurm +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array_simcse.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath 
/gpfswork/rech/six/commun/models/princeton-nlp_unsup-simcse-bert-base-uncased diff --git a/slurmscripts/run_array_v100_st5.slurm b/slurmscripts/run_array_v100_st5.slurm deleted file mode 100644 index 6f2af715..00000000 --- a/slurmscripts/run_array_v100_st5.slurm +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=run-array-v100 # job name -#SBATCH --ntasks=1 # number of MP tasks -#SBATCH --nodes=1 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=10 # number of cores per tasks -#SBATCH --hint=nomultithread # we get physical cores not logical -#SBATCH --time=10:00:00 # maximum execution time (HH:MM:SS) -#SBATCH --output=%x-%j.out # output file name -#SBATCH --account=six@gpu - -set -x -e - -source $six_ALL_CCFRWORK/start-prod -conda activate muennighoffmtb - -echo "START TIME: $(date)" - -cd /gpfsscratch/rech/six/commun/commun/experiments/muennighoff/ - -NUM_TASKS=19 -NUM_TASKS_PER_JOB=1 - -TASK_START=$(expr $SLURM_ARRAY_TASK_ID \* $NUM_TASKS_PER_JOB ) -TASK_END=$(expr $TASK_START + $NUM_TASKS_PER_JOB ) - -python mteb/run_array.py \ - --startid $TASK_START \ - --endid $TASK_END \ - --modelpath /gpfswork/rech/six/commun/models/sentence-transformers_sentence-t5-base diff --git a/script_mteb_french/results_analysis/README.md b/tools/README.md similarity index 91% rename from script_mteb_french/results_analysis/README.md rename to tools/README.md index 28e9990c..6a9da674 100644 --- a/script_mteb_french/results_analysis/README.md +++ b/tools/README.md @@ -6,11 +6,11 @@ Before starting, you can create your environment using the packages listed in *r ### result_parser.py -This scripts intent is to ***format results from json files in the results folder to a table*** (csv, excel or latex). +This script's intent is to ***format results from json files in the results folder to a table*** (csv, excel or latex). 
#### Usage -You can use the class ResultParser like so: +You can use the class ``ResultParser`` like so: ```py from results_analysis.results_parser import ResultParser @@ -30,7 +30,7 @@ rp = ResultParser() results_df = rp(RESULT_FOLDER_PATH, output_format="latex", apply_style=True) ``` -Alternatively, you can use a command line : +Alternatively, you can use a command line: ``` python .\script_mteb_french\results_analysis\results_parser.py --results_folder ./results --output_format csv ``` diff --git a/tools/analysis_tools/__init__.py b/tools/analysis_tools/__init__.py new file mode 100644 index 00000000..e6a677d2 --- /dev/null +++ b/tools/analysis_tools/__init__.py @@ -0,0 +1 @@ +from ...tools.results_parser import ResultsParser \ No newline at end of file diff --git a/script_mteb_french/results_analysis/dataset_correlation.py b/tools/analysis_tools/dataset_correlation.py similarity index 96% rename from script_mteb_french/results_analysis/dataset_correlation.py rename to tools/analysis_tools/dataset_correlation.py index 137e1be4..c2afdaa5 100644 --- a/script_mteb_french/results_analysis/dataset_correlation.py +++ b/tools/analysis_tools/dataset_correlation.py @@ -4,7 +4,7 @@ import seaborn as sns import numpy as np -from results_parser import ResultsParser +from mtebscripts.tools.results_parser import ResultsParser def parse_args() -> Namespace: @@ -15,7 +15,7 @@ def parse_args() -> Namespace: """ parser = ArgumentParser() parser.add_argument("--results_folder", required=True, type=str) - parser.add_argument("--output_folder", type=str, default="./analyses_outputs/results_correlations") + parser.add_argument("--output_folder", type=str, default="./analysis_outputs/results_correlations") parser.add_argument( "--output_format", type=str, diff --git a/script_mteb_french/results_analysis/datasets_similarity.py b/tools/analysis_tools/datasets_similarity.py similarity index 99% rename from script_mteb_french/results_analysis/datasets_similarity.py rename to 
tools/analysis_tools/datasets_similarity.py index c7c2e95f..9af5cb93 100644 --- a/script_mteb_french/results_analysis/datasets_similarity.py +++ b/tools/analysis_tools/datasets_similarity.py @@ -169,7 +169,7 @@ def parse_args() -> Namespace: parser = ArgumentParser() parser.add_argument("--task_type", type=str, default="all") parser.add_argument("--langs", type=list[str], default=["fr"]) - parser.add_argument("--output_folder", type=str, default="./analyses_outputs/datasets_similarity") + parser.add_argument("--output_folder", type=str, default="./analysis_outputs/datasets_similarity") parser.add_argument("--model_name", type=str, default="intfloat/multilingual-e5-large") parser.add_argument("--seed", type=int, default=42) parser.add_argument("--n_samples", type=int, default=90) diff --git a/script_mteb_french/results_analysis/performance_vs_characteristics.py b/tools/analysis_tools/performance_vs_characteristics.py similarity index 97% rename from script_mteb_french/results_analysis/performance_vs_characteristics.py rename to tools/analysis_tools/performance_vs_characteristics.py index 979598a2..458a1bd1 100644 --- a/script_mteb_french/results_analysis/performance_vs_characteristics.py +++ b/tools/analysis_tools/performance_vs_characteristics.py @@ -4,7 +4,7 @@ import os from argparse import ArgumentParser, Namespace -from results_parser import ResultsParser +from mtebscripts.tools.results_parser import ResultsParser import numpy as np # model,pretrained_or_tuned,multilingual_or_french,number_params,size_gb,seq_len,embedding_dim,model_type,license @@ -60,7 +60,7 @@ def parse_args() -> Namespace: parser.add_argument( "--output_folder", type=str, - default="./analyses_outputs/performance_vs_characteristics", + default="./analysis_outputs/performance_vs_characteristics", ) parser.add_argument( "--output_format", diff --git a/script_mteb_french/results_analysis/results_parser.py b/tools/analysis_tools/results_parser.py similarity index 99% rename from 
script_mteb_french/results_analysis/results_parser.py rename to tools/analysis_tools/results_parser.py index 95837835..8e56cf4e 100644 --- a/script_mteb_french/results_analysis/results_parser.py +++ b/tools/analysis_tools/results_parser.py @@ -262,7 +262,7 @@ def parse_args() -> Namespace: parser.add_argument("--results_folder", required=True, type=str) parser.add_argument("--output_format", type=str, choices=["excel", "csv", "latex"], default="excel") parser.add_argument("--apply_style", type=bool, default=True) - parser.add_argument("--output_folder", type=str, default="./analyses_outputs/") + parser.add_argument("--output_folder", type=str, default="./analysis_outputs/") args = parser.parse_args() return args diff --git a/script_mteb_french/results_analysis/statistical_tests.py b/tools/analysis_tools/statistical_tests.py similarity index 98% rename from script_mteb_french/results_analysis/statistical_tests.py rename to tools/analysis_tools/statistical_tests.py index 4030075b..2a590481 100644 --- a/script_mteb_french/results_analysis/statistical_tests.py +++ b/tools/analysis_tools/statistical_tests.py @@ -19,7 +19,7 @@ def parse_args() -> Namespace: parser.add_argument( "--output_folder", type=str, - default="./analyses_outputs/statistical_tests", + default="./analysis_outputs/statistical_tests", ) parser.add_argument( "--output_format", diff --git a/script_mteb_french/estimate_evaluation_cost.py b/tools/model_tools/estimate_evaluation_cost.py similarity index 100% rename from script_mteb_french/estimate_evaluation_cost.py rename to tools/model_tools/estimate_evaluation_cost.py diff --git a/script_mteb_french/get_model_specs.py b/tools/model_tools/get_model_specs.py similarity index 98% rename from script_mteb_french/get_model_specs.py rename to tools/model_tools/get_model_specs.py index a8469597..a2edf905 100644 --- a/script_mteb_french/get_model_specs.py +++ b/tools/model_tools/get_model_specs.py @@ -4,7 +4,7 @@ import os from huggingface_hub import 
HfFileSystem -import model_spec_utils +import mtebscripts.tools.model_tools.model_spec_utils as model_spec_utils from run_benchmark import TYPES_TO_MODELS import pandas as pd diff --git a/script_mteb_french/model_spec_utils.py b/tools/model_tools/model_spec_utils.py similarity index 100% rename from script_mteb_french/model_spec_utils.py rename to tools/model_tools/model_spec_utils.py diff --git a/script_mteb_french/preload_models.py b/tools/preload_models.py similarity index 100% rename from script_mteb_french/preload_models.py rename to tools/preload_models.py diff --git a/script_mteb_french/preload_tasks.py b/tools/preload_tasks.py similarity index 100% rename from script_mteb_french/preload_tasks.py rename to tools/preload_tasks.py diff --git a/script_mteb_french/run_benchmark.py b/tools/run_benchmark.py similarity index 70% rename from script_mteb_french/run_benchmark.py rename to tools/run_benchmark.py index febe880a..9b96278c 100644 --- a/script_mteb_french/run_benchmark.py +++ b/tools/run_benchmark.py @@ -6,6 +6,7 @@ from src.ModelConfig import ModelConfig from utils.tasks_list import get_tasks +from utils.models_list import TYPES_TO_MODELS, SENTENCE_TRANSORMER_MODELS_WITH_ERRORS logging.basicConfig( stream=sys.stdout, @@ -29,83 +30,9 @@ Example: MODELS = [ModelConfig("intfloat/multilingual-e5-base", model_type="sentence_transformer")] """ -############################# -# Step 1 : Setup model list # -############################# -SENTENCE_TRANSORMER_MODELS = [ - "bert-base-multilingual-cased", - "bert-base-multilingual-uncased", - "flaubert/flaubert_base_uncased", - "flaubert/flaubert_base_cased", - "flaubert/flaubert_large_cased", - "dangvantuan/sentence-camembert-base", - "sentence-transformers/distiluse-base-multilingual-cased-v2", - "sentence-transformers/all-MiniLM-L6-v2", - "sentence-transformers/all-MiniLM-L12-v2", - "sentence-transformers/LaBSE", - "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", - 
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", - "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", - "intfloat/multilingual-e5-base", - "intfloat/multilingual-e5-large", - "intfloat/multilingual-e5-small", - "distilbert-base-uncased", - "Geotrend/distilbert-base-25lang-cased", - "Geotrend/distilbert-base-en-fr-es-pt-it-cased", - "Geotrend/distilbert-base-en-fr-cased", - "Geotrend/distilbert-base-fr-cased", - "Geotrend/bert-base-25lang-cased", - "Geotrend/bert-base-15lang-cased", - "Geotrend/bert-base-10lang-cased", - "shibing624/text2vec-base-multilingual", - "izhx/udever-bloom-560m", - "izhx/udever-bloom-1b1", - "sentence-transformers/sentence-t5-base", - "sentence-transformers/sentence-t5-large", - "sentence-transformers/sentence-t5-xl", - "sentence-transformers/sentence-t5-xxl", - "intfloat/e5-mistral-7b-instruct", - "Wissam42/sentence-croissant-llm-base" -] - -# these models max_length is indicated to be 514 whereas the embedding layer actually supports 512 -SENTENCE_TRANSORMER_MODELS_WITH_ERRORS = [ - "camembert/camembert-base", - "camembert/camembert-large", - "dangvantuan/sentence-camembert-large", - "xlm-roberta-base", - "xlm-roberta-large", -] - -UNIVERSAL_SENTENCE_ENCODER_MODELS = [ - "vprelovac/universal-sentence-encoder-multilingual-3", - "vprelovac/universal-sentence-encoder-multilingual-large-3", -] - -LASER_MODELS = ["laser2"] - -VOYAGE_MODELS = ["voyage-2", "voyage-code-2"] - -OPEN_AI_MODELS = ["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"] - -COHERE_MODELS = ["embed-multilingual-light-v3.0", "embed-multilingual-v3.0"] - -MISTRAL_MODELS = ["mistral-embed"] - -TYPES_TO_MODELS = { - "sentence_transformer": SENTENCE_TRANSORMER_MODELS - + SENTENCE_TRANSORMER_MODELS_WITH_ERRORS, - "universal_sentence_encoder": UNIVERSAL_SENTENCE_ENCODER_MODELS, - "laser": LASER_MODELS, - "voyage_ai": VOYAGE_MODELS, - "open_ai": OPEN_AI_MODELS, - "cohere": COHERE_MODELS, - "mistral_ai": MISTRAL_MODELS, - -} 
########################## -# Step 3 : Run benchmark # +# Step : Run benchmark # ########################## diff --git a/script_mteb_french/utils/__init__.py b/tools/utils/__init__.py similarity index 100% rename from script_mteb_french/utils/__init__.py rename to tools/utils/__init__.py diff --git a/tools/utils/models_list.py b/tools/utils/models_list.py new file mode 100644 index 00000000..26fdd926 --- /dev/null +++ b/tools/utils/models_list.py @@ -0,0 +1,71 @@ +SENTENCE_TRANSORMER_MODELS = [ + "bert-base-multilingual-cased", + "bert-base-multilingual-uncased", + "flaubert/flaubert_base_uncased", + "flaubert/flaubert_base_cased", + "flaubert/flaubert_large_cased", + "dangvantuan/sentence-camembert-base", + "sentence-transformers/distiluse-base-multilingual-cased-v2", + "sentence-transformers/all-MiniLM-L6-v2", + "sentence-transformers/all-MiniLM-L12-v2", + "sentence-transformers/LaBSE", + "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", + "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", + "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", + "intfloat/multilingual-e5-base", + "intfloat/multilingual-e5-large", + "intfloat/multilingual-e5-small", + "distilbert-base-uncased", + "Geotrend/distilbert-base-25lang-cased", + "Geotrend/distilbert-base-en-fr-es-pt-it-cased", + "Geotrend/distilbert-base-en-fr-cased", + "Geotrend/distilbert-base-fr-cased", + "Geotrend/bert-base-25lang-cased", + "Geotrend/bert-base-15lang-cased", + "Geotrend/bert-base-10lang-cased", + "shibing624/text2vec-base-multilingual", + "izhx/udever-bloom-560m", + "izhx/udever-bloom-1b1", + "sentence-transformers/sentence-t5-base", + "sentence-transformers/sentence-t5-large", + "sentence-transformers/sentence-t5-xl", + "sentence-transformers/sentence-t5-xxl", + "intfloat/e5-mistral-7b-instruct", + "Wissam42/sentence-croissant-llm-base" +] + +# these models max_length is indicated to be 514 whereas the embedding layer actually supports 512 
+SENTENCE_TRANSORMER_MODELS_WITH_ERRORS = [ + "camembert/camembert-base", + "camembert/camembert-large", + "dangvantuan/sentence-camembert-large", + "xlm-roberta-base", + "xlm-roberta-large", +] + +UNIVERSAL_SENTENCE_ENCODER_MODELS = [ + "vprelovac/universal-sentence-encoder-multilingual-3", + "vprelovac/universal-sentence-encoder-multilingual-large-3", +] +# TODO: use json file keys + +LASER_MODELS = ["laser2"] + +VOYAGE_MODELS = ["voyage-2", "voyage-code-2"] + +OPEN_AI_MODELS = ["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"] + +COHERE_MODELS = ["embed-multilingual-light-v3.0", "embed-multilingual-v3.0"] + +MISTRAL_MODELS = ["mistral-embed"] + +TYPES_TO_MODELS = { + "sentence_transformer": SENTENCE_TRANSORMER_MODELS + + SENTENCE_TRANSORMER_MODELS_WITH_ERRORS, + "universal_sentence_encoder": UNIVERSAL_SENTENCE_ENCODER_MODELS, + "laser": LASER_MODELS, + "voyage_ai": VOYAGE_MODELS, + "open_ai": OPEN_AI_MODELS, + "cohere": COHERE_MODELS, + "mistral_ai": MISTRAL_MODELS, +} diff --git a/script_mteb_french/utils/tasks_list.py b/tools/utils/tasks_list.py similarity index 100% rename from script_mteb_french/utils/tasks_list.py rename to tools/utils/tasks_list.py diff --git a/script_mteb_french/universal_sentence_encoder_models_paths.json b/tools/utils/universal_sentence_encoder_models_paths.json similarity index 100% rename from script_mteb_french/universal_sentence_encoder_models_paths.json rename to tools/utils/universal_sentence_encoder_models_paths.json