diff --git a/mteb_fr_avg_perfromance_clean.csv b/mteb_fr_avg_perfromance_clean.csv new file mode 100644 index 00000000..9f73746a --- /dev/null +++ b/mteb_fr_avg_perfromance_clean.csv @@ -0,0 +1,47 @@ +Rank,Model,Overall Avg.,BitextMining,Clustering,PairClassification,Summarization,Reranking,Classification,STS,Retrieval +1,text-embedding-ada-002,0.7,0.95,0.47,1,0.3,0.9,0.69,0.78,0.46 +2,sentence-t5-xxl,0.66,0.94,0.4,1,0.3,0.77,0.67,0.78,0.43 +3,voyage-code-2,0.66,0.86,0.45,1,0.28,0.79,0.67,0.78,0.45 +4,multilingual-e5-large,0.65,0.95,0.38,1,0.31,0.72,0.66,0.8,0.4 +5,embed-multilingual-v3.0,0.65,0.94,0.39,1,0.31,0.68,0.67,0.81,0.39 +6,multilingual-e5-base,0.65,0.95,0.39,1,0.31,0.72,0.65,0.78,0.38 +7,sentence-camembert-large,0.65,0.89,0.4,1,0.31,0.73,0.66,0.82,0.37 +8,sentence-t5-xl,0.65,0.91,0.4,1,0.32,0.73,0.65,0.77,0.38 +9,multilingual-e5-small,0.64,0.94,0.39,1,0.32,0.71,0.6,0.78,0.34 +10,paraphrase-multilingual-mpnet-base-v2,0.63,0.94,0.39,1,0.29,0.69,0.63,0.78,0.34 +11,embed-multilingual-light-v3.0,0.63,0.89,0.38,1,0.31,0.7,0.61,0.78,0.36 +12,sentence-croissant-llm-base,0.63,0.91,0.39,1,0.29,0.68,0.65,0.76,0.34 +13,universal-sentence-encoder-multilingual-3,0.63,0.94,0.4,1,0.28,0.65,0.64,0.75,0.34 +14,universal-sentence-encoder-multilingual-large-3,0.63,0.95,0.38,1,0.29,0.66,0.67,0.75,0.32 +15,sentence-t5-large,0.62,0.9,0.39,1,0.3,0.69,0.62,0.75,0.35 +16,voyage-2,0.62,0.76,0.41,1,0.31,0.73,0.59,0.72,0.4 +17,distiluse-base-multilingual-cased-v2,0.61,0.94,0.36,1,0.28,0.63,0.63,0.75,0.3 +18,LaBSE,0.61,0.96,0.36,1,0.3,0.61,0.65,0.74,0.23 +19,paraphrase-multilingual-MiniLM-L12-v2,0.61,0.92,0.37,1,0.29,0.62,0.6,0.75,0.3 +20,sentence-t5-base,0.6,0.83,0.38,1,0.3,0.64,0.58,0.74,0.3 +21,text2vec-base-multilingual,0.59,0.92,0.31,1,0.29,0.61,0.56,0.78,0.22 +22,sentence-camembert-base,0.58,0.72,0.32,1,0.29,0.64,0.57,0.78,0.29 +23,laser2,0.54,0.95,0.26,1,0.32,0.46,0.57,0.67,0.09 +24,e5-mistral-7b-instruct,0.52,0.37,0.39,1,0.32,0.62,0.58,0.65,0.23 
+25,bert-base-multilingual-uncased,0.51,0.76,0.35,1,0.31,0.53,0.48,0.57,0.11 +26,all-MiniLM-L12-v2,0.51,0.48,0.3,1,0.27,0.57,0.52,0.66,0.3 +27,udever-bloom-1b1,0.5,0.52,0.35,1,0.29,0.51,0.55,0.62,0.16 +28,all-MiniLM-L6-v2,0.49,0.41,0.32,1,0.28,0.46,0.52,0.68,0.29 +29,multi-qa-MiniLM-L6-cos-v1,0.49,0.38,0.29,1,0.28,0.53,0.51,0.67,0.29 +30,bert-base-15lang-cased,0.48,0.75,0.33,1,0.29,0.45,0.46,0.5,0.05 +31,bert-base-10lang-cased,0.48,0.75,0.33,1,0.29,0.45,0.46,0.5,0.05 +32,bert-base-multilingual-cased,0.48,0.75,0.33,1,0.29,0.45,0.46,0.5,0.05 +33,bert-base-25lang-cased,0.48,0.75,0.33,1,0.29,0.45,0.46,0.5,0.05 +34,distilbert-base-en-fr-cased,0.47,0.65,0.34,1,0.31,0.42,0.45,0.54,0.06 +35,distilbert-base-en-fr-es-pt-it-cased,0.47,0.65,0.34,1,0.31,0.42,0.45,0.53,0.06 +36,distilbert-base-25lang-cased,0.47,0.65,0.34,1,0.31,0.42,0.45,0.53,0.06 +37,distilbert-base-fr-cased,0.45,0.45,0.34,1,0.31,0.42,0.45,0.54,0.06 +38,camembert-large,0.43,0.26,0.36,1,0.28,0.42,0.49,0.59,0.05 +39,xlm-roberta-base,0.4,0.48,0.25,1,0.29,0.35,0.31,0.51,0 +40,xlm-roberta-large,0.39,0.35,0.25,1,0.29,0.39,0.31,0.49,0.02 +41,camembert-base,0.39,0.19,0.29,1,0.3,0.33,0.42,0.57,0.02 +42,udever-bloom-560m,0.39,0.32,0.25,1,0.24,0.4,0.3,0.51,0.07 +43,flaubert_base_cased,0.38,0.23,0.23,1,0.31,0.45,0.25,0.52,0.06 +44,flaubert_base_uncased,0.35,0.12,0.18,1,0.29,0.46,0.23,0.43,0.06 +45,distilbert-base-uncased,0.33,0.04,0.23,1,0.31,0.35,0.32,0.39,0.02 +46,flaubert_large_cased,0.32,0.11,0.21,1,0.29,0.35,0.25,0.33,0.01 diff --git a/performance_analysis/avg_performance_overall_80.png b/performance_analysis/avg_performance_overall_80.png new file mode 100644 index 00000000..309480bd Binary files /dev/null and b/performance_analysis/avg_performance_overall_80.png differ diff --git a/performance_analysis/avg_performance_overall_90.png b/performance_analysis/avg_performance_overall_90.png new file mode 100644 index 00000000..1bc79f70 Binary files /dev/null and b/performance_analysis/avg_performance_overall_90.png 
differ diff --git a/performance_analysis/models_ranks_en_fr.csv b/performance_analysis/models_ranks_en_fr.csv new file mode 100644 index 00000000..63822ae1 --- /dev/null +++ b/performance_analysis/models_ranks_en_fr.csv @@ -0,0 +1,16 @@ +model,rank_fr,avg_perf_fr,rank_en,avg_perf_en +text-embedding-ada-002,1,69.51,4,60.99 +sentence-t5-xxl,2,66.26,6,59.51 +multilingual-e5-large,3,65.30,3,61.50 +embed-multilingual-v3.0,4,65.01,2,64.47 +multilingual-e5-base,5,64.62,7,59.45 +sentence-t5-xl,6,64.55,10,57.87 +multilingual-e5-small,7,63.52,9,57.87 +embed-multilingual-light-v3.0,8,62.91,5,60.08 +sentence-t5-large,9,62.48,11,57.06 +LaBSE,10,60.77,14,45.21 +sentence-t5-base,11,59.73,13,55.27 +laser2,12,53.95,15,34.95 +e5-mistral-7b-instruct,13,52.11,1,66.63 +udever-bloom-1b1,14,50.06,8,58.29 +udever-bloom-560m,15,38.55,12,55.81 \ No newline at end of file diff --git a/performance_analysis/mteb_fr_avg_perfromance.csv b/performance_analysis/mteb_fr_avg_perfromance.csv new file mode 100644 index 00000000..432c1f21 --- /dev/null +++ b/performance_analysis/mteb_fr_avg_perfromance.csv @@ -0,0 +1,47 @@ +rank,model,model_short,overall_avg,avg_BitextMining,avg_Clustering,avg_PairClassification,avg_Summarization,avg_Reranking,avg_Classification,avg_STS,avg_Retrieval +1,text-embedding-ada-002,text-embedding-ada-002,0.6951644355235754,0.9494668404389334,0.4695098049319921,1.0,0.3049962446901472,0.8986666666666667,0.6928530957176636,0.7830628317431997,0.46276 +2,sentence-transformers/sentence-t5-xxl,sentence-t5-xxl,0.6626105378757613,0.9428132611527825,0.40141444804587856,1.0,0.3039226775800043,0.7675599125789975,0.6736015215260864,0.7837158154556741,0.4278566666666667 +3,voyage-code-2,voyage-code-2,0.6607543354772605,0.8648817339979643,0.44772627976172225,1.0,0.2833835044555462,0.7877894580644871,0.674405741446389,0.7752412994253078,0.4526066666666666 
+4,intfloat/multilingual-e5-large,multilingual-e5-large,0.6530770252450712,0.9494477963336697,0.38000952842494967,1.0,0.30919609149507166,0.7213476257110656,0.6647039061131861,0.8030279205492931,0.3968833333333333 +5,embed-multilingual-v3.0,embed-multilingual-v3.0,0.6501831721201174,0.9436180278908184,0.38710472703066007,1.0,0.31264009523973235,0.68364715235487,0.6708862541954806,0.812799120249379,0.39077 +6,intfloat/multilingual-e5-base,multilingual-e5-base,0.6462512097705435,0.9485477222576219,0.38545765042676977,1.0,0.3076006060850825,0.7176696947584695,0.6471235689941096,0.7832571023089624,0.3803533333333333 +7,dangvantuan/sentence-camembert-large,sentence-camembert-large,0.6461323149268167,0.8896500289986259,0.39601453569053924,1.0,0.3088268637624669,0.7288404268075016,0.6581381615866052,0.8173718359021275,0.3702166666666667 +8,sentence-transformers/sentence-t5-xl,sentence-t5-xl,0.6455782207682357,0.914194434666618,0.39973238961571084,1.0,0.31593902286506226,0.7318303641118233,0.6509082465602415,0.7732979749930968,0.3787233333333333 +9,intfloat/multilingual-e5-small,multilingual-e5-small,0.6352093923244534,0.9397273648926397,0.38660026421549304,1.0,0.31849046935173364,0.7143441730444517,0.6031649099555619,0.781857957135747,0.33749 +10,sentence-transformers/paraphrase-multilingual-mpnet-base-v2,paraphrase-multilingual-mpnet-base-v2,0.632789004844911,0.9351362799535213,0.38590408909583385,1.0,0.2946828386641493,0.6878437274635214,0.6339065747149032,0.7818485288673589,0.34298999999999996 +11,embed-multilingual-light-v3.0,embed-multilingual-light-v3.0,0.6291484133331817,0.8868288756037916,0.3763510620733425,1.0,0.314006702294,0.6981591823870812,0.6129809238696075,0.7826072271042985,0.3622533333333333 +12,Wissam42/sentence-croissant-llm-base,sentence-croissant-llm-base,0.627391358913071,0.9115055030186956,0.3901334861447883,1.0,0.2904259923005852,0.6794982333959555,0.645698371451514,0.7586659516596957,0.3432033333333333 
+13,vprelovac/universal-sentence-encoder-multilingual-3,universal-sentence-encoder-multilingual-3,0.6265829443517389,0.9389235256726924,0.40302113821835384,1.0,0.28212107619806726,0.6503764883156413,0.6440286794825583,0.7491993135932651,0.3449933333333333 +14,vprelovac/universal-sentence-encoder-multilingual-large-3,universal-sentence-encoder-multilingual-large-3,0.6262365907145946,0.9462731316854489,0.38350292040664175,1.0,0.28557999486423186,0.6626326565349889,0.6705032602281474,0.7454940953306313,0.3159066666666666 +15,sentence-transformers/sentence-t5-large,sentence-t5-large,0.6248579629896693,0.8965230618449573,0.3944251705470086,1.0,0.30227656374127543,0.6887941209053229,0.6163331318590457,0.7514283216864115,0.34908333333333336 +16,voyage-2,voyage-2,0.6151915761090714,0.763619943373719,0.4065258527500973,1.0,0.3087952286335955,0.7309455175571458,0.5949259842992485,0.7181567489254315,0.3985633333333333 +17,sentence-transformers/distiluse-base-multilingual-cased-v2,distiluse-base-multilingual-cased-v2,0.612733038045636,0.9442576025375713,0.3568006274724116,1.0,0.28121115462556573,0.6327844297925111,0.6349029881952936,0.754650835075068,0.29725666666666667 +18,sentence-transformers/LaBSE,LaBSE,0.6077927900341175,0.9584800000509812,0.36423082609658525,1.0,0.30163745632848504,0.6139664534871745,0.6460218538714967,0.7433057304382169,0.23470000000000002 +19,sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,paraphrase-multilingual-MiniLM-L12-v2,0.6070958258023563,0.9239648348401902,0.3689843152052393,1.0,0.2919931043966207,0.62020753052302,0.6008609074137077,0.7518192473734059,0.2989366666666667 +20,sentence-transformers/sentence-t5-base,sentence-t5-base,0.5973361564869367,0.8288174271503671,0.3787815338914756,1.0,0.30009114233509354,0.6408322841312224,0.5807320959427834,0.7448847684445514,0.30455000000000004 
+21,shibing624/text2vec-base-multilingual,text2vec-base-multilingual,0.5861933242292041,0.9202347781942363,0.30844190733644167,1.0,0.2933489058303607,0.6088252146048312,0.5577811513544871,0.7827446365132761,0.21817 +22,dangvantuan/sentence-camembert-base,sentence-camembert-base,0.5757303623085768,0.7196562192123898,0.3209229378973184,1.0,0.2877116932900191,0.6421545445974397,0.5662851953794757,0.777855641425305,0.29125666666666666 +23,laser2,laser2,0.5395602370680996,0.9530804754795708,0.2588521302214146,1.0,0.3156475744412745,0.4561315484706734,0.5731661531610298,0.6667873481041676,0.09281666666666667 +24,intfloat/e5-mistral-7b-instruct,e5-mistral-7b-instruct,0.5211584106025406,0.372671620893762,0.39236855125870124,1.0,0.3221574896003379,0.6220602308202283,0.5771433352505323,0.6535927236634304,0.22927333333333333 +25,bert-base-multilingual-uncased,bert-base-multilingual-uncased,0.5133181941739059,0.761227175051093,0.35246670150517373,1.0,0.3072334740467725,0.5262463355227507,0.4836965383766139,0.5657086622221764,0.10996666666666666 +26,sentence-transformers/all-MiniLM-L12-v2,all-MiniLM-L12-v2,0.512558614576707,0.4750684404934165,0.2981421101986864,1.0,0.26628635658682365,0.5703415468971924,0.521461582676646,0.6645122130942248,0.30465666666666663 +27,izhx/udever-bloom-1b1,udever-bloom-1b1,0.5006144696402559,0.5185761589372356,0.3484199980997863,1.0,0.29477739086433014,0.5085692286618537,0.5492174562777438,0.6233988576144311,0.16195666666666667 +28,sentence-transformers/all-MiniLM-L6-v2,all-MiniLM-L6-v2,0.4933324952351013,0.407639269899297,0.3170242868527332,1.0,0.28281807578154883,0.45629653059171443,0.5157633810735549,0.6813884176819617,0.28573 +29,sentence-transformers/multi-qa-MiniLM-L6-cos-v1,multi-qa-MiniLM-L6-cos-v1,0.49276447531357576,0.3820720622217845,0.28846829533881896,1.0,0.27590643209882243,0.5268152094562065,0.5090080656334154,0.6685724044262251,0.29127333333333333 
+30,Geotrend/bert-base-15lang-cased,bert-base-15lang-cased,0.47956739404892507,0.7484100239150743,0.33197144060408157,1.0,0.2913284671743393,0.4472866130606892,0.4610657032072243,0.5047335710966584,0.051743333333333336 +31,Geotrend/bert-base-10lang-cased,bert-base-10lang-cased,0.4791783722192484,0.7483481678365821,0.32998598047845495,1.0,0.29055398239730224,0.4472991001360561,0.4611012325191655,0.5044285143864266,0.05171 +32,bert-base-multilingual-cased,bert-base-multilingual-cased,0.4784275269395698,0.7484254924362301,0.3304280318198251,1.0,0.28807795872171416,0.44742327493995027,0.461136739618303,0.500135384647203,0.05179333333333334 +33,Geotrend/bert-base-25lang-cased,bert-base-25lang-cased,0.4783485377288698,0.7484254924362301,0.3304255148848392,1.0,0.2884414777790376,0.44751125845104356,0.4609919042829104,0.4992726539968981,0.051719999999999995 +34,Geotrend/distilbert-base-en-fr-cased,distilbert-base-en-fr-cased,0.4711504312165386,0.6462841438949981,0.341588906005046,1.0,0.31226674511530683,0.42253724132772413,0.4535877037185052,0.5363420430040616,0.05659666666666666 +35,Geotrend/distilbert-base-en-fr-es-pt-it-cased,distilbert-base-en-fr-es-pt-it-cased,0.47089427419098645,0.646905811373573,0.3426820130538851,1.0,0.3099238484208555,0.42240978675516194,0.45423163696943175,0.5342644302883172,0.056736666666666664 +36,Geotrend/distilbert-base-25lang-cased,distilbert-base-25lang-cased,0.4700887935578349,0.646728997287605,0.34069125231148156,1.0,0.3070335729873741,0.42208716688558023,0.4540327101358815,0.5333733155214233,0.05676333333333333 +37,Geotrend/distilbert-base-fr-cased,distilbert-base-fr-cased,0.44622649323425023,0.44755904164651844,0.3441308354905828,1.0,0.3098048164787563,0.4211180579200582,0.45328919656019884,0.5384699977778871,0.05544000000000001 +38,camembert/camembert-large,camembert-large,0.4314141120757555,0.25915844052750026,0.35539819419480195,1.0,0.27977116205095437,0.42059495556392223,0.49190189636509246,0.5929849145704393,0.051503333333333325 
+39,xlm-roberta-base,xlm-roberta-base,0.39878476323031586,0.4821269386209579,0.25055604928864256,1.0,0.2914448969497285,0.3466560290458458,0.3123173121764971,0.505220213094188,0.0019566666666666665 +40,xlm-roberta-large,xlm-roberta-large,0.3877593672323856,0.34708423834289515,0.25352623148067505,1.0,0.28894306279406234,0.3900765807837409,0.3070959488543944,0.49273887560331725,0.022609999999999995 +41,camembert/camembert-base,camembert-base,0.38757941178574273,0.18502322000804974,0.2878733482356867,1.0,0.29630716022029846,0.32776878697185785,0.4212973320876008,0.5654054467624484,0.01696 +42,izhx/udever-bloom-560m,udever-bloom-560m,0.38557738841307115,0.3176968634737299,0.24744231331302316,1.0,0.236335996838468,0.3981434451285849,0.3043004945739746,0.5088966606434552,0.07180333333333333 +43,flaubert/flaubert_base_cased,flaubert_base_cased,0.381890067606526,0.22913762160209786,0.22640926272969017,1.0,0.312594655829165,0.4534814003007701,0.24899773800547045,0.5212265290516807,0.06327333333333333 +44,flaubert/flaubert_base_uncased,flaubert_base_uncased,0.3477936487122823,0.12151790182091164,0.17691473162499663,1.0,0.2942790176376187,0.4586745583595503,0.23278594236339425,0.43487037122512057,0.06330666666666666 +45,distilbert-base-uncased,distilbert-base-uncased,0.3333864887779967,0.040609701568365165,0.2259251428209009,1.0,0.31092622442837853,0.3516089220235861,0.3232174242573629,0.39013782845871353,0.024666666666666667 +46,flaubert/flaubert_large_cased,flaubert_large_cased,0.31803065121515967,0.11075184087052004,0.20832375750957685,1.0,0.2924968054601468,0.34543171919622384,0.2519598161245585,0.3292846038935842,0.005996666666666667 diff --git a/script_mteb_french/results_analysis/compute_avergae_performance.py b/script_mteb_french/results_analysis/compute_avergae_performance.py new file mode 100644 index 00000000..866f78dc --- /dev/null +++ b/script_mteb_french/results_analysis/compute_avergae_performance.py @@ -0,0 +1,73 @@ +import os +import sys +import argparse 
import logging

from datasets import load_dataset
import pandas as pd

from results_parser import ResultsParser

logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)


def split_model_name(model_name: str) -> str:
    """Return the model name without its leading path/organisation part.

    Everything up to and including the last ``/`` is dropped, so
    ``"intfloat/multilingual-e5-large"`` becomes ``"multilingual-e5-large"``.
    A name without any ``/`` is returned unchanged.

    Args:
        model_name: Model identifier, optionally prefixed with ``org/``.

    Returns:
        The last ``/``-separated segment of ``model_name``.
    """
    # rsplit keeps the final segment for any number of separators; names with
    # more than one "/" previously fell back to the *first* segment.
    return model_name.rsplit("/", 1)[-1]


def main(args):
    """Average the parsed results per task type and save a ranked CSV.

    For every task type found in the parsed results, the scores of its tasks
    are averaged per model (``avg_<task_type>`` columns). ``overall_avg`` is
    the unweighted mean of those per-type averages. Models are sorted by
    ``overall_avg`` (best first), given a 1-based ``rank``, and written to
    ``performance_analysis/mteb_fr_avg_perfromance.csv``.

    Args:
        args: Parsed command-line arguments; only ``args.results_folder`` is
            read and it is handed to ``ResultsParser``.
    """
    fr_results = ResultsParser()(args.results_folder)

    # Column labels are unpacked as (task_type, task) pairs. dict.fromkeys
    # removes duplicates while keeping first-seen order, so the column order
    # of the CSV is identical on every run (iterating a set of strings is not
    # stable across interpreter runs).
    task_types = list(
        dict.fromkeys(task_type for task_type, _ in fr_results.columns)
    )

    new_df = pd.DataFrame({})
    for task_type in task_types:
        new_df[f"avg_{task_type}"] = fr_results[task_type].mean(axis=1)

    # Computed before insertion, so only the per-type averages are included.
    new_df.insert(0, "overall_avg", new_df.mean(axis=1))

    new_df.reset_index(inplace=True)
    # NOTE(review): assumes ResultsParser names the index "model" - confirm.
    new_df.insert(1, "model_short", new_df["model"].map(split_model_name))

    # Sort models from best to worst based on overall average performance.
    new_df.sort_values(by="overall_avg", ascending=False, inplace=True)
    new_df.reset_index(drop=True, inplace=True)

    # After sorting, the row position is the rank (1 = best).
    new_df.insert(0, "rank", list(range(1, len(new_df) + 1)))

    # Save results to CSV. The misspelled file name is kept on purpose: it is
    # the name of the CSV committed under performance_analysis/.
    output_dir = "performance_analysis"
    os.makedirs(output_dir, exist_ok=True)
    new_df.to_csv(
        os.path.join(output_dir, "mteb_fr_avg_perfromance.csv"), index=False
    )


def parse_args():
    """Parse the command line.

    Returns:
        Namespace with ``results_folder`` (str, default ``"results"``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--results_folder", type=str, default="results")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    main(args)
# script_mteb_french/results_analysis/plot_performance_over_languages.py
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


def main(
    csv_path="performance_analysis/models_ranks_en_fr.csv",
    output_path="performance_analysis/avg_performance_overall.png",
    rotation=80,
):
    """Plot French vs. English average performance per model as grouped bars.

    Reads the ``model``, ``avg_perf_fr`` and ``avg_perf_en`` columns of
    ``csv_path``, draws one pair of bars per model and saves the figure to
    ``output_path``. Called without arguments it behaves like the original
    hard-coded script.

    Args:
        csv_path: CSV file holding the per-model averages.
        output_path: Where the PNG is written; its directory must exist.
        rotation: Rotation (degrees) of the model names on the x axis.
    """
    df = pd.read_csv(csv_path)

    models = df["model"].values
    positions = np.arange(len(models))
    bar_width = 0.4
    palette = plt.get_cmap("Set2")

    fig, ax = plt.subplots()
    # The two bars of a model sit side by side, centred on its tick.
    ax.bar(
        positions - bar_width / 2,
        df["avg_perf_fr"].values,
        bar_width,
        label="French",
        color=palette(0),
    )
    ax.bar(
        positions + bar_width / 2,
        df["avg_perf_en"].values,
        bar_width,
        label="English",
        color=palette(1),
    )

    ax.set_xticks(positions)
    ax.set_xticklabels(models, rotation=rotation)
    # The y axis is clipped to 30-75 rather than starting at zero.
    ax.set_ylim(30, 75)
    ax.set_ylabel("Overall average performance")
    ax.legend(loc="lower left")

    fig.tight_layout()
    fig.savefig(output_path)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


if __name__ == "__main__":
    main()