diff --git a/danish-nlg.csv b/danish-nlg.csv index 0f8dcc2b..2e7cec76 100644 --- a/danish-nlg.csv +++ b/danish-nlg.csv @@ -18,9 +18,9 @@ google/gemma-2-9b-it (few-shot),9242,256,8193,True,False,2062,1.65,49.05,52.26,5 CohereForAI/aya-expanse-32b (few-shot),32296,256,8193,False,False,2288,1.67,61.15,56.39,37.75,58.09,67.6,66.13,73.67,69.23 "152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32764,True,False,2126,1.69,56.96,55.11,42.64,54.58,66.8,83.94,73.32,60.52 "meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4096,True,False,1892,1.69,58.06,53.24,39.71,62.51,67.39,84.43,71.35,54.31 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.7,59.96,56.91,67.13,17.52,65.32,91.7,88.38,71.08 "gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,1.71,57.52,49.73,57.56,51.79,64.66,79.84,83.21,63.33 "claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,1.72,65.88,63.61,71.03,46.24,64.6,84.02,85.34,23.71 +"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.73,59.96,56.91,67.13,17.52,65.32,91.7,88.38,71.08 "gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,1.74,35.79,53.69,62.98,51.96,64.56,87.34,98.11,67.56 mhenrichsen/danskgpt-chat-v2.1 (few-shot),-1,32,32768,True,False,5085,1.93,51.08,54.69,30.95,56.56,66.9,79.39,73.22,48.16 google/gemma-2-9b (few-shot),9242,256,8193,True,False,2038,1.95,44.16,38.84,43.42,60.11,67.46,81.68,77.77,50.04 @@ -29,8 +29,8 @@ timpal0l/sol (few-shot),10732,32,4096,False,False,3701,1.98,54.91,44.38,21.11,58 "RJuro/munin-neuralbeagle-7b (few-shot, val)",7242,32,32768,False,True,2493,2.01,51.44,54.91,22.77,56.51,68.06,74.24,70.86,41.49 mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024001,True,False,7095,2.03,53.95,48.97,31.78,56.44,66.24,67.09,72.87,42.77 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.05,63.63,50.82,35.58,54.33,65.71,58.67,72.01,33.61 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.09,54.7,54.81,32.11,48.87,66.79,56.14,63.54,39.48 nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.09,58.34,59.14,56.46,39.77,47.41,85.69,91.37,74.3 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.1,54.7,54.81,32.11,48.87,66.79,56.14,63.54,39.48 skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.1,49.17,51.51,32.04,58.52,66.83,65.77,77.56,30.34 mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.11,52.62,50.07,37.37,54.87,66.46,69.0,79.86,24.68 utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.12,34.0,53.97,32.21,57.1,67.55,80.75,74.77,36.86 @@ -54,15 +54,15 @@ senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.27,52.61,49 ThatsGroes/munin-SkoleGPTOpenOrca-7b-16bit (few-shot),7242,32,32768,False,False,3006,2.28,45.37,39.63,21.77,58.28,67.91,78.71,63.74,28.58 danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.28,39.12,36.47,26.76,58.75,67.89,91.32,79.92,24.76 four-two-labs/orpo-llama-3-swe (few-shot),8030,128,8192,False,False,4974,2.29,46.75,51.73,24.73,59.97,65.21,55.72,61.59,25.43 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.29,57.74,48.43,27.12,46.76,66.36,57.87,50.42,29.17 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.29,57.74,48.43,27.12,46.76,66.36,57.87,50.42,29.17 CohereForAI/aya-expanse-8b 
(few-shot),8028,256,8192,False,False,2686,2.3,51.32,52.0,18.48,52.43,66.18,41.32,52.24,37.67 "KennethEnevoldsen/munin_mistral-7b (few-shot, val)",7242,32,32768,False,True,2543,2.32,46.7,47.52,8.04,60.05,67.18,70.49,66.28,25.13 -NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.32,49.18,49.76,41.28,12.83,67.44,77.55,74.67,36.42 "mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.32,52.72,49.11,16.09,46.28,66.62,60.03,59.83,40.4 AI-Sweden-Models/Llama-3-8B (few-shot),8030,128,8192,True,False,4141,2.33,36.72,46.48,26.1,58.0,67.23,82.84,75.27,14.55 bineric/NorskGPT-Llama3-8b (few-shot),8030,128,8192,False,False,3382,2.33,51.4,42.13,3.91,57.81,66.7,65.15,57.36,44.55 meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.33,50.92,47.86,29.19,48.38,64.89,51.88,62.31,27.37 timpal0l/njord-alpha (few-shot),7242,32,32768,True,False,5431,2.33,38.25,39.37,29.76,57.02,67.57,83.45,70.43,16.43 +NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.34,49.18,49.76,41.28,12.83,67.44,77.55,74.67,36.42 "birgermoell/Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2887,2.34,47.71,48.21,19.55,56.46,65.46,61.29,60.29,28.33 "AI-Sweden-Models/tyr (few-shot, val)",7242,32,32768,False,True,6079,2.35,47.01,50.6,13.73,56.35,66.82,57.53,67.04,22.16 danish-foundation-models/munin-7b-alpha (few-shot),7242,32,32768,True,False,6116,2.35,40.6,36.89,26.41,57.81,67.27,77.63,67.83,25.16 @@ -72,11 +72,11 @@ mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2 "merge-crew/da-sv-dare-ties-density-0.9 (few-shot, val)",7242,32,32768,True,True,2443,2.38,45.61,53.73,17.08,56.67,66.14,60.58,57.89,16.45 syvai/llama3-da-base (few-shot),8030,128,8192,True,False,3229,2.38,50.0,48.24,25.59,55.52,65.98,43.94,62.48,19.13 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.39,53.44,49.17,20.55,51.7,65.47,37.41,62.46,26.25 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.39,49.46,51.16,23.01,49.75,65.26,51.64,59.28,24.3 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.39,49.46,51.16,23.01,49.75,65.26,51.64,59.28,24.3 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.4,51.94,51.97,29.99,38.99,64.49,35.95,59.76,34.63 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.4,59.48,56.46,20.57,38.23,64.38,53.54,69.09,21.03 bineric/NorskGPT-Mistral-7b (few-shot),7242,32,32768,False,False,1440,2.41,50.76,40.41,0.0,57.26,66.89,64.32,53.18,43.42 -google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.42,19.59,46.55,32.64,59.4,66.63,74.32,65.58,24.74 +google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.41,19.59,46.55,32.64,59.4,66.63,74.32,65.58,24.74 "merge-crew/da-sv-ties (few-shot, val)",7242,32,32768,True,True,2457,2.42,45.39,51.95,13.25,58.51,66.33,57.16,60.06,13.62 ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4097,True,False,2515,2.43,44.58,47.16,19.2,58.41,65.64,32.05,47.42,28.73 Mabeck/Heidrun-Mistral-7B-base (few-shot),7242,32,32768,True,False,3823,2.45,40.14,39.38,21.85,58.07,67.06,60.78,61.29,16.26 @@ -90,14 +90,14 @@ mhenrichsen/hestenettetLM (few-shot),7242,32,32768,True,False,1151,2.51,44.9,42. 
mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.51,45.42,43.16,8.79,59.43,66.47,53.26,58.26,18.53 CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.52,47.08,47.16,8.41,58.83,65.03,36.64,39.24,27.29 bineric/NorskGPT-Llama-13B-v0.1 (few-shot),-1,32,4096,False,False,2856,2.52,44.17,44.28,3.11,55.59,66.63,59.51,50.89,25.32 -timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.54,42.43,47.82,16.51,56.95,65.43,50.76,50.82,14.47 +timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.53,42.43,47.82,16.51,56.95,65.43,50.76,50.82,14.47 RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.55,43.17,43.4,11.08,56.81,67.46,52.94,41.65,17.57 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.55,44.92,49.31,10.14,57.34,66.02,33.71,43.55,21.34 timpal0l/Llama-3-8B-flashback-v1 (few-shot),8030,128,8192,True,False,3004,2.57,46.59,50.25,14.46,56.86,61.98,46.91,50.98,16.33 google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,2.59,28.22,47.11,19.99,48.0,66.73,52.85,52.74,29.94 neph1/bellman-7b-mistral-instruct-v0.2 (few-shot),7242,32,32768,False,False,2518,2.6,46.11,47.58,18.41,52.78,65.65,41.77,35.86,11.59 +Mabeck/Heidrun-Mistral-7B-chat (few-shot),7242,32,4096,False,False,1419,2.64,51.93,49.86,20.25,25.84,66.28,42.73,60.78,16.39 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,2.64,41.12,42.77,11.52,51.14,65.19,49.78,45.88,21.89 -Mabeck/Heidrun-Mistral-7B-chat (few-shot),7242,32,4096,False,False,1419,2.65,51.93,49.86,20.25,25.84,66.28,42.73,60.78,16.39 bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,2.67,41.63,47.73,0.0,54.25,66.02,47.16,38.28,22.27 meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,2.67,41.28,23.01,23.5,60.29,66.28,50.3,56.24,15.43 mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,2.67,37.93,44.49,14.09,51.38,65.8,45.07,35.36,14.85 @@ -105,9 +105,9 @@ occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.7, occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.73,37.93,44.62,0.28,58.05,66.05,38.54,45.89,12.38 meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.76,35.44,44.88,9.74,55.04,66.15,32.17,35.74,11.32 meta-llama/Llama-3.2-3B (few-shot),3213,128,131073,True,False,3713,2.77,41.13,38.9,9.6,56.85,63.35,45.46,46.77,12.19 +neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,2.77,27.6,37.08,10.84,58.67,63.45,38.31,46.65,28.92 emillykkejensen/Phi-3-mini-4k-instruct-dansk (few-shot),3821,32,4096,False,False,1360,2.78,39.96,44.93,4.01,55.01,65.29,35.28,41.62,11.81 -neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,2.78,27.6,37.08,10.84,58.67,63.45,38.31,46.65,28.92 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,2.81,41.37,42.6,6.52,50.57,64.55,38.64,42.12,13.66 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,2.81,41.37,42.6,6.52,50.57,64.55,38.64,42.12,13.66 NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.82,50.83,53.23,23.02,0.0,57.74,77.42,82.47,30.39 nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.82,49.01,47.95,32.89,0.0,62.7,51.21,62.3,33.79 NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,2.83,21.47,48.39,12.46,52.51,66.53,36.65,49.97,5.64 @@ -116,89 +116,90 @@ Qwen/Qwen1.5-4B-Chat 
(few-shot),3950,152,32768,False,False,4347,2.85,35.96,42.04 timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,32768,False,False,5172,2.85,37.02,40.65,7.48,52.71,64.46,47.26,49.54,9.02 google/gemma-7b-it (few-shot),8538,256,8192,False,False,1792,2.86,43.83,29.21,12.96,49.76,65.36,41.3,31.26,15.02 ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4097,True,False,10194,2.86,41.79,41.86,11.86,51.97,64.86,29.55,26.2,3.64 -ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.88,48.44,39.07,9.72,51.18,63.93,33.11,18.96,9.03 -ibm-granite/granite-8b-code-base (few-shot),8055,49,4096,True,False,1002,2.88,48.55,39.16,10.43,51.28,63.95,33.33,17.65,8.97 +ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.87,48.44,39.07,9.72,51.18,63.93,33.11,18.96,9.03 +ibm-granite/granite-8b-code-base (few-shot),8055,49,4096,True,False,1002,2.87,48.55,39.16,10.43,51.28,63.95,33.33,17.65,8.97 "merge-crew/da-sv-dare-ties-density-0.3 (few-shot, val)",7242,32,32768,True,True,2461,2.88,30.16,48.49,5.52,52.44,64.24,43.57,35.6,6.76 Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,2.93,32.28,39.62,5.38,54.16,62.74,37.49,29.21,15.58 tollefj/nordavind-7b-instruct-warm (few-shot),7248,33,2048,False,False,6450,2.94,38.39,49.44,7.5,51.24,66.09,3.53,12.86,1.29 meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,2.95,31.77,43.91,0.31,58.44,65.5,20.18,35.69,7.93 ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.96,45.9,37.11,11.7,50.11,63.86,32.44,7.46,5.62 norallm/normistral-7b-warm-instruct (few-shot),-1,33,2048,True,False,6194,2.98,39.83,47.48,4.55,49.23,66.17,10.83,14.66,2.71 -google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.0,17.29,34.94,6.39,54.94,64.82,48.07,45.03,9.1 -NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,3.01,16.72,45.89,11.25,53.17,66.51,14.84,27.95,2.41 TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,3.01,34.25,45.67,10.62,50.77,65.67,2.31,10.57,0.64 +google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.02,17.29,34.94,6.39,54.94,64.82,48.07,45.03,9.1 +NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,3.03,16.72,45.89,11.25,53.17,66.51,14.84,27.95,2.41 NbAiLab/nb-llama-3.1-8B (few-shot),8030,128,131072,True,False,1297,3.05,44.83,37.14,10.13,8.09,62.82,76.03,75.6,14.76 01-ai/Yi-1.5-6B (few-shot),6061,64,4097,True,False,2867,3.06,35.21,12.73,4.75,55.95,64.28,46.17,36.46,18.01 +ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4097,True,False,10187,3.06,32.34,29.5,3.89,53.67,64.48,24.4,31.93,10.0 LumiOpen/Viking-33B@1000B (few-shot),33119,131,4099,True,False,2080,3.07,34.22,45.05,9.4,54.92,62.78,0.43,4.81,1.77 -ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4097,True,False,10187,3.07,32.34,29.5,3.89,53.67,64.48,24.4,31.93,10.0 +openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.11,34.66,21.93,1.5,52.36,64.84,24.39,33.37,13.98 allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,3.12,33.8,31.57,2.76,54.2,64.19,17.75,28.24,4.5 microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,3.12,4.51,40.85,5.43,51.76,64.64,37.45,36.39,17.42 norallm/normistral-7b-warm (few-shot),7248,33,2048,True,False,3175,3.12,37.8,40.51,3.35,49.08,65.81,-0.9,3.61,0.4 -openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.12,34.66,21.93,1.5,52.36,64.84,24.39,33.37,13.98 -ibm-granite/granite-3.0-3b-a800m-instruct 
(few-shot),3374,49,4096,True,False,10246,3.14,37.37,31.44,5.27,48.41,63.82,18.25,19.54,2.64 +ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.13,37.37,31.44,5.27,48.41,63.82,18.25,19.54,2.64 AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,3.15,27.41,30.23,11.34,52.8,64.47,11.04,22.71,3.03 LumiOpen/Viking-13B (few-shot),14030,131,4097,True,False,840,3.15,28.6,48.71,2.3,53.85,64.05,0.55,0.0,0.36 ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.18,38.62,35.47,5.07,45.21,62.5,13.46,15.31,6.0 -google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.19,19.97,40.21,2.27,50.55,63.07,15.04,30.63,4.9 openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.19,29.49,13.77,0.0,51.53,66.31,24.59,39.09,15.6 +google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.22,19.97,40.21,2.27,50.55,63.07,15.04,30.63,4.9 meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131073,False,False,7436,3.23,35.45,36.94,1.12,44.61,61.33,27.14,18.57,3.16 HPLT/gpt-33b-nordic-prerelease (few-shot),33119,131,4099,True,False,501,3.25,25.35,44.7,1.43,52.29,62.23,-1.01,1.33,-0.19 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.27,37.21,31.54,6.3,44.86,61.56,17.92,10.79,1.7 ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.27,24.93,31.65,0.06,51.47,62.67,16.13,24.21,1.27 HPLT/gpt-13b-nordic-prerelease (few-shot),14030,131,4099,True,False,3520,3.29,28.72,37.19,2.96,49.53,61.62,1.17,11.38,-0.16 AI-Sweden-Models/gpt-sw3-6.7b-v2 (few-shot),7111,64,2048,True,False,2351,3.3,20.84,18.07,10.54,51.22,65.15,6.34,18.0,5.57 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.32,19.61,37.92,2.81,50.05,63.91,2.54,17.85,0.52 +utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.31,19.61,37.92,2.81,50.05,63.91,2.54,17.85,0.52 stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.33,28.45,39.09,1.43,51.67,57.67,11.94,18.18,2.67 +NorwAI/NorwAI-Mistral-7B-instruct (few-shot),7537,68,4096,False,False,3027,3.34,13.78,42.16,3.52,20.02,65.03,40.74,43.43,4.5 LumiOpen/Viking-7B (few-shot),7550,131,4096,True,False,1431,3.36,23.98,38.74,1.04,50.17,61.96,-0.06,-1.04,0.73 -NorwAI/NorwAI-Mistral-7B-instruct (few-shot),7537,68,4096,False,False,3027,3.36,13.78,42.16,3.52,20.02,65.03,40.74,43.43,4.5 -ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.37,23.87,31.21,2.04,47.36,62.33,17.68,17.54,1.72 -AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot),7111,64,2048,True,False,1473,3.38,15.35,2.85,10.99,50.51,66.38,30.9,16.81,11.08 +ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.36,23.87,31.21,2.04,47.36,62.33,17.68,17.54,1.72 +AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot),7111,64,2048,True,False,1473,3.37,15.35,2.85,10.99,50.51,66.38,30.9,16.81,11.08 MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.38,28.18,29.32,2.9,56.48,53.81,27.86,34.62,4.73 +google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.39,24.44,34.03,2.25,42.12,62.41,15.16,12.67,2.67 mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.39,22.31,34.05,0.7,41.82,65.27,6.27,6.25,2.11 -google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.4,24.44,34.03,2.25,42.12,62.41,15.16,12.67,2.67 NLPnorth/snakmodel-7b-base (few-shot),6738,32,4096,True,False,3083,3.41,23.74,44.71,21.24,0.0,58.05,47.83,55.62,5.51 HuggingFaceTB/SmolLM2-1.7B-Instruct 
(few-shot),1711,49,8192,True,False,15971,3.44,29.44,18.49,1.73,44.39,61.76,22.03,12.61,2.06 ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.44,31.8,6.85,0.97,49.83,63.43,15.97,17.19,3.07 +AI-Sweden-Models/gpt-sw3-6.7b (few-shot),7111,64,2048,True,False,2285,3.45,18.23,22.71,5.03,49.11,64.58,1.11,8.76,1.7 NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,3.45,21.87,24.82,2.89,44.86,63.2,-0.22,7.84,0.81 -AI-Sweden-Models/gpt-sw3-6.7b (few-shot),7111,64,2048,True,False,2285,3.46,18.23,22.71,5.03,49.11,64.58,1.11,8.76,1.7 -meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,3.48,19.82,35.97,2.14,46.59,58.65,2.65,9.51,-0.88 -AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot),1445,64,2048,True,False,4544,3.49,14.73,27.14,2.65,46.38,65.48,0.32,7.0,0.13 -HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,3.52,21.98,37.77,1.26,46.03,58.21,0.0,-0.87,0.0 +AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot),1445,64,2048,True,False,4544,3.48,14.73,27.14,2.65,46.38,65.48,0.32,7.0,0.13 +HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,3.51,21.98,37.77,1.26,46.03,58.21,0.0,-0.87,0.0 +meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,3.52,19.82,35.97,2.14,46.59,58.65,2.65,9.51,-0.88 AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot),471,64,2048,True,False,5855,3.56,11.28,34.94,2.08,36.59,63.38,-0.09,-0.76,0.28 NbAiLab/nb-gpt-j-6B-alpaca (few-shot),6055,50,1024,False,False,2607,3.56,12.95,27.68,1.65,38.6,63.32,4.49,12.81,-0.68 AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,3.59,8.8,28.65,2.84,45.34,62.17,-1.31,3.02,-0.33 HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.59,24.47,9.93,1.22,42.09,61.62,19.65,19.01,1.34 -Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.61,18.0,26.58,0.63,41.66,57.19,22.17,12.85,7.01 +Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.6,18.0,26.58,0.63,41.66,57.19,22.17,12.85,7.01 ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.61,30.63,22.35,1.95,37.3,59.66,2.78,11.15,-1.22 Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,3.63,9.83,29.03,0.56,46.43,56.43,14.86,17.56,4.78 PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.66,28.3,28.95,0.2,36.39,56.6,-0.19,11.52,0.06 -state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,3.68,17.58,10.47,1.23,42.56,62.56,2.18,4.94,1.84 -AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.7,16.13,27.61,1.96,34.79,59.05,1.0,4.85,0.28 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.7,16.17,29.12,-0.47,34.8,59.04,-0.22,6.94,0.52 +state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,3.69,17.58,10.47,1.23,42.56,62.56,2.18,4.94,1.84 +AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.71,16.13,27.61,1.96,34.79,59.05,1.0,4.85,0.28 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.71,16.17,29.12,-0.47,34.8,59.04,-0.22,6.94,0.52 allenai/OLMo-7B (few-shot),6888,50,2051,True,False,5403,3.71,26.76,30.76,0.55,45.65,50.86,0.26,6.94,-0.11 mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,3.73,14.13,26.31,-0.54,32.12,62.61,-2.76,5.08,-0.7 -ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.75,16.82,17.52,1.53,40.21,60.13,0.29,0.55,-0.21 -Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.85,17.38,10.72,1.32,34.58,55.87,4.56,22.41,1.71 
-PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.87,15.93,13.01,0.05,36.85,58.6,-0.09,2.94,-0.35 -allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2051,True,False,5484,3.92,7.52,18.3,3.23,46.35,53.01,2.17,0.22,-0.65 +ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.76,16.82,17.52,1.53,40.21,60.13,0.29,0.55,-0.21 +Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.86,17.38,10.72,1.32,34.58,55.87,4.56,22.41,1.71 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.88,15.93,13.01,0.05,36.85,58.6,-0.09,2.94,-0.35 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,3.9,20.03,15.96,0.86,28.98,56.75,2.15,2.51,0.88 +allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2051,True,False,5484,3.91,7.52,18.3,3.23,46.35,53.01,2.17,0.22,-0.65 Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.95,19.01,8.88,0.66,32.78,55.57,7.21,16.56,0.62 -AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,3.98,13.98,6.37,0.41,20.46,60.87,0.53,4.72,-0.07 -NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,4.01,9.2,32.94,1.59,23.1,50.75,1.23,13.87,-1.16 -RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,4.08,10.12,10.65,-0.66,26.08,56.92,0.1,4.29,-0.88 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.1,12.68,3.61,1.79,28.12,56.85,-0.03,6.03,0.2 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.17,8.97,2.66,1.65,24.92,55.39,0.28,3.55,-0.41 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.21,12.11,2.61,0.25,14.02,56.53,1.05,10.81,-0.5 -NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.22,12.82,3.55,0.68,19.85,55.58,-0.11,-2.13,0.88 -allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,4.22,13.39,17.94,-2.02,23.65,48.87,-0.33,0.05,-0.08 -PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.23,10.59,13.31,0.52,16.61,53.52,0.89,4.3,-0.01 +AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,3.99,13.98,6.37,0.41,20.46,60.87,0.53,4.72,-0.07 +NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,3.99,9.2,32.94,1.59,23.1,50.75,1.23,13.87,-1.16 +RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,4.07,10.12,10.65,-0.66,26.08,56.92,0.1,4.29,-0.88 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.09,12.68,3.61,1.79,28.12,56.85,-0.03,6.03,0.2 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.19,8.97,2.66,1.65,24.92,55.39,0.28,3.55,-0.41 +NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.2,12.82,3.55,0.68,19.85,55.58,-0.11,-2.13,0.88 +allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,4.2,13.39,17.94,-2.02,23.65,48.87,-0.33,0.05,-0.08 +PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.22,10.59,13.31,0.52,16.61,53.52,0.89,4.3,-0.01 +HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.23,12.11,2.61,0.25,14.02,56.53,1.05,10.81,-0.5 NbAiLab/nb-gpt-j-6B-v2 (few-shot),6051,50,1024,False,False,2556,4.25,0.24,27.8,0.56,6.84,53.76,-1.83,0.99,-0.48 RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.29,0.0,13.0,0.0,0.0,61.25,0.0,0.0,0.04 -AI-Sweden-Models/gpt-sw3-126m (few-shot),186,64,2048,True,False,8958,4.31,3.43,9.18,-0.22,16.64,52.34,-0.58,5.18,-1.28 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.32,13.84,9.47,-0.36,22.1,45.28,-0.27,2.54,-0.66 
+AI-Sweden-Models/gpt-sw3-126m (few-shot),186,64,2048,True,False,8958,4.33,3.43,9.18,-0.22,16.64,52.34,-0.58,5.18,-1.28 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.33,13.84,9.47,-0.36,22.1,45.28,-0.27,2.54,-0.66 NbAiLab/nb-llama-3.2-3B (few-shot),3213,128,131072,True,False,1880,4.37,0.06,8.71,2.9,1.4,52.26,20.78,34.59,1.18 RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.37,0.0,4.81,0.0,0.0,61.06,0.0,0.0,0.15 NbAiLab/nb-gpt-j-6B@sharded (few-shot),-1,50,1024,True,False,2630,4.43,0.36,11.0,-0.11,5.15,51.83,-0.96,3.51,0.73 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.45,13.72,3.79,-0.45,14.69,47.44,-0.58,10.99,-0.51 +HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.44,13.72,3.79,-0.45,14.69,47.44,-0.58,10.99,-0.51 NorGLM/NorGPT-369M (few-shot),-1,64,1024,True,False,19896,4.54,1.13,2.06,-0.36,0.32,54.0,-2.57,3.26,-0.62 peter-sk/gpt-neox-da (few-shot),1515,50,1024,True,False,6025,4.67,0.64,-0.52,-0.02,0.48,50.23,0.9,4.1,0.04 ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.88,0.0,0.0,0.0,0.02,42.82,4.55,-1.17,-1.26 diff --git a/danish-nlg.md b/danish-nlg.md index bd534409..5918ebff 100644 --- a/danish-nlg.md +++ b/danish-nlg.md @@ -3,7 +3,7 @@ layout: leaderboard title: Danish NLG 🇩🇰 --- -
Last updated: 10/01/2025 12:29:56 CET
+
Last updated: 11/01/2025 11:03:05 CET
@@ -517,31 +517,6 @@ title: Danish NLG 🇩🇰 12.7.0 12.7.0 - - gpt-4o-mini-2024-07-18 (few-shot, val) - unknown - 200 - 8191 - True - 784 ± 310 / 95 ± 28 - 1.70 - 59.96 ± 1.64 / 41.55 ± 2.90 - 56.91 ± 2.34 / 71.25 ± 1.60 - 67.13 ± 4.29 / 83.05 ± 2.24 - 17.52 ± 2.69 / 33.63 ± 2.10 - 65.32 ± 0.28 / 15.39 ± 0.62 - 91.70 ± 1.66 / 93.79 ± 1.23 - 88.38 ± 2.84 / 92.19 ± 1.94 - 71.08 ± 1.15 / 78.12 ± 0.91 - 14.0.0 - 14.0.0 - 14.0.0 - 14.0.0 - 14.0.0 - 14.0.1 - 14.0.1 - 14.0.1 - gpt-4o-mini-2024-07-18 (zero-shot, val) unknown @@ -592,6 +567,31 @@ title: Danish NLG 🇩🇰 14.0.3 14.0.3 + + gpt-4o-mini-2024-07-18 (few-shot, val) + unknown + 200 + 8191 + True + 784 ± 310 / 95 ± 28 + 1.73 + 59.96 ± 1.64 / 41.55 ± 2.90 + 56.91 ± 2.34 / 71.25 ± 1.60 + 67.13 ± 4.29 / 83.05 ± 2.24 + 17.52 ± 2.69 / 33.63 ± 2.10 + 65.32 ± 0.28 / 15.39 ± 0.62 + 91.70 ± 1.66 / 93.79 ± 1.23 + 88.38 ± 2.84 / 92.19 ± 1.94 + 71.08 ± 1.15 / 78.12 ± 0.91 + 14.0.0 + 14.0.0 + 14.0.0 + 14.0.0 + 14.0.0 + 14.0.1 + 14.0.1 + 14.0.1 + gpt-4-1106-preview (zero-shot, val) unknown @@ -792,6 +792,31 @@ title: Danish NLG 🇩🇰 14.0.4 14.0.4 + + meta-llama/Llama-3.1-8B-Instruct (few-shot) + 8030 + 128 + 131072 + True + 1,473 ± 377 / 283 ± 96 + 2.09 + 54.70 ± 1.69 / 38.11 ± 2.31 + 54.81 ± 1.51 / 67.88 ± 1.39 + 32.11 ± 1.93 / 63.11 ± 1.61 + 48.87 ± 1.18 / 59.47 ± 0.67 + 66.79 ± 0.20 / 19.88 ± 0.44 + 56.14 ± 2.09 / 67.01 ± 1.58 + 63.54 ± 2.14 / 75.61 ± 1.50 + 39.48 ± 2.00 / 54.53 ± 1.52 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot) 70554 @@ -817,31 +842,6 @@ title: Danish NLG 🇩🇰 14.0.4 14.0.4 - - meta-llama/Llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131072 - True - 1,005 ± 330 / 196 ± 74 - 2.10 - 54.70 ± 1.69 / 38.11 ± 2.31 - 54.81 ± 1.51 / 67.88 ± 1.39 - 32.11 ± 1.93 / 63.11 ± 1.61 - 48.87 ± 1.18 / 59.47 ± 0.67 - 66.79 ± 0.20 / 19.88 ± 0.44 - 56.14 ± 2.09 / 67.01 ± 1.58 - 63.54 ± 2.14 / 75.61 ± 1.50 - 39.48 ± 2.00 / 54.53 ± 1.52 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - skole-gpt-mixtral (few-shot) unknown @@ -1423,7 +1423,7 @@ title: Danish NLG 🇩🇰 128 8192 True - 1,007 ± 316 / 162 ± 45 + 1,483 ± 377 / 287 ± 97 2.29 57.74 ± 2.06 / 40.66 ± 2.58 48.43 ± 3.31 / 62.09 ± 3.62 @@ -1492,31 +1492,6 @@ title: Danish NLG 🇩🇰 12.3.2 12.3.2 - - NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) - 46998 - 68 - 32768 - True - 9,015 ± 2,966 / 1,121 ± 510 - 2.32 - 49.18 ± 2.27 / 32.04 ± 1.45 - 49.76 ± 1.76 / 64.54 ± 1.57 - 41.28 ± 3.88 / 66.46 ± 4.23 - 12.83 ± 5.44 / 40.81 ± 3.18 - 67.44 ± 0.39 / 22.88 ± 0.66 - 77.55 ± 1.15 / 83.10 ± 0.89 - 74.67 ± 1.83 / 82.79 ± 1.28 - 36.42 ± 2.29 / 51.22 ± 1.72 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - mlabonne/AlphaMonarch-7B (few-shot, val) 7242 @@ -1642,6 +1617,31 @@ title: Danish NLG 🇩🇰 12.7.0 12.7.0 + + NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) + 46998 + 68 + 32768 + True + 9,015 ± 2,966 / 1,121 ± 510 + 2.34 + 49.18 ± 2.27 / 32.04 ± 1.45 + 49.76 ± 1.76 / 64.54 ± 1.57 + 41.28 ± 3.88 / 66.46 ± 4.23 + 12.83 ± 5.44 / 40.81 ± 3.18 + 67.44 ± 0.39 / 22.88 ± 0.66 + 77.55 ± 1.15 / 83.10 ± 0.89 + 74.67 ± 1.83 / 82.79 ± 1.28 + 36.42 ± 2.29 / 51.22 ± 1.72 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + birgermoell/Flashback-Bellman (few-shot, val) 7242 @@ -1873,7 +1873,7 @@ title: Danish NLG 🇩🇰 128 8192 True - 1,335 ± 338 / 260 ± 88 + 1,477 ± 376 / 285 ± 97 2.39 49.46 ± 1.88 / 32.11 ± 2.41 51.16 ± 2.15 / 67.00 ± 1.51 @@ -1974,7 +1974,7 @@ title: Danish NLG 🇩🇰 8192 True 
1,378 ± 260 / 387 ± 119 - 2.42 + 2.41 19.59 ± 2.54 / 15.47 ± 2.19 46.55 ± 1.89 / 59.52 ± 3.56 32.64 ± 2.91 / 63.84 ± 1.69 @@ -2324,7 +2324,7 @@ title: Danish NLG 🇩🇰 32768 True 5,054 ± 1,200 / 1,056 ± 339 - 2.54 + 2.53 42.43 ± 3.36 / 29.30 ± 2.53 47.82 ± 2.00 / 63.19 ± 2.09 16.51 ± 2.59 / 52.73 ± 3.91 @@ -2467,6 +2467,31 @@ title: Danish NLG 🇩🇰 10.0.1 9.2.0 + + Mabeck/Heidrun-Mistral-7B-chat (few-shot) + 7242 + 32 + 4096 + False + 1,419 ± 349 / 286 ± 97 + 2.64 + 51.93 ± 1.66 / 34.88 ± 2.57 + 49.86 ± 1.51 / 65.11 ± 1.52 + 20.25 ± 3.13 / 52.74 ± 4.16 + 25.84 ± 5.68 / 35.74 ± 5.40 + 66.28 ± 0.28 / 20.94 ± 0.42 + 42.73 ± 1.93 / 55.91 ± 1.44 + 60.78 ± 2.31 / 73.79 ± 1.60 + 16.39 ± 1.53 / 36.26 ± 1.38 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + meta-llama/Llama-3.2-3B-Instruct (few-shot) 3213 @@ -2492,31 +2517,6 @@ title: Danish NLG 🇩🇰 13.0.0 13.0.0 - - Mabeck/Heidrun-Mistral-7B-chat (few-shot) - 7242 - 32 - 4096 - False - 1,419 ± 349 / 286 ± 97 - 2.65 - 51.93 ± 1.66 / 34.88 ± 2.57 - 49.86 ± 1.51 / 65.11 ± 1.52 - 20.25 ± 3.13 / 52.74 ± 4.16 - 25.84 ± 5.68 / 35.74 ± 5.40 - 66.28 ± 0.28 / 20.94 ± 0.42 - 42.73 ± 1.93 / 55.91 ± 1.44 - 60.78 ± 2.31 / 73.79 ± 1.60 - 16.39 ± 1.53 / 36.26 ± 1.38 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - bineric/NorskGPT-Llama-7B-v0.1 (few-shot) 6738 @@ -2692,6 +2692,31 @@ title: Danish NLG 🇩🇰 13.0.0 13.0.0 + + neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot) + 8030 + 128 + 131072 + True + 2,996 ± 817 / 284 ± 96 + 2.77 + 27.60 ± 3.21 / 25.68 ± 2.70 + 37.08 ± 2.51 / 54.37 ± 2.72 + 10.84 ± 2.11 / 49.05 ± 4.51 + 58.67 ± 0.97 / 63.61 ± 0.83 + 63.45 ± 1.07 / 17.77 ± 1.01 + 38.31 ± 1.15 / 53.54 ± 0.91 + 46.65 ± 1.64 / 63.48 ± 1.21 + 28.92 ± 3.15 / 44.79 ± 3.01 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + emillykkejensen/Phi-3-mini-4k-instruct-dansk (few-shot) 3821 @@ -2717,38 +2742,13 @@ title: Danish NLG 🇩🇰 12.10.4 12.10.4 - - neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot) - 8030 - 128 - 131072 - True - 2,996 ± 817 / 284 ± 96 - 2.78 - 27.60 ± 3.21 / 25.68 ± 2.70 - 37.08 ± 2.51 / 54.37 ± 2.72 - 10.84 ± 2.11 / 49.05 ± 4.51 - 58.67 ± 0.97 / 63.61 ± 0.83 - 63.45 ± 1.07 / 17.77 ± 1.01 - 38.31 ± 1.15 / 53.54 ± 0.91 - 46.65 ± 1.64 / 63.48 ± 1.21 - 28.92 ± 3.15 / 44.79 ± 3.01 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - microsoft/Phi-3-mini-4k-instruct (few-shot) 3821 32 4096 True - 3,194 ± 687 / 650 ± 216 + 8,681 ± 1,650 / 2,177 ± 717 2.81 41.37 ± 2.50 / 24.64 ± 2.50 42.60 ± 1.06 / 61.52 ± 0.75 @@ -2974,7 +2974,7 @@ title: Danish NLG 🇩🇰 4096 True 2,313 ± 423 / 682 ± 210 - 2.88 + 2.87 48.44 ± 1.69 / 36.99 ± 1.77 39.07 ± 1.03 / 56.85 ± 1.91 9.72 ± 1.58 / 46.85 ± 3.66 @@ -2999,7 +2999,7 @@ title: Danish NLG 🇩🇰 4096 True 1,002 ± 95 / 416 ± 105 - 2.88 + 2.87 48.55 ± 1.75 / 37.15 ± 1.83 39.16 ± 0.95 / 56.83 ± 1.90 10.43 ± 1.91 / 46.99 ± 3.74 @@ -3168,14 +3168,39 @@ title: Danish NLG 🇩🇰 12.6.1 - google/gemma-2-2b (few-shot) - 2614 - 256 - 8193 - True - 5,235 ± 1,226 / 1,154 ± 366 - 3.00 - 17.29 ± 2.84 / 13.87 ± 2.03 + TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) + 7800 + 100 + 4096 + True + 6,197 ± 1,118 / 1,730 ± 577 + 3.01 + 34.25 ± 2.28 / 30.39 ± 2.14 + 45.67 ± 2.41 / 58.41 ± 3.04 + 10.62 ± 2.37 / 53.20 ± 3.73 + 50.77 ± 2.48 / 56.92 ± 2.42 + 65.67 ± 1.05 / 19.97 ± 0.93 + 2.31 ± 1.92 / 24.65 ± 1.65 + 10.57 ± 2.47 / 40.33 ± 1.67 + 0.64 ± 1.55 / 25.40 ± 0.77 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + + + google/gemma-2-2b (few-shot) + 
2614 + 256 + 8193 + True + 5,235 ± 1,226 / 1,154 ± 366 + 3.02 + 17.29 ± 2.84 / 13.87 ± 2.03 34.94 ± 2.71 / 42.58 ± 3.24 6.39 ± 2.41 / 45.03 ± 4.32 54.94 ± 1.00 / 59.95 ± 1.03 @@ -3199,7 +3224,7 @@ title: Danish NLG 🇩🇰 4096 True 4,438 ± 1,128 / 1,028 ± 346 - 3.01 + 3.03 16.72 ± 2.23 / 15.96 ± 2.08 45.89 ± 2.13 / 63.12 ± 1.98 11.25 ± 2.33 / 51.88 ± 2.35 @@ -3217,31 +3242,6 @@ title: Danish NLG 🇩🇰 12.10.5 12.10.5 - - TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) - 7800 - 100 - 4096 - True - 6,197 ± 1,118 / 1,730 ± 577 - 3.01 - 34.25 ± 2.28 / 30.39 ± 2.14 - 45.67 ± 2.41 / 58.41 ± 3.04 - 10.62 ± 2.37 / 53.20 ± 3.73 - 50.77 ± 2.48 / 56.92 ± 2.42 - 65.67 ± 1.05 / 19.97 ± 0.93 - 2.31 ± 1.92 / 24.65 ± 1.65 - 10.57 ± 2.47 / 40.33 ± 1.67 - 0.64 ± 1.55 / 25.40 ± 0.77 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - NbAiLab/nb-llama-3.1-8B (few-shot) 8030 @@ -3292,6 +3292,31 @@ title: Danish NLG 🇩🇰 13.0.0 13.0.0 + + ibm-granite/granite-3.0-2b-base (few-shot) + 2534 + 49 + 4097 + True + 10,187 ± 2,363 / 2,204 ± 737 + 3.06 + 32.34 ± 3.77 / 24.48 ± 3.17 + 29.50 ± 3.63 / 42.61 ± 4.86 + 3.89 ± 1.49 / 37.29 ± 3.65 + 53.67 ± 0.84 / 59.15 ± 0.69 + 64.48 ± 0.83 / 19.25 ± 0.78 + 24.40 ± 2.55 / 42.44 ± 1.87 + 31.93 ± 1.90 / 54.43 ± 1.22 + 10.00 ± 1.73 / 32.31 ± 0.87 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + LumiOpen/Viking-33B@1000B (few-shot) 33119 @@ -3318,29 +3343,29 @@ title: Danish NLG 🇩🇰 12.9.0 - ibm-granite/granite-3.0-2b-base (few-shot) - 2534 - 49 - 4097 - True - 10,187 ± 2,363 / 2,204 ± 737 - 3.07 - 32.34 ± 3.77 / 24.48 ± 3.17 - 29.50 ± 3.63 / 42.61 ± 4.86 - 3.89 ± 1.49 / 37.29 ± 3.65 - 53.67 ± 0.84 / 59.15 ± 0.69 - 64.48 ± 0.83 / 19.25 ± 0.78 - 24.40 ± 2.55 / 42.44 ± 1.87 - 31.93 ± 1.90 / 54.43 ± 1.22 - 10.00 ± 1.73 / 32.31 ± 0.87 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot) + 7453 + 251 + 4096 + False + 1,254 ± 328 / 243 ± 83 + 3.11 + 34.66 ± 1.19 / 21.37 ± 1.52 + 21.93 ± 3.72 / 31.67 ± 4.68 + 1.50 ± 1.04 / 33.84 ± 0.24 + 52.36 ± 0.70 / 60.53 ± 0.63 + 64.84 ± 0.14 / 14.90 ± 0.35 + 24.39 ± 2.23 / 40.04 ± 1.96 + 33.37 ± 3.58 / 54.00 ± 2.48 + 13.98 ± 1.55 / 33.88 ± 1.46 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 allenai/OLMo-1.7-7B-hf (few-shot) @@ -3417,31 +3442,6 @@ title: Danish NLG 🇩🇰 11.0.0 11.0.0 - - openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot) - 7453 - 251 - 4096 - False - 1,254 ± 328 / 243 ± 83 - 3.12 - 34.66 ± 1.19 / 21.37 ± 1.52 - 21.93 ± 3.72 / 31.67 ± 4.68 - 1.50 ± 1.04 / 33.84 ± 0.24 - 52.36 ± 0.70 / 60.53 ± 0.63 - 64.84 ± 0.14 / 14.90 ± 0.35 - 24.39 ± 2.23 / 40.04 ± 1.96 - 33.37 ± 3.58 / 54.00 ± 2.48 - 13.98 ± 1.55 / 33.88 ± 1.46 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) 3374 @@ -3449,7 +3449,7 @@ title: Danish NLG 🇩🇰 4096 True 10,246 ± 3,021 / 1,629 ± 550 - 3.14 + 3.13 37.37 ± 2.46 / 26.81 ± 2.24 31.44 ± 1.82 / 48.96 ± 2.35 5.27 ± 2.40 / 40.63 ± 4.68 @@ -3542,31 +3542,6 @@ title: Danish NLG 🇩🇰 13.0.0 13.0.0 - - google/gemma-2b (few-shot) - 2506 - 256 - 8192 - True - 6,087 ± 1,046 / 1,902 ± 563 - 3.19 - 19.97 ± 3.91 / 16.51 ± 3.20 - 40.21 ± 1.00 / 46.73 ± 1.82 - 2.27 ± 2.39 / 38.71 ± 4.03 - 50.55 ± 1.22 / 56.27 ± 1.09 - 63.07 ± 1.07 / 16.98 ± 0.98 - 15.04 ± 1.21 / 35.51 ± 0.80 - 30.63 ± 3.36 / 50.02 ± 2.13 - 4.90 ± 0.95 / 28.18 ± 0.88 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 - 12.6.1 - 12.1.0 - 12.1.0 - 12.1.0 - 
openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot) 7453 @@ -3592,6 +3567,31 @@ title: Danish NLG 🇩🇰 14.0.4 14.0.4 + + google/gemma-2b (few-shot) + 2506 + 256 + 8192 + True + 6,087 ± 1,046 / 1,902 ± 563 + 3.22 + 19.97 ± 3.91 / 16.51 ± 3.20 + 40.21 ± 1.00 / 46.73 ± 1.82 + 2.27 ± 2.39 / 38.71 ± 4.03 + 50.55 ± 1.22 / 56.27 ± 1.09 + 63.07 ± 1.07 / 16.98 ± 0.98 + 15.04 ± 1.21 / 35.51 ± 0.80 + 30.63 ± 3.36 / 50.02 ± 2.13 + 4.90 ± 0.95 / 28.18 ± 0.88 + 12.5.2 + 12.1.0 + 12.1.0 + 12.1.0 + 12.6.1 + 12.1.0 + 12.1.0 + 12.1.0 + meta-llama/Llama-3.2-1B-Instruct (few-shot) 1236 @@ -3749,7 +3749,7 @@ title: Danish NLG 🇩🇰 4096 True 15,009 ± 4,072 / 2,702 ± 878 - 3.32 + 3.31 19.61 ± 2.68 / 17.44 ± 2.64 37.92 ± 1.74 / 46.23 ± 1.91 2.81 ± 1.13 / 38.15 ± 2.81 @@ -3792,6 +3792,31 @@ title: Danish NLG 🇩🇰 12.10.8 12.10.8 + + NorwAI/NorwAI-Mistral-7B-instruct (few-shot) + 7537 + 68 + 4096 + False + 3,027 ± 503 / 903 ± 296 + 3.34 + 13.78 ± 2.85 / 11.90 ± 2.13 + 42.16 ± 0.68 / 45.21 ± 0.45 + 3.52 ± 1.90 / 39.81 ± 3.18 + 20.02 ± 2.31 / 31.67 ± 2.19 + 65.03 ± 0.55 / 18.00 ± 0.59 + 40.74 ± 1.91 / 53.17 ± 1.96 + 43.43 ± 1.76 / 61.76 ± 1.32 + 4.50 ± 0.94 / 26.55 ± 0.96 + 12.10.5 + 12.10.4 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + LumiOpen/Viking-7B (few-shot) 7550 @@ -3817,31 +3842,6 @@ title: Danish NLG 🇩🇰 12.7.0 12.7.0 - - NorwAI/NorwAI-Mistral-7B-instruct (few-shot) - 7537 - 68 - 4096 - False - 3,027 ± 503 / 903 ± 296 - 3.36 - 13.78 ± 2.85 / 11.90 ± 2.13 - 42.16 ± 0.68 / 45.21 ± 0.45 - 3.52 ± 1.90 / 39.81 ± 3.18 - 20.02 ± 2.31 / 31.67 ± 2.19 - 65.03 ± 0.55 / 18.00 ± 0.59 - 40.74 ± 1.91 / 53.17 ± 1.96 - 43.43 ± 1.76 / 61.76 ± 1.32 - 4.50 ± 0.94 / 26.55 ± 0.96 - 12.10.5 - 12.10.4 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - ibm-granite/granite-7b-instruct (few-shot) 6738 @@ -3849,7 +3849,7 @@ title: Danish NLG 🇩🇰 4096 True 3,136 ± 558 / 942 ± 290 - 3.37 + 3.36 23.87 ± 1.61 / 20.19 ± 1.60 31.21 ± 1.84 / 50.13 ± 2.79 2.04 ± 1.44 / 45.31 ± 3.73 @@ -3874,7 +3874,7 @@ title: Danish NLG 🇩🇰 2048 True 1,473 ± 493 / 448 ± 143 - 3.38 + 3.37 15.35 ± 1.38 / 14.74 ± 1.30 2.85 ± 1.54 / 18.05 ± 0.23 10.99 ± 2.52 / 54.07 ± 1.93 @@ -3917,6 +3917,31 @@ title: Danish NLG 🇩🇰 13.0.0 13.0.0 + + google/gemma-2b-it (few-shot) + 2506 + 256 + 8192 + False + 6,471 ± 1,142 / 1,961 ± 584 + 3.39 + 24.44 ± 2.59 / 17.37 ± 2.03 + 34.03 ± 2.50 / 52.42 ± 2.16 + 2.25 ± 1.28 / 42.33 ± 3.11 + 42.12 ± 1.18 / 49.76 ± 1.22 + 62.41 ± 1.21 / 17.66 ± 0.54 + 15.16 ± 1.35 / 31.61 ± 1.92 + 12.67 ± 1.89 / 41.99 ± 1.17 + 2.67 ± 1.17 / 26.82 ± 0.69 + 12.5.2 + 12.1.0 + 12.1.0 + 12.4.0 + 12.6.1 + 12.1.0 + 12.1.0 + 12.1.0 + mhenrichsen/danskgpt-tiny-chat (few-shot) 1100 @@ -3942,31 +3967,6 @@ title: Danish NLG 🇩🇰 10.0.1 9.1.2 - - google/gemma-2b-it (few-shot) - 2506 - 256 - 8192 - False - 6,471 ± 1,142 / 1,961 ± 584 - 3.40 - 24.44 ± 2.59 / 17.37 ± 2.03 - 34.03 ± 2.50 / 52.42 ± 2.16 - 2.25 ± 1.28 / 42.33 ± 3.11 - 42.12 ± 1.18 / 49.76 ± 1.22 - 62.41 ± 1.21 / 17.66 ± 0.54 - 15.16 ± 1.35 / 31.61 ± 1.92 - 12.67 ± 1.89 / 41.99 ± 1.17 - 2.67 ± 1.17 / 26.82 ± 0.69 - 12.5.2 - 12.1.0 - 12.1.0 - 12.4.0 - 12.6.1 - 12.1.0 - 12.1.0 - 12.1.0 - NLPnorth/snakmodel-7b-base (few-shot) 6738 @@ -4042,31 +4042,6 @@ title: Danish NLG 🇩🇰 13.0.0 13.0.0 - - NbAiLab/nb-llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131072 - True - 1,296 ± 335 / 246 ± 84 - 3.45 - 21.87 ± 4.65 / 18.10 ± 3.46 - 24.82 ± 6.87 / 35.81 ± 5.98 - 2.89 ± 2.02 / 41.50 ± 4.74 - 44.86 ± 1.12 / 52.94 ± 1.18 - 63.20 ± 1.64 / 15.14 ± 2.30 - -0.22 ± 1.17 / 23.41 ± 0.46 - 7.84 ± 5.61 / 
30.02 ± 10.68 - 0.81 ± 1.19 / 25.18 ± 0.81 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - AI-Sweden-Models/gpt-sw3-6.7b (few-shot) 7111 @@ -4074,7 +4049,7 @@ title: Danish NLG 🇩🇰 2048 True 2,285 ± 443 / 671 ± 205 - 3.46 + 3.45 18.23 ± 5.87 / 14.77 ± 3.36 22.71 ± 5.21 / 35.11 ± 6.59 5.03 ± 2.51 / 49.00 ± 2.64 @@ -4093,29 +4068,29 @@ title: Danish NLG 🇩🇰 11.0.0 - meta-llama/Llama-3.2-1B (few-shot) - 1236 + NbAiLab/nb-llama-3.1-8B-Instruct (few-shot) + 8030 128 - 131073 + 131072 True - 7,577 ± 1,884 / 1,555 ± 492 - 3.48 - 19.82 ± 4.70 / 17.20 ± 3.57 - 35.97 ± 3.00 / 49.88 ± 3.80 - 2.14 ± 2.61 / 44.16 ± 4.48 - 46.59 ± 5.44 / 51.92 ± 6.14 - 58.65 ± 1.07 / 11.57 ± 0.77 - 2.65 ± 1.26 / 25.17 ± 1.16 - 9.51 ± 2.67 / 40.41 ± 1.62 - -0.88 ± 1.13 / 24.79 ± 0.98 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 1,296 ± 335 / 246 ± 84 + 3.45 + 21.87 ± 4.65 / 18.10 ± 3.46 + 24.82 ± 6.87 / 35.81 ± 5.98 + 2.89 ± 2.02 / 41.50 ± 4.74 + 44.86 ± 1.12 / 52.94 ± 1.18 + 63.20 ± 1.64 / 15.14 ± 2.30 + -0.22 ± 1.17 / 23.41 ± 0.46 + 7.84 ± 5.61 / 30.02 ± 10.68 + 0.81 ± 1.19 / 25.18 ± 0.81 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot) @@ -4124,7 +4099,7 @@ title: Danish NLG 🇩🇰 2048 True 4,544 ± 1,000 / 1,106 ± 359 - 3.49 + 3.48 14.73 ± 1.84 / 14.44 ± 1.74 27.14 ± 1.93 / 42.34 ± 2.51 2.65 ± 1.66 / 40.63 ± 4.02 @@ -4149,7 +4124,7 @@ title: Danish NLG 🇩🇰 4096 True 1,382 ± 337 / 257 ± 91 - 3.52 + 3.51 21.98 ± 3.33 / 18.42 ± 2.62 37.77 ± 3.06 / 55.35 ± 4.51 1.26 ± 1.86 / 34.03 ± 0.86 @@ -4167,6 +4142,31 @@ title: Danish NLG 🇩🇰 12.3.2 12.3.2 + + meta-llama/Llama-3.2-1B (few-shot) + 1236 + 128 + 131073 + True + 7,577 ± 1,884 / 1,555 ± 492 + 3.52 + 19.82 ± 4.70 / 17.20 ± 3.57 + 35.97 ± 3.00 / 49.88 ± 3.80 + 2.14 ± 2.61 / 44.16 ± 4.48 + 46.59 ± 5.44 / 51.92 ± 6.14 + 58.65 ± 1.07 / 11.57 ± 0.77 + 2.65 ± 1.26 / 25.17 ± 1.16 + 9.51 ± 2.67 / 40.41 ± 1.62 + -0.88 ± 1.13 / 24.79 ± 0.98 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot) 471 @@ -4274,7 +4274,7 @@ title: Danish NLG 🇩🇰 32768 False 8,304 ± 1,846 / 1,933 ± 617 - 3.61 + 3.60 18.00 ± 2.52 / 14.88 ± 1.68 26.58 ± 2.81 / 45.88 ± 3.40 0.63 ± 1.48 / 33.42 ± 0.28 @@ -4374,7 +4374,7 @@ title: Danish NLG 🇩🇰 32769 True 2,722 ± 495 / 766 ± 250 - 3.68 + 3.69 17.58 ± 1.95 / 15.48 ± 1.39 10.47 ± 3.28 / 19.91 ± 2.67 1.23 ± 1.53 / 36.92 ± 3.53 @@ -4399,7 +4399,7 @@ title: Danish NLG 🇩🇰 2048 True 5,758 ± 1,348 / 1,215 ± 391 - 3.70 + 3.71 16.13 ± 4.02 / 14.90 ± 3.13 27.61 ± 2.14 / 39.77 ± 1.85 1.96 ± 2.25 / 38.40 ± 2.99 @@ -4424,7 +4424,7 @@ title: Danish NLG 🇩🇰 2048 True 2,519 ± 841 / 323 ± 104 - 3.70 + 3.71 16.17 ± 3.44 / 14.33 ± 1.92 29.12 ± 4.09 / 49.93 ± 4.45 -0.47 ± 0.62 / 33.18 ± 0.28 @@ -4499,7 +4499,7 @@ title: Danish NLG 🇩🇰 4096 True 7,808 ± 2,183 / 1,289 ± 428 - 3.75 + 3.76 16.82 ± 3.53 / 14.28 ± 3.01 17.52 ± 2.86 / 27.96 ± 2.94 1.53 ± 2.06 / 36.59 ± 3.39 @@ -4524,7 +4524,7 @@ title: Danish NLG 🇩🇰 32768 False 11,740 ± 3,000 / 2,209 ± 721 - 3.85 + 3.86 17.38 ± 2.04 / 15.74 ± 1.99 10.72 ± 3.35 / 25.21 ± 3.80 1.32 ± 1.08 / 42.05 ± 3.69 @@ -4549,7 +4549,7 @@ title: Danish NLG 🇩🇰 4096 True 6,513 ± 1,241 / 1,282 ± 644 - 3.87 + 3.88 15.93 ± 3.91 / 14.68 ± 2.81 13.01 ± 2.33 / 28.28 ± 4.63 0.05 ± 1.37 / 40.73 ± 3.78 @@ -4567,6 +4567,31 @@ title: Danish NLG 🇩🇰 14.0.4 14.0.4 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 3.90 + 20.03 ± 1.60 
/ 17.52 ± 1.50 + 15.96 ± 2.77 / 38.74 ± 2.88 + 0.86 ± 1.76 / 42.48 ± 3.82 + 28.98 ± 1.93 / 35.38 ± 2.10 + 56.75 ± 0.67 / 10.37 ± 0.34 + 2.15 ± 1.69 / 25.19 ± 0.96 + 2.51 ± 2.17 / 35.61 ± 1.10 + 0.88 ± 0.95 / 25.68 ± 0.52 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + allenai/OLMo-7B-Twin-2T (few-shot) 6888 @@ -4574,7 +4599,7 @@ title: Danish NLG 🇩🇰 2051 True 5,484 ± 1,125 / 1,317 ± 425 - 3.92 + 3.91 7.52 ± 3.92 / 6.60 ± 2.84 18.30 ± 3.89 / 27.62 ± 5.78 3.23 ± 1.94 / 45.74 ± 3.06 @@ -4624,7 +4649,7 @@ title: Danish NLG 🇩🇰 2048 True 7,717 ± 1,553 / 2,013 ± 625 - 3.98 + 3.99 13.98 ± 1.54 / 13.46 ± 1.42 6.37 ± 3.38 / 25.43 ± 4.09 0.41 ± 0.80 / 33.31 ± 0.24 @@ -4649,7 +4674,7 @@ title: Danish NLG 🇩🇰 131072 True 3,424 ± 1,080 / 464 ± 158 - 4.01 + 3.99 9.20 ± 3.54 / 8.11 ± 3.24 32.94 ± 4.62 / 46.54 ± 5.68 1.59 ± 2.72 / 45.39 ± 3.81 @@ -4674,7 +4699,7 @@ title: Danish NLG 🇩🇰 4096 False 10,890 ± 2,686 / 2,186 ± 750 - 4.08 + 4.07 10.12 ± 1.24 / 9.84 ± 1.12 10.65 ± 3.65 / 28.33 ± 5.27 -0.66 ± 1.24 / 33.61 ± 0.26 @@ -4699,7 +4724,7 @@ title: Danish NLG 🇩🇰 8192 True 22,023 ± 6,203 / 3,675 ± 1,231 - 4.10 + 4.09 12.68 ± 1.39 / 12.32 ± 1.19 3.61 ± 2.69 / 19.01 ± 3.95 1.79 ± 0.97 / 36.23 ± 3.19 @@ -4724,7 +4749,7 @@ title: Danish NLG 🇩🇰 8192 True 21,777 ± 6,115 / 3,617 ± 1,211 - 4.17 + 4.19 8.97 ± 3.18 / 8.62 ± 2.72 2.66 ± 2.70 / 16.29 ± 2.34 1.65 ± 1.38 / 44.50 ± 3.21 @@ -4742,31 +4767,6 @@ title: Danish NLG 🇩🇰 13.1.0 13.1.0 - - HuggingFaceTB/SmolLM2-135M-Instruct (few-shot) - 135 - 49 - 8192 - True - 25,602 ± 7,583 / 3,953 ± 1,325 - 4.21 - 12.11 ± 1.07 / 11.48 ± 1.07 - 2.61 ± 3.22 / 18.95 ± 3.93 - 0.25 ± 1.87 / 39.65 ± 4.00 - 14.02 ± 2.56 / 16.74 ± 2.74 - 56.53 ± 0.95 / 8.45 ± 0.77 - 1.05 ± 1.51 / 26.37 ± 0.83 - 10.81 ± 4.60 / 38.73 ± 1.77 - -0.50 ± 0.85 / 24.15 ± 0.67 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - NorwAI/NorwAI-Mistral-7B-pretrain (few-shot) 7537 @@ -4774,7 +4774,7 @@ title: Danish NLG 🇩🇰 4096 True 3,024 ± 496 / 909 ± 301 - 4.22 + 4.20 12.82 ± 2.64 / 12.37 ± 1.95 3.55 ± 3.64 / 22.75 ± 4.02 0.68 ± 1.41 / 35.13 ± 0.98 @@ -4799,7 +4799,7 @@ title: Danish NLG 🇩🇰 2051 True 8,536 ± 1,926 / 1,940 ± 619 - 4.22 + 4.20 13.39 ± 2.60 / 12.39 ± 2.46 17.94 ± 5.58 / 32.80 ± 3.63 -2.02 ± 2.28 / 40.63 ± 4.12 @@ -4824,7 +4824,7 @@ title: Danish NLG 🇩🇰 2048 True 2,331 ± 787 / 301 ± 97 - 4.23 + 4.22 10.59 ± 2.24 / 10.29 ± 1.37 13.31 ± 3.23 / 34.38 ± 3.13 0.52 ± 0.78 / 33.76 ± 0.37 @@ -4842,6 +4842,31 @@ title: Danish NLG 🇩🇰 14.0.4 14.0.4 + + HuggingFaceTB/SmolLM2-135M-Instruct (few-shot) + 135 + 49 + 8192 + True + 25,602 ± 7,583 / 3,953 ± 1,325 + 4.23 + 12.11 ± 1.07 / 11.48 ± 1.07 + 2.61 ± 3.22 / 18.95 ± 3.93 + 0.25 ± 1.87 / 39.65 ± 4.00 + 14.02 ± 2.56 / 16.74 ± 2.74 + 56.53 ± 0.95 / 8.45 ± 0.77 + 1.05 ± 1.51 / 26.37 ± 0.83 + 10.81 ± 4.60 / 38.73 ± 1.77 + -0.50 ± 0.85 / 24.15 ± 0.67 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + NbAiLab/nb-gpt-j-6B-v2 (few-shot) 6051 @@ -4899,7 +4924,7 @@ title: Danish NLG 🇩🇰 2048 True 8,958 ± 1,815 / 2,240 ± 696 - 4.31 + 4.33 3.43 ± 2.66 / 5.56 ± 1.90 9.18 ± 4.25 / 26.36 ± 3.94 -0.22 ± 1.53 / 34.20 ± 0.84 @@ -4924,7 +4949,7 @@ title: Danish NLG 🇩🇰 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.32 + 4.33 13.84 ± 1.95 / 13.12 ± 1.60 9.47 ± 3.30 / 25.66 ± 3.36 -0.36 ± 1.60 / 39.52 ± 3.19 @@ -5024,7 +5049,7 @@ title: Danish NLG 🇩🇰 8192 True 26,346 ± 7,812 / 4,082 ± 1,372 - 4.45 + 4.44 13.72 ± 1.83 / 13.41 ± 1.52 3.79 ± 3.11 / 21.06 ± 4.74 -0.45 ± 0.70 / 39.69 ± 4.95 diff --git a/danish-nlu.csv 
b/danish-nlu.csv index 5f9b4715..2663017d 100644 --- a/danish-nlu.csv +++ b/danish-nlu.csv @@ -10,8 +10,8 @@ danish-foundation-models/encoder-large-v1,355,50,512,True,False,6671,1.41,74.6,5 KennethEnevoldsen/dfm-sentence-encoder-large-1,355,50,512,True,False,6245,1.42,74.99,53.85,75.71,44.85 meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.44,68.57,60.52,57.57,54.33 "meta-llama/Meta-Llama-3-70B (few-shot, val)",70554,128,8192,True,False,312,1.44,63.62,60.19,50.07,60.97 +AI-Sweden-Models/roberta-large-1350k,355,50,512,True,False,5744,1.46,75.22,49.94,72.59,48.97 "gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.46,71.15,49.42,64.59,57.35 -AI-Sweden-Models/roberta-large-1350k,355,50,512,True,False,5744,1.47,75.22,49.94,72.59,48.97 ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.48,66.5,58.93,57.27,55.02 ltg/norbert3-large,354,50,508,True,False,5048,1.51,73.62,48.29,71.55,48.59 google/gemma-2-27b-it (few-shot),27227,256,8193,True,False,1516,1.52,59.94,59.06,58.57,57.48 @@ -26,7 +26,7 @@ Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,1.63,64.66,53.42 google/rembert,576,250,256,True,False,11736,1.66,70.19,50.19,69.72,39.85 CohereForAI/aya-expanse-32b (few-shot),32296,256,8193,False,False,2288,1.72,61.15,56.39,37.75,58.09 FacebookAI/xlm-roberta-large,560,250,512,True,False,17897,1.72,72.74,48.33,57.3,43.57 -KennethEnevoldsen/dfm-sentence-encoder-medium-3,178,120,512,True,False,14050,1.73,71.21,47.55,68.72,38.33 +KennethEnevoldsen/dfm-sentence-encoder-medium-3,178,120,512,True,False,14050,1.72,71.21,47.55,68.72,38.33 NbAiLab/nb-roberta-base-scandi,278,250,512,True,False,15079,1.73,73.28,52.08,67.99,32.39 microsoft/mdeberta-v3-base,279,251,512,True,False,20637,1.74,72.9,43.38,67.05,42.15 "152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32764,True,False,2126,1.76,56.96,55.11,42.64,54.58 @@ -42,8 +42,8 @@ vesteinn/FoBERT,124,50,512,True,False,15623,1.83,69.65,49.18,65.45,32.4 "meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8192,True,False,1673,1.84,63.1,53.09,40.98,51.13 vesteinn/ScandiBERT-no-faroese,124,50,512,True,False,15436,1.85,69.79,47.73,68.28,31.9 sentence-transformers/use-cmlm-multilingual,471,501,512,True,False,30231,1.86,69.17,48.03,55.31,42.34 +setu4993/LaBSE,471,501,512,True,False,25418,1.86,71.24,46.5,52.92,40.08 pere/roberta-base-exp-8,278,250,512,True,False,15112,1.87,68.77,49.66,60.13,32.6 -setu4993/LaBSE,471,501,512,True,False,25418,1.87,71.24,46.5,52.92,40.08 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,1.89,63.63,50.82,35.58,54.33 intfloat/multilingual-e5-large-instruct,560,250,514,True,False,5947,1.9,69.86,55.45,31.14,45.51 pere/roberta-debug-32,278,250,512,True,False,14958,1.9,68.46,50.48,64.34,30.3 @@ -55,7 +55,7 @@ pere/roberta-base-exp-32,278,250,512,True,False,15081,2.0,71.9,51.33,44.45,32.51 "gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.01,59.96,56.91,67.13,17.52 mhenrichsen/danskgpt-chat-v2.1 (few-shot),-1,32,32768,True,False,5085,2.01,51.08,54.69,30.95,56.56 pere/roberta-base-exp-32B,278,250,512,True,False,15103,2.01,71.81,47.83,54.99,29.92 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.03,54.7,54.81,32.11,48.87 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.03,54.7,54.81,32.11,48.87 skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.03,49.17,51.51,32.04,58.52 NorwAI/NorwAI-Mixtral-8x7B 
(few-shot),46998,68,32768,True,False,2368,2.04,51.95,52.11,44.47,43.32 mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024001,True,False,7095,2.05,53.95,48.97,31.78,56.44 @@ -81,46 +81,46 @@ timpal0l/sol (few-shot),10732,32,4096,False,False,3701,2.2,54.91,44.38,21.11,58. cardiffnlp/twitter-xlm-roberta-base,278,250,512,True,False,34475,2.21,70.1,45.3,51.74,22.01 CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.22,51.32,52.0,18.48,52.43 facebook/xlm-v-base,778,902,512,True,False,25396,2.22,71.42,31.86,52.95,34.66 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.22,57.74,48.43,27.12,46.76 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.22,57.74,48.43,27.12,46.76 utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.22,34.0,53.97,32.21,57.1 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.23,59.48,56.46,20.57,38.23 emillykkejensen/Llama-3-8B-instruct-dansk (few-shot),8030,128,4096,False,False,967,2.23,41.74,50.07,24.49,59.29 mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.24,48.24,39.52,62.92,36.92 "mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.24,53.02,51.29,19.73,51.69 "timpal0l/BeagleCatMunin2 (few-shot, val)",7242,32,32768,False,True,2477,2.25,51.53,47.95,14.1,58.28 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.26,49.46,51.16,23.01,49.75 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.26,49.46,51.16,23.01,49.75 Maltehb/danish-bert-botxo,111,32,512,True,False,16091,2.27,66.71,43.79,45.96,26.29 meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.27,50.92,47.86,29.19,48.38 +sarnikowski/convbert-medium-small-da-cased,24,29,512,True,False,13821,2.27,64.28,36.85,63.55,24.52 "AI-Sweden-Models/tyr (few-shot, val)",7242,32,32768,False,True,6079,2.28,47.01,50.6,13.73,56.35 -sarnikowski/convbert-medium-small-da-cased,24,29,512,True,False,13821,2.28,64.28,36.85,63.55,24.52 +"merge-crew/da-sv-ties (few-shot, val)",7242,32,32768,True,True,2457,2.28,45.39,51.95,13.25,58.51 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.29,53.44,49.17,20.55,51.7 "birgermoell/Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2887,2.29,47.71,48.21,19.55,56.46 -"merge-crew/da-sv-ties (few-shot, val)",7242,32,32768,True,True,2457,2.29,45.39,51.95,13.25,58.51 mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.29,55.49,49.18,7.4,57.72 timpal0l/Llama-3-8B-flashback-v1 (few-shot),8030,128,8192,True,False,3004,2.29,46.59,50.25,14.46,56.86 +Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.3,51.94,51.97,29.99,38.99 jannikskytt/MeDa-Bert,111,32,511,True,False,16114,2.3,64.64,44.62,47.47,23.14 -Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.31,51.94,51.97,29.99,38.99 +"RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val)",7242,32,32768,False,True,3008,2.31,50.83,43.41,19.72,57.87 +ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4097,True,False,2515,2.31,44.58,47.16,19.2,58.41 senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.31,52.61,49.81,19.64,48.03 -"RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val)",7242,32,32768,False,True,3008,2.32,50.83,43.41,19.72,57.87 danish-foundation-models/encoder-medium-v1,111,32,512,True,False,16130,2.32,63.42,39.91,51.01,25.76 
google-bert/bert-base-multilingual-uncased,167,106,512,True,False,13993,2.32,64.92,33.5,46.75,37.09 -ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4097,True,False,2515,2.32,44.58,47.16,19.2,58.41 "merge-crew/da-sv-dare-ties-density-0.6 (few-shot, val)",7242,32,32768,True,True,2515,2.33,46.03,49.59,12.72,57.03 Geotrend/bert-base-en-fr-de-no-da-cased,118,42,512,True,False,13973,2.34,63.38,34.78,41.08,40.32 microsoft/infoxlm-large,560,250,512,True,False,6696,2.34,74.42,37.94,15.26,44.25 +NLPnorth/snakmodel-7b-instruct (few-shot),6738,32,4096,False,False,3028,2.36,35.82,50.46,18.6,58.57 neph1/bellman-7b-mistral-instruct-v0.2 (few-shot),7242,32,32768,False,False,2518,2.36,46.11,47.58,18.41,52.78 Geotrend/bert-base-en-no-cased,111,33,512,True,False,14081,2.37,62.66,33.91,40.96,39.93 "KennethEnevoldsen/munin_mistral-7b (few-shot, val)",7242,32,32768,False,True,2543,2.37,46.7,47.52,8.04,60.05 -NLPnorth/snakmodel-7b-instruct (few-shot),6738,32,4096,False,False,3028,2.37,35.82,50.46,18.6,58.57 AI-Sweden-Models/Llama-3-8B (few-shot),8030,128,8192,True,False,4141,2.38,36.72,46.48,26.1,58.0 timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.38,42.43,47.82,16.51,56.95 CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.39,47.08,47.16,8.41,58.83 alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.39,43.65,45.86,15.19,59.14 mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.39,43.6,45.92,15.43,59.13 +timpal0l/njord-alpha (few-shot),7242,32,32768,True,False,5431,2.39,38.25,39.37,29.76,57.02 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.4,44.92,49.31,10.14,57.34 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.4,44.0,45.41,16.17,57.06 mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.4,44.89,48.09,19.06,51.56 -timpal0l/njord-alpha (few-shot),7242,32,32768,True,False,5431,2.4,38.25,39.37,29.76,57.02 AI-Nordics/bert-large-swedish-cased,335,31,512,True,False,7199,2.42,60.66,38.46,32.29,37.68 ThatsGroes/munin-SkoleGPTOpenOrca-7b-16bit (few-shot),7242,32,32768,False,False,3006,2.42,45.37,39.63,21.77,58.28 "mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.42,52.72,49.11,16.09,46.28 @@ -130,22 +130,22 @@ Geotrend/bert-base-en-da-cased,111,33,512,True,False,14062,2.45,62.57,33.67,35.7 sentence-transformers/paraphrase-xlm-r-multilingual-v1,278,250,512,True,False,20154,2.45,61.17,46.39,38.61,19.9 Mabeck/Heidrun-Mistral-7B-base (few-shot),7242,32,32768,True,False,3823,2.46,40.14,39.38,21.85,58.07 google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.46,19.59,46.55,32.64,59.4 +mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.46,45.42,43.16,8.79,59.43 KBLab/megatron-bert-large-swedish-cased-110k,370,64,512,True,False,7075,2.47,60.18,39.2,26.68,39.34 "birgermoell/Munin-NeuralBeagle-NorskGPT (few-shot, val)",7242,32,32768,False,True,2903,2.47,51.85,44.02,1.22,57.69 "birgermoell/WestLake-Munin-Cat-NorskGPT (few-shot, val)",7242,32,32768,False,True,2856,2.47,51.85,44.02,1.22,57.69 -mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.47,45.42,43.16,8.79,59.43 danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.48,39.12,36.47,26.76,58.75 microsoft/xlm-align-base,278,250,512,True,False,14744,2.48,70.36,47.83,11.87,29.87 RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.49,43.17,43.4,11.08,56.81 bineric/NorskGPT-Llama3-8b 
(few-shot),8030,128,8192,False,False,3382,2.49,51.4,42.13,3.91,57.81 +danish-foundation-models/munin-7b-alpha (few-shot),7242,32,32768,True,False,6116,2.49,40.6,36.89,26.41,57.81 google-bert/bert-base-multilingual-cased,178,120,512,True,False,14083,2.49,63.17,32.38,27.93,39.57 -danish-foundation-models/munin-7b-alpha (few-shot),7242,32,32768,True,False,6116,2.5,40.6,36.89,26.41,57.81 flax-community/nordic-roberta-wiki,125,50,512,True,False,16227,2.5,60.82,34.45,41.89,26.83 jonfd/electra-small-nordic,22,96,128,True,False,5989,2.5,65.4,34.43,67.27,6.6 +mhenrichsen/hestenettetLM (few-shot),7242,32,32768,True,False,1151,2.5,44.9,42.61,8.65,59.62 +Geotrend/bert-base-25lang-cased,151,85,512,True,False,13908,2.51,62.53,32.88,29.01,39.51 Geotrend/bert-base-da-cased,104,23,512,True,False,15432,2.51,62.76,32.06,30.95,37.79 -mhenrichsen/hestenettetLM (few-shot),7242,32,32768,True,False,1151,2.51,44.9,42.61,8.65,59.62 sentence-transformers/paraphrase-multilingual-mpnet-base-v2,278,250,512,True,False,15100,2.51,61.18,49.13,29.66,19.99 -Geotrend/bert-base-25lang-cased,151,85,512,True,False,13908,2.52,62.53,32.88,29.01,39.51 Twitter/twhin-bert-base,279,250,512,True,False,11514,2.52,60.01,42.17,29.43,29.79 jhu-clsp/bernice,278,250,512,True,False,5567,2.52,61.98,47.2,40.52,13.53 sarnikowski/electra-small-discriminator-da-256-cased,13,29,512,True,False,20340,2.52,60.63,24.38,68.58,21.03 @@ -169,31 +169,31 @@ sentence-transformers/stsb-xlm-r-multilingual,278,250,512,True,False,15040,2.63, KBLab/megatron-bert-base-swedish-cased-600k,135,64,512,True,False,15726,2.64,57.97,39.4,23.5,31.87 LumiOpen/Viking-33B@1000B (few-shot),33119,131,4099,True,False,2080,2.64,34.22,45.05,9.4,54.92 ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4097,True,False,10194,2.64,41.79,41.86,11.86,51.97 +meta-llama/Llama-3.2-3B (few-shot),3213,128,131073,True,False,3713,2.64,41.13,38.9,9.6,56.85 Maltehb/aelaectra-danish-electra-small-cased,14,32,128,True,False,4593,2.65,63.31,32.72,67.74,0.0 Maltehb/aelaectra-danish-electra-small-uncased,14,32,128,True,False,5995,2.65,62.52,34.45,65.15,2.51 bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,2.65,41.63,47.73,0.0,54.25 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.65,48.44,39.07,9.72,51.18 ibm-granite/granite-8b-code-base (few-shot),8055,49,4096,True,False,1002,2.65,48.55,39.16,10.43,51.28 -meta-llama/Llama-3.2-3B (few-shot),3213,128,131073,True,False,3713,2.65,41.13,38.9,9.6,56.85 Geotrend/distilbert-base-en-da-cased,69,33,512,True,False,26196,2.66,59.5,31.89,36.0,28.41 Geotrend/distilbert-base-en-fr-de-no-da-cased,76,42,512,True,False,26081,2.66,58.78,31.3,34.92,27.86 TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,2.66,34.25,45.67,10.62,50.77 +occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.66,40.19,42.31,1.14,57.89 AI-Sweden-Models/gpt-sw3-40b (few-shot),39927,64,2048,True,False,409,2.67,26.57,47.81,11.13,53.78 DDSC/roberta-base-scandinavian,125,50,512,True,False,14491,2.67,43.9,44.48,30.37,28.89 Twitter/twhin-bert-large,561,250,512,True,False,9707,2.67,66.39,39.36,7.06,33.88 +google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,2.67,28.22,47.11,19.99,48.0 +ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.67,45.9,37.11,11.7,50.11 norallm/normistral-7b-warm-instruct (few-shot),-1,33,2048,True,False,6194,2.67,39.83,47.48,4.55,49.23 -occiglot/occiglot-7b-eu5-instruct 
(few-shot),7242,32,32768,False,False,2088,2.67,40.19,42.31,1.14,57.89 -google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,2.68,28.22,47.11,19.99,48.0 -ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.68,45.9,37.11,11.7,50.11 +Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,2.68,35.96,42.04,8.65,53.68 "merge-crew/da-sv-dare-ties-density-0.3 (few-shot, val)",7242,32,32768,True,True,2461,2.68,30.16,48.49,5.52,52.44 occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.68,37.93,44.62,0.28,58.05 Geotrend/distilbert-base-25lang-cased,109,85,512,True,False,26099,2.69,58.44,31.81,34.13,27.6 Geotrend/distilbert-base-da-cased,61,23,512,True,False,28950,2.69,58.36,32.13,34.75,27.5 -Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,2.69,35.96,42.04,8.65,53.68 distilbert/distilbert-base-multilingual-cased,135,120,512,True,False,26355,2.69,58.12,32.53,35.53,28.19 Geotrend/distilbert-base-en-no-cased,69,33,512,True,False,26597,2.71,57.53,32.95,33.63,27.21 NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,2.72,21.47,48.39,12.46,52.51 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,2.72,41.37,42.6,6.52,50.57 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,2.72,41.37,42.6,6.52,50.57 meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,2.73,41.28,23.01,23.5,60.29 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,2.73,56.75,44.48,26.74,17.89 timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,32768,False,False,5172,2.73,37.02,40.65,7.48,52.71 @@ -201,11 +201,11 @@ meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,2.74,31.77,43.91 LumiOpen/Viking-13B (few-shot),14030,131,4097,True,False,840,2.76,28.6,48.71,2.3,53.85 neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,2.79,27.6,37.08,10.84,58.67 KBLab/megatron-bert-base-swedish-cased-125k,135,64,512,True,False,15763,2.81,53.93,36.31,23.46,27.85 -norallm/normistral-7b-warm (few-shot),7248,33,2048,True,False,3175,2.82,37.8,40.51,3.35,49.08 +norallm/normistral-7b-warm (few-shot),7248,33,2048,True,False,3175,2.81,37.8,40.51,3.35,49.08 flax-community/swe-roberta-wiki-oscar,125,50,512,True,False,15437,2.83,55.98,36.66,22.69,24.81 Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,2.84,32.28,39.62,5.38,54.16 +google/gemma-7b-it (few-shot),8538,256,8192,False,False,1792,2.85,43.83,29.21,12.96,49.76 NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,2.86,16.72,45.89,11.25,53.17 -google/gemma-7b-it (few-shot),8538,256,8192,False,False,1792,2.86,43.83,29.21,12.96,49.76 mideind/IceBERT-xlmr-ic3,278,250,512,True,False,11004,2.86,58.49,37.47,6.71,30.6 HPLT/gpt-33b-nordic-prerelease (few-shot),33119,131,4099,True,False,501,2.9,25.35,44.7,1.43,52.29 ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,2.92,38.62,35.47,5.07,45.21 @@ -216,9 +216,9 @@ ltg/norbert3-xs,15,50,508,True,False,14208,2.97,59.94,39.16,2.16,24.69 stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,2.97,28.45,39.09,1.43,51.67 AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,2.98,27.41,30.23,11.34,52.8 allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,2.98,33.8,31.57,2.76,54.2 +ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,2.98,37.37,31.44,5.27,48.41 
Addedk/mbert-swedish-distilled-cased,135,120,512,True,False,26091,2.99,56.36,31.16,21.08,19.63 HPLT/gpt-13b-nordic-prerelease (few-shot),14030,131,4099,True,False,3520,2.99,28.72,37.19,2.96,49.53 -ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,2.99,37.37,31.44,5.27,48.41 meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131073,False,False,7436,2.99,35.45,36.94,1.12,44.61 ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4097,True,False,10187,3.0,32.34,29.5,3.89,53.67 Addedk/kbbert-distilled-cased,82,50,512,True,False,29698,3.01,57.84,31.18,13.25,22.73 @@ -228,32 +228,32 @@ MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.04,28.18,29 birgermoell/roberta-swedish-scandi,125,50,512,True,False,15385,3.05,49.22,33.51,12.08,24.49 google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.05,17.29,34.94,6.39,54.94 google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.07,19.97,40.21,2.27,50.55 +utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.09,19.61,37.92,2.81,50.05 HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,3.1,21.98,37.77,1.26,46.03 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.1,19.61,37.92,2.81,50.05 sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,3.11,54.48,36.6,8.84,15.42 sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,3.11,54.48,36.6,8.84,13.97 +AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2048,True,False,1831,3.15,15.94,32.78,7.86,52.16 ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.15,24.93,31.65,0.06,51.47 openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.15,34.66,21.93,1.5,52.36 -AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2048,True,False,1831,3.16,15.94,32.78,7.86,52.16 meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,3.17,19.82,35.97,2.14,46.59 -ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.19,23.87,31.21,2.04,47.36 +ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.18,23.87,31.21,2.04,47.36 microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,3.19,4.51,40.85,5.43,51.76 EuropeanParliament/EUBERT,94,66,512,True,False,20070,3.21,41.09,27.33,21.58,20.68 allenai/OLMo-7B (few-shot),6888,50,2051,True,False,5403,3.21,26.76,30.76,0.55,45.65 01-ai/Yi-1.5-6B (few-shot),6061,64,4097,True,False,2867,3.22,35.21,12.73,4.75,55.95 google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.23,24.44,34.03,2.25,42.12 +mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.25,22.31,34.05,0.7,41.82 dbmdz/bert-medium-historic-multilingual-cased,42,32,512,True,False,24291,3.26,49.88,27.93,5.42,22.93 -mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.26,22.31,34.05,0.7,41.82 dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,3.27,47.61,24.17,8.14,25.19 AI-Sweden-Models/gpt-sw3-6.7b-v2 (few-shot),7111,64,2048,True,False,2351,3.29,20.84,18.07,10.54,51.22 +NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,3.34,21.87,24.82,2.89,44.86 AI-Sweden-Models/gpt-sw3-6.7b (few-shot),7111,64,2048,True,False,2285,3.35,18.23,22.71,5.03,49.11 -NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,3.35,21.87,24.82,2.89,44.86 -NbAiLab/nb-llama-3.1-8B 
(few-shot),8030,128,131072,True,False,1297,3.36,44.83,37.14,10.13,8.09 +NbAiLab/nb-llama-3.1-8B (few-shot),8030,128,131072,True,False,1297,3.35,44.83,37.14,10.13,8.09 openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.37,29.49,13.77,0.0,51.53 HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.39,29.44,18.49,1.73,44.39 PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.39,28.3,28.95,0.2,36.39 +ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.39,30.63,22.35,1.95,37.3 AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot),1445,64,2048,True,False,4544,3.4,14.73,27.14,2.65,46.38 -ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.4,30.63,22.35,1.95,37.3 Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.41,18.0,26.58,0.63,41.66 AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot),471,64,2048,True,False,5855,3.42,11.28,34.94,2.08,36.59 sentence-transformers/distiluse-base-multilingual-cased-v1,135,120,512,True,False,34042,3.43,46.78,27.78,3.04,15.52 @@ -261,8 +261,8 @@ NLPnorth/snakmodel-7b-base (few-shot),6738,32,4096,True,False,3083,3.44,23.74,44 AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,3.48,8.8,28.65,2.84,45.34 Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,3.49,9.83,29.03,0.56,46.43 NbAiLab/nb-gpt-j-6B-alpaca (few-shot),6055,50,1024,False,False,2607,3.5,12.95,27.68,1.65,38.6 +NorwAI/NorwAI-Mistral-7B-instruct (few-shot),7537,68,4096,False,False,3027,3.5,13.78,42.16,3.52,20.02 ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.5,31.8,6.85,0.97,49.83 -NorwAI/NorwAI-Mistral-7B-instruct (few-shot),7537,68,4096,False,False,3027,3.51,13.78,42.16,3.52,20.02 AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.54,16.13,27.61,1.96,34.79 PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.58,16.17,29.12,-0.47,34.8 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2051,True,False,5484,3.59,7.52,18.3,3.23,46.35 @@ -273,18 +273,19 @@ mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,3.64,14.13,26. 
AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot),7111,64,2048,True,False,1473,3.66,15.35,2.85,10.99,50.51 ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.66,16.82,17.52,1.53,40.21 KBLab/albert-base-swedish-cased-alpha,14,50,512,True,False,15925,3.69,29.9,19.79,6.15,15.96 +jannesg/bertsson,124,50,512,True,False,15314,3.71,32.63,24.11,2.91,15.37 state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,3.71,17.58,10.47,1.23,42.56 -jannesg/bertsson,124,50,512,True,False,15314,3.72,32.63,24.11,2.91,15.37 -NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,3.74,9.2,32.94,1.59,23.1 +NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,3.73,9.2,32.94,1.59,23.1 sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.74,26.96,30.13,2.01,8.22 sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.74,26.96,30.13,2.01,8.25 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,3.79,20.03,15.96,0.86,28.98 PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.8,15.93,13.01,0.05,36.85 3ebdola/Dialectal-Arabic-XLM-R-Base,278,250,512,True,False,12783,3.82,36.51,22.07,1.63,3.09 Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.82,17.38,10.72,1.32,34.58 Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.88,19.01,8.88,0.66,32.78 -allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,3.9,13.39,17.94,-2.02,23.65 +allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,3.89,13.39,17.94,-2.02,23.65 dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,3.92,33.62,20.71,1.19,4.19 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.06,13.84,9.47,-0.36,22.1 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.05,13.84,9.47,-0.36,22.1 RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,4.06,10.12,10.65,-0.66,26.08 RabotaRu/HRBert-mini,80,200,512,True,False,54951,4.08,22.21,20.33,0.9,2.73 HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.12,12.68,3.61,1.79,28.12 @@ -292,18 +293,18 @@ AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4. 
NbAiLab/nb-gpt-j-6B-v2 (few-shot),6051,50,1024,False,False,2556,4.15,0.24,27.8,0.56,6.84 PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.15,10.59,13.31,0.52,16.61 HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.2,8.97,2.66,1.65,24.92 -fresh-xlm-roberta-base,278,250,512,True,False,2214,4.23,16.04,17.37,1.34,1.58 +fresh-xlm-roberta-base,278,250,512,True,False,2214,4.22,16.04,17.37,1.34,1.58 AI-Sweden-Models/gpt-sw3-126m (few-shot),186,64,2048,True,False,8958,4.25,3.43,9.18,-0.22,16.64 NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.25,12.82,3.55,0.68,19.85 -fresh-electra-small,14,31,512,True,False,7840,4.28,12.87,18.61,0.3,0.0 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.32,13.72,3.79,-0.45,14.69 +fresh-electra-small,14,31,512,True,False,7840,4.27,12.87,18.61,0.3,0.0 +HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.31,13.72,3.79,-0.45,14.69 HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.33,12.11,2.61,0.25,14.02 NbAiLab/nb-gpt-j-6B@sharded (few-shot),-1,50,1024,True,False,2630,4.43,0.36,11.0,-0.11,5.15 RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.57,0.0,13.0,0.0,0.0 NbAiLab/nb-llama-3.2-3B (few-shot),3213,128,131072,True,False,1880,4.58,0.06,8.71,2.9,1.4 ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.68,0.65,2.61,-0.73,1.99 NorGLM/NorGPT-369M (few-shot),-1,64,1024,True,False,19896,4.71,1.13,2.06,-0.36,0.32 -RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.72,0.0,4.81,0.0,0.0 +RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.71,0.0,4.81,0.0,0.0 peter-sk/gpt-neox-da (few-shot),1515,50,1024,True,False,6025,4.78,0.64,-0.52,-0.02,0.48 ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.79,0.0,0.0,0.0,0.02 Sigurdur/icebreaker (few-shot),110,32,1024,False,False,48619,4.8,0.0,0.0,0.0,0.0 diff --git a/danish-nlu.md b/danish-nlu.md index b1c01184..e6babea6 100644 --- a/danish-nlu.md +++ b/danish-nlu.md @@ -3,7 +3,7 @@ layout: leaderboard title: Danish NLU 🇩🇰 --- -
Last updated: 10/01/2025 12:29:52 CET
+Last updated: 11/01/2025 11:03:01 CET
@@ -221,6 +221,23 @@ title: Danish NLU 🇩🇰 12.7.0 12.7.0 + + AI-Sweden-Models/roberta-large-1350k + 355 + 50 + 512 + True + 5,744 ± 969 / 1,539 ± 492 + 1.46 + 75.22 ± 1.64 / 71.57 ± 1.50 + 49.94 ± 3.25 / 65.66 ± 2.73 + 72.59 ± 1.38 / 85.58 ± 0.97 + 48.97 ± 1.22 / 54.79 ± 1.18 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + gpt-4o-2024-05-13 (few-shot, val) unknown @@ -238,23 +255,6 @@ title: Danish NLU 🇩🇰 12.10.2 12.10.0 - - AI-Sweden-Models/roberta-large-1350k - 355 - 50 - 512 - True - 5,744 ± 969 / 1,539 ± 492 - 1.47 - 75.22 ± 1.64 / 71.57 ± 1.50 - 49.94 ± 3.25 / 65.66 ± 2.73 - 72.59 ± 1.38 / 85.58 ± 0.97 - 48.97 ± 1.22 / 54.79 ± 1.18 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot) 28411 @@ -500,7 +500,7 @@ title: Danish NLU 🇩🇰 512 True 14,050 ± 3,278 / 2,749 ± 894 - 1.73 + 1.72 71.21 ± 1.46 / 67.27 ± 1.78 47.55 ± 1.25 / 64.66 ± 0.96 68.72 ± 1.40 / 83.85 ± 0.85 @@ -766,34 +766,34 @@ title: Danish NLU 🇩🇰 0.0.0 - pere/roberta-base-exp-8 - 278 - 250 + setu4993/LaBSE + 471 + 501 512 True - 15,112 ± 2,969 / 3,347 ± 1,093 - 1.87 - 68.77 ± 2.07 / 64.77 ± 1.86 - 49.66 ± 0.99 / 66.21 ± 0.72 - 60.13 ± 13.57 / 78.92 ± 6.96 - 32.60 ± 0.75 / 37.37 ± 0.72 + 25,418 ± 6,435 / 4,536 ± 1,452 + 1.86 + 71.24 ± 1.63 / 66.41 ± 1.64 + 46.50 ± 1.57 / 64.31 ± 1.21 + 52.92 ± 4.42 / 75.11 ± 3.22 + 40.08 ± 1.22 / 45.40 ± 1.14 0.0.0 0.0.0 0.0.0 0.0.0 - setu4993/LaBSE - 471 - 501 + pere/roberta-base-exp-8 + 278 + 250 512 True - 25,418 ± 6,435 / 4,536 ± 1,452 + 15,112 ± 2,969 / 3,347 ± 1,093 1.87 - 71.24 ± 1.63 / 66.41 ± 1.64 - 46.50 ± 1.57 / 64.31 ± 1.21 - 52.92 ± 4.42 / 75.11 ± 3.22 - 40.08 ± 1.22 / 45.40 ± 1.14 + 68.77 ± 2.07 / 64.77 ± 1.86 + 49.66 ± 0.99 / 66.21 ± 0.72 + 60.13 ± 13.57 / 78.92 ± 6.96 + 32.60 ± 0.75 / 37.37 ± 0.72 0.0.0 0.0.0 0.0.0 @@ -992,7 +992,7 @@ title: Danish NLU 🇩🇰 128 131072 True - 1,005 ± 330 / 196 ± 74 + 1,473 ± 377 / 283 ± 96 2.03 54.70 ± 1.69 / 38.11 ± 2.31 54.81 ± 1.51 / 67.88 ± 1.39 @@ -1434,7 +1434,7 @@ title: Danish NLU 🇩🇰 128 8192 True - 1,007 ± 316 / 162 ± 45 + 1,483 ± 377 / 287 ± 97 2.22 57.74 ± 2.06 / 40.66 ± 2.58 48.43 ± 3.31 / 62.09 ± 3.62 @@ -1553,7 +1553,7 @@ title: Danish NLU 🇩🇰 128 8192 True - 1,335 ± 338 / 260 ± 88 + 1,477 ± 376 / 285 ± 97 2.26 49.46 ± 1.88 / 32.11 ± 2.41 51.16 ± 2.15 / 67.00 ± 1.51 @@ -1598,6 +1598,23 @@ title: Danish NLU 🇩🇰 14.1.2 14.0.3 + + sarnikowski/convbert-medium-small-da-cased + 24 + 29 + 512 + True + 13,821 ± 2,209 / 3,547 ± 1,184 + 2.27 + 64.28 ± 1.74 / 59.29 ± 1.54 + 36.85 ± 3.28 / 56.27 ± 3.98 + 63.55 ± 1.19 / 81.41 ± 0.64 + 24.52 ± 1.11 / 29.88 ± 1.13 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + AI-Sweden-Models/tyr (few-shot, val) 7242 @@ -1615,22 +1632,22 @@ title: Danish NLU 🇩🇰 12.3.2 12.3.2 - - sarnikowski/convbert-medium-small-da-cased - 24 - 29 - 512 + + merge-crew/da-sv-ties (few-shot, val) + 7242 + 32 + 32768 True - 13,821 ± 2,209 / 3,547 ± 1,184 + 2,457 ± 451 / 757 ± 237 2.28 - 64.28 ± 1.74 / 59.29 ± 1.54 - 36.85 ± 3.28 / 56.27 ± 3.98 - 63.55 ± 1.19 / 81.41 ± 0.64 - 24.52 ± 1.11 / 29.88 ± 1.13 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 + 45.39 ± 2.46 / 34.45 ± 2.56 + 51.95 ± 2.65 / 65.69 ± 2.11 + 13.25 ± 6.27 / 45.66 ± 5.58 + 58.51 ± 1.35 / 62.73 ± 1.19 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 CohereForAI/c4ai-command-r-v01 (few-shot) @@ -1666,23 +1683,6 @@ title: Danish NLU 🇩🇰 9.3.1 12.5.2 - - merge-crew/da-sv-ties (few-shot, val) - 7242 - 32 - 32768 - True - 2,457 ± 451 / 757 ± 237 - 2.29 - 45.39 ± 2.46 / 34.45 ± 2.56 - 51.95 ± 2.65 / 65.69 ± 2.11 - 13.25 ± 6.27 / 45.66 ± 5.58 - 58.51 ± 1.35 / 62.73 ± 1.19 - 10.0.1 - 10.0.1 - 
10.0.1 - 10.0.1 - mistralai/Ministral-8B-Instruct-2410 (few-shot) 8020 @@ -1717,6 +1717,23 @@ title: Danish NLU 🇩🇰 12.7.0 12.7.0 + + Nexusflow/Starling-LM-7B-beta (few-shot) + 7242 + 32 + 4096 + False + 4,136 ± 1,282 / 668 ± 326 + 2.30 + 51.94 ± 2.00 / 27.59 ± 1.79 + 51.97 ± 1.36 / 68.62 ± 0.93 + 29.99 ± 0.73 / 60.05 ± 1.33 + 38.99 ± 2.82 / 52.38 ± 2.25 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + jannikskytt/MeDa-Bert 111 @@ -1734,22 +1751,39 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 - - Nexusflow/Starling-LM-7B-beta (few-shot) + + RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val) 7242 32 - 4096 + 32768 False - 4,136 ± 1,282 / 668 ± 326 + 3,008 ± 429 / 991 ± 323 2.31 - 51.94 ± 2.00 / 27.59 ± 1.79 - 51.97 ± 1.36 / 68.62 ± 0.93 - 29.99 ± 0.73 / 60.05 ± 1.33 - 38.99 ± 2.82 / 52.38 ± 2.25 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 + 50.83 ± 2.28 / 36.96 ± 2.58 + 43.41 ± 2.56 / 48.74 ± 2.83 + 19.72 ± 4.69 / 52.71 ± 5.26 + 57.87 ± 2.32 / 64.53 ± 1.73 + 9.3.2 + 9.3.2 + 9.3.2 + 12.5.2 + + + ibm-granite/granite-3.0-8b-base (few-shot) + 8171 + 49 + 4097 + True + 2,515 ± 625 / 476 ± 159 + 2.31 + 44.58 ± 2.62 / 33.50 ± 2.75 + 47.16 ± 1.36 / 64.63 ± 1.18 + 19.20 ± 2.44 / 53.44 ± 4.36 + 58.41 ± 1.36 / 63.32 ± 0.89 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 senseable/WestLake-7B-v2 (few-shot) @@ -1768,23 +1802,6 @@ title: Danish NLU 🇩🇰 12.6.1 12.6.1 - - RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val) - 7242 - 32 - 32768 - False - 3,008 ± 429 / 991 ± 323 - 2.32 - 50.83 ± 2.28 / 36.96 ± 2.58 - 43.41 ± 2.56 / 48.74 ± 2.83 - 19.72 ± 4.69 / 52.71 ± 5.26 - 57.87 ± 2.32 / 64.53 ± 1.73 - 9.3.2 - 9.3.2 - 9.3.2 - 12.5.2 - danish-foundation-models/encoder-medium-v1 111 @@ -1819,23 +1836,6 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 - - ibm-granite/granite-3.0-8b-base (few-shot) - 8171 - 49 - 4097 - True - 2,515 ± 625 / 476 ± 159 - 2.32 - 44.58 ± 2.62 / 33.50 ± 2.75 - 47.16 ± 1.36 / 64.63 ± 1.18 - 19.20 ± 2.44 / 53.44 ± 4.36 - 58.41 ± 1.36 / 63.32 ± 0.89 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - merge-crew/da-sv-dare-ties-density-0.6 (few-shot, val) 7242 @@ -1887,6 +1887,23 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 + + NLPnorth/snakmodel-7b-instruct (few-shot) + 6738 + 32 + 4096 + False + 3,028 ± 534 / 941 ± 289 + 2.36 + 35.82 ± 4.16 / 29.62 ± 3.38 + 50.46 ± 1.30 / 64.72 ± 2.20 + 18.60 ± 3.80 / 52.87 ± 6.20 + 58.57 ± 0.74 / 64.68 ± 0.58 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + neph1/bellman-7b-mistral-instruct-v0.2 (few-shot) 7242 @@ -1938,23 +1955,6 @@ title: Danish NLU 🇩🇰 12.3.1 12.3.2 - - NLPnorth/snakmodel-7b-instruct (few-shot) - 6738 - 32 - 4096 - False - 3,028 ± 534 / 941 ± 289 - 2.37 - 35.82 ± 4.16 / 29.62 ± 3.38 - 50.46 ± 1.30 / 64.72 ± 2.20 - 18.60 ± 3.80 / 52.87 ± 6.20 - 58.57 ± 0.74 / 64.68 ± 0.58 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - AI-Sweden-Models/Llama-3-8B (few-shot) 8030 @@ -2040,6 +2040,23 @@ title: Danish NLU 🇩🇰 12.10.4 12.10.4 + + timpal0l/njord-alpha (few-shot) + 7242 + 32 + 32768 + True + 5,431 ± 1,267 / 1,139 ± 365 + 2.39 + 38.25 ± 3.75 / 28.14 ± 3.06 + 39.37 ± 2.09 / 56.57 ± 1.90 + 29.76 ± 3.24 / 61.80 ± 2.22 + 57.02 ± 1.28 / 63.16 ± 0.90 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + ibm-granite/granite-3.0-8b-instruct (few-shot) 8171 @@ -2091,23 +2108,6 @@ title: Danish NLU 🇩🇰 9.3.1 12.4.0 - - timpal0l/njord-alpha (few-shot) - 7242 - 32 - 32768 - True - 5,431 ± 1,267 / 1,139 ± 365 - 2.40 - 38.25 ± 3.75 / 28.14 ± 3.06 - 39.37 ± 2.09 / 56.57 ± 1.90 - 29.76 ± 3.24 / 61.80 ± 2.22 - 57.02 ± 1.28 / 63.16 ± 0.90 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - AI-Nordics/bert-large-swedish-cased 335 @@ -2261,6 +2261,23 @@ title: Danish 
NLU 🇩🇰 12.9.1 12.9.1 + + mistralai/Mistral-7B-v0.1 (few-shot) + 7242 + 32 + 32768 + True + 1,446 ± 354 / 295 ± 100 + 2.46 + 45.42 ± 2.88 / 32.66 ± 2.49 + 43.16 ± 1.69 / 54.53 ± 2.83 + 8.79 ± 3.23 / 38.38 ± 4.22 + 59.43 ± 1.04 / 64.55 ± 0.68 + 0.0.0 + 0.0.0 + 0.0.0 + 12.5.1 + KBLab/megatron-bert-large-swedish-cased-110k 370 @@ -2312,23 +2329,6 @@ title: Danish NLU 🇩🇰 9.3.1 12.5.2 - - mistralai/Mistral-7B-v0.1 (few-shot) - 7242 - 32 - 32768 - True - 1,446 ± 354 / 295 ± 100 - 2.47 - 45.42 ± 2.88 / 32.66 ± 2.49 - 43.16 ± 1.69 / 54.53 ± 2.83 - 8.79 ± 3.23 / 38.38 ± 4.22 - 59.43 ± 1.04 / 64.55 ± 0.68 - 0.0.0 - 0.0.0 - 0.0.0 - 12.5.1 - danish-foundation-models/munin-7b-v0.1dev0 (few-shot) 7242 @@ -2397,6 +2397,23 @@ title: Danish NLU 🇩🇰 12.7.0 12.7.0 + + danish-foundation-models/munin-7b-alpha (few-shot) + 7242 + 32 + 32768 + True + 6,116 ± 1,049 / 1,784 ± 577 + 2.49 + 40.60 ± 2.25 / 28.71 ± 1.42 + 36.89 ± 2.27 / 43.77 ± 2.64 + 26.41 ± 5.40 / 53.03 ± 6.56 + 57.81 ± 1.11 / 63.44 ± 0.81 + 12.5.2 + 12.4.0 + 12.4.0 + 12.4.0 + google-bert/bert-base-multilingual-cased 178 @@ -2414,23 +2431,6 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 - - danish-foundation-models/munin-7b-alpha (few-shot) - 7242 - 32 - 32768 - True - 6,116 ± 1,049 / 1,784 ± 577 - 2.50 - 40.60 ± 2.25 / 28.71 ± 1.42 - 36.89 ± 2.27 / 43.77 ± 2.64 - 26.41 ± 5.40 / 53.03 ± 6.56 - 57.81 ± 1.11 / 63.44 ± 0.81 - 12.5.2 - 12.4.0 - 12.4.0 - 12.4.0 - flax-community/nordic-roberta-wiki 125 @@ -2465,6 +2465,40 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 + + mhenrichsen/hestenettetLM (few-shot) + 7242 + 32 + 32768 + True + 1,151 ± 294 / 227 ± 76 + 2.50 + 44.90 ± 3.15 / 31.91 ± 2.65 + 42.61 ± 1.79 / 53.47 ± 3.00 + 8.65 ± 3.44 / 38.18 ± 4.21 + 59.62 ± 1.12 / 64.70 ± 0.75 + 12.5.2 + 12.3.2 + 12.3.2 + 12.3.2 + + + Geotrend/bert-base-25lang-cased + 151 + 85 + 512 + True + 13,908 ± 3,201 / 2,700 ± 872 + 2.51 + 62.53 ± 2.60 / 58.99 ± 2.88 + 32.88 ± 1.24 / 53.56 ± 1.47 + 29.01 ± 11.25 / 61.89 ± 6.94 + 39.51 ± 1.53 / 44.11 ± 1.58 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + Geotrend/bert-base-da-cased 104 @@ -2482,23 +2516,6 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 - - mhenrichsen/hestenettetLM (few-shot) - 7242 - 32 - 32768 - True - 1,151 ± 294 / 227 ± 76 - 2.51 - 44.90 ± 3.15 / 31.91 ± 2.65 - 42.61 ± 1.79 / 53.47 ± 3.00 - 8.65 ± 3.44 / 38.18 ± 4.21 - 59.62 ± 1.12 / 64.70 ± 0.75 - 12.5.2 - 12.3.2 - 12.3.2 - 12.3.2 - sentence-transformers/paraphrase-multilingual-mpnet-base-v2 278 @@ -2516,23 +2533,6 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 - - Geotrend/bert-base-25lang-cased - 151 - 85 - 512 - True - 13,908 ± 3,201 / 2,700 ± 872 - 2.52 - 62.53 ± 2.60 / 58.99 ± 2.88 - 32.88 ± 1.24 / 53.56 ± 1.47 - 29.01 ± 11.25 / 61.89 ± 6.94 - 39.51 ± 1.53 / 44.11 ± 1.58 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - Twitter/twhin-bert-base 279 @@ -2924,6 +2924,23 @@ title: Danish NLU 🇩🇰 13.0.0 13.0.0 + + meta-llama/Llama-3.2-3B (few-shot) + 3213 + 128 + 131073 + True + 3,713 ± 877 / 836 ± 267 + 2.64 + 41.13 ± 2.85 / 27.15 ± 1.82 + 38.90 ± 1.93 / 44.34 ± 1.64 + 9.60 ± 2.51 / 44.34 ± 5.20 + 56.85 ± 1.27 / 62.25 ± 0.88 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + Maltehb/aelaectra-danish-electra-small-cased 14 @@ -3009,23 +3026,6 @@ title: Danish NLU 🇩🇰 12.10.5 12.10.5 - - meta-llama/Llama-3.2-3B (few-shot) - 3213 - 128 - 131073 - True - 3,713 ± 877 / 836 ± 267 - 2.65 - 41.13 ± 2.85 / 27.15 ± 1.82 - 38.90 ± 1.93 / 44.34 ± 1.64 - 9.60 ± 2.51 / 44.34 ± 5.20 - 56.85 ± 1.27 / 62.25 ± 0.88 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - Geotrend/distilbert-base-en-da-cased 69 @@ -3077,6 +3077,23 @@ title: Danish NLU 🇩🇰 13.0.0 13.0.0 + + 
occiglot/occiglot-7b-eu5-instruct (few-shot) + 7242 + 32 + 32768 + False + 2,088 ± 352 / 706 ± 214 + 2.66 + 40.19 ± 2.55 / 29.73 ± 1.44 + 42.31 ± 1.55 / 59.29 ± 2.00 + 1.14 ± 1.22 / 33.83 ± 0.72 + 57.89 ± 1.16 / 63.95 ± 0.82 + 12.5.2 + 12.2.0 + 12.3.1 + 12.4.0 + AI-Sweden-Models/gpt-sw3-40b (few-shot) 39927 @@ -3128,40 +3145,6 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 - - norallm/normistral-7b-warm-instruct (few-shot) - unknown - 33 - 2048 - True - 6,194 ± 949 / 1,967 ± 619 - 2.67 - 39.83 ± 2.18 / 25.99 ± 1.56 - 47.48 ± 2.00 / 63.93 ± 1.86 - 4.55 ± 2.34 / 42.91 ± 4.05 - 49.23 ± 0.63 / 57.45 ± 0.53 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - - - occiglot/occiglot-7b-eu5-instruct (few-shot) - 7242 - 32 - 32768 - False - 2,088 ± 352 / 706 ± 214 - 2.67 - 40.19 ± 2.55 / 29.73 ± 1.44 - 42.31 ± 1.55 / 59.29 ± 2.00 - 1.14 ± 1.22 / 33.83 ± 0.72 - 57.89 ± 1.16 / 63.95 ± 0.82 - 12.5.2 - 12.2.0 - 12.3.1 - 12.4.0 - google/gemma-2-2b-it (few-shot) 2614 @@ -3169,7 +3152,7 @@ title: Danish NLU 🇩🇰 8193 True 5,374 ± 1,233 / 1,193 ± 377 - 2.68 + 2.67 28.22 ± 1.66 / 19.95 ± 1.55 47.11 ± 1.36 / 63.36 ± 1.39 19.99 ± 1.86 / 58.86 ± 1.11 @@ -3186,7 +3169,7 @@ title: Danish NLU 🇩🇰 4096 True 5,617 ± 995 / 1,623 ± 540 - 2.68 + 2.67 45.90 ± 2.53 / 33.00 ± 1.93 37.11 ± 1.88 / 56.47 ± 1.59 11.70 ± 2.16 / 50.31 ± 3.91 @@ -3196,6 +3179,40 @@ title: Danish NLU 🇩🇰 13.0.0 13.0.0 + + norallm/normistral-7b-warm-instruct (few-shot) + unknown + 33 + 2048 + True + 6,194 ± 949 / 1,967 ± 619 + 2.67 + 39.83 ± 2.18 / 25.99 ± 1.56 + 47.48 ± 2.00 / 63.93 ± 1.86 + 4.55 ± 2.34 / 42.91 ± 4.05 + 49.23 ± 0.63 / 57.45 ± 0.53 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + + + Qwen/Qwen1.5-4B-Chat (few-shot) + 3950 + 152 + 32768 + False + 4,347 ± 893 / 1,135 ± 365 + 2.68 + 35.96 ± 2.61 / 28.58 ± 2.58 + 42.04 ± 1.42 / 60.76 ± 1.41 + 8.65 ± 1.52 / 49.56 ± 3.60 + 53.68 ± 0.94 / 59.73 ± 0.86 + 12.5.2 + 10.0.1 + 12.1.0 + 12.5.2 + merge-crew/da-sv-dare-ties-density-0.3 (few-shot, val) 7242 @@ -3264,23 +3281,6 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 - - Qwen/Qwen1.5-4B-Chat (few-shot) - 3950 - 152 - 32768 - False - 4,347 ± 893 / 1,135 ± 365 - 2.69 - 35.96 ± 2.61 / 28.58 ± 2.58 - 42.04 ± 1.42 / 60.76 ± 1.41 - 8.65 ± 1.52 / 49.56 ± 3.60 - 53.68 ± 0.94 / 59.73 ± 0.86 - 12.5.2 - 10.0.1 - 12.1.0 - 12.5.2 - distilbert/distilbert-base-multilingual-cased 135 @@ -3338,7 +3338,7 @@ title: Danish NLU 🇩🇰 32 4096 True - 3,194 ± 687 / 650 ± 216 + 8,681 ± 1,650 / 2,177 ± 717 2.72 41.37 ± 2.50 / 24.64 ± 2.50 42.60 ± 1.06 / 61.52 ± 0.75 @@ -3475,7 +3475,7 @@ title: Danish NLU 🇩🇰 2048 True 3,175 ± 456 / 1,186 ± 354 - 2.82 + 2.81 37.80 ± 2.75 / 24.74 ± 2.30 40.51 ± 1.75 / 55.84 ± 2.46 3.35 ± 1.84 / 44.60 ± 4.67 @@ -3519,6 +3519,23 @@ title: Danish NLU 🇩🇰 12.1.0 12.1.0 + + google/gemma-7b-it (few-shot) + 8538 + 256 + 8192 + False + 1,792 ± 249 / 668 ± 203 + 2.85 + 43.83 ± 1.93 / 34.03 ± 1.59 + 29.21 ± 1.92 / 52.86 ± 1.54 + 12.96 ± 1.67 / 55.83 ± 0.88 + 49.76 ± 0.59 / 56.52 ± 0.50 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + NorwAI/NorwAI-Llama2-7B (few-shot) 7033 @@ -3536,23 +3553,6 @@ title: Danish NLU 🇩🇰 12.10.5 12.10.5 - - google/gemma-7b-it (few-shot) - 8538 - 256 - 8192 - False - 1,792 ± 249 / 668 ± 203 - 2.86 - 43.83 ± 1.93 / 34.03 ± 1.59 - 29.21 ± 1.92 / 52.86 ± 1.54 - 12.96 ± 1.67 / 55.83 ± 0.88 - 49.76 ± 0.59 / 56.52 ± 0.50 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - mideind/IceBERT-xlmr-ic3 278 @@ -3723,6 +3723,23 @@ title: Danish NLU 🇩🇰 12.10.4 12.10.5 + + ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) + 3374 + 49 + 4096 + True + 10,246 ± 3,021 / 1,629 ± 550 + 2.98 + 37.37 ± 2.46 / 26.81 ± 
2.24 + 31.44 ± 1.82 / 48.96 ± 2.35 + 5.27 ± 2.40 / 40.63 ± 4.68 + 48.41 ± 1.10 / 55.05 ± 0.97 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + Addedk/mbert-swedish-distilled-cased 135 @@ -3744,35 +3761,18 @@ title: Danish NLU 🇩🇰 HPLT/gpt-13b-nordic-prerelease (few-shot) 14030 131 - 4099 - True - 3,520 ± 736 / 823 ± 273 - 2.99 - 28.72 ± 2.61 / 20.53 ± 2.46 - 37.19 ± 3.92 / 53.63 ± 4.06 - 2.96 ± 1.73 / 46.67 ± 3.16 - 49.53 ± 1.49 / 54.83 ± 1.67 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - - - ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) - 3374 - 49 - 4096 - True - 10,246 ± 3,021 / 1,629 ± 550 - 2.99 - 37.37 ± 2.46 / 26.81 ± 2.24 - 31.44 ± 1.82 / 48.96 ± 2.35 - 5.27 ± 2.40 / 40.63 ± 4.68 - 48.41 ± 1.10 / 55.05 ± 0.97 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 4099 + True + 3,520 ± 736 / 823 ± 273 + 2.99 + 28.72 ± 2.61 / 20.53 ± 2.46 + 37.19 ± 3.92 / 53.63 ± 4.06 + 2.96 ± 1.73 / 46.67 ± 3.16 + 49.53 ± 1.49 / 54.83 ± 1.67 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 meta-llama/Llama-3.2-1B-Instruct (few-shot) @@ -3927,6 +3927,23 @@ title: Danish NLU 🇩🇰 12.1.0 12.1.0 + + utter-project/EuroLLM-1.7B-Instruct (few-shot) + 1657 + 128 + 4096 + True + 15,009 ± 4,072 / 2,702 ± 878 + 3.09 + 19.61 ± 2.68 / 17.44 ± 2.64 + 37.92 ± 1.74 / 46.23 ± 1.91 + 2.81 ± 1.13 / 38.15 ± 2.81 + 50.05 ± 1.02 / 56.82 ± 0.77 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + HPLT/gpt-7b-nordic-prerelease (few-shot) 7550 @@ -3944,23 +3961,6 @@ title: Danish NLU 🇩🇰 12.3.2 12.3.2 - - utter-project/EuroLLM-1.7B-Instruct (few-shot) - 1657 - 128 - 4096 - True - 15,009 ± 4,072 / 2,702 ± 878 - 3.10 - 19.61 ± 2.68 / 17.44 ± 2.64 - 37.92 ± 1.74 / 46.23 ± 1.91 - 2.81 ± 1.13 / 38.15 ± 2.81 - 50.05 ± 1.02 / 56.82 ± 0.77 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking 135 @@ -3995,6 +3995,23 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 + + AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot) + 20918 + 64 + 2048 + True + 1,831 ± 587 / 268 ± 90 + 3.15 + 15.94 ± 1.59 / 14.05 ± 1.45 + 32.78 ± 4.04 / 47.79 ± 5.25 + 7.86 ± 1.87 / 41.91 ± 2.25 + 52.16 ± 0.84 / 60.27 ± 0.51 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + ibm-granite/granite-7b-base (few-shot) 6738 @@ -4029,23 +4046,6 @@ title: Danish NLU 🇩🇰 14.0.4 14.0.4 - - AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot) - 20918 - 64 - 2048 - True - 1,831 ± 587 / 268 ± 90 - 3.16 - 15.94 ± 1.59 / 14.05 ± 1.45 - 32.78 ± 4.04 / 47.79 ± 5.25 - 7.86 ± 1.87 / 41.91 ± 2.25 - 52.16 ± 0.84 / 60.27 ± 0.51 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - meta-llama/Llama-3.2-1B (few-shot) 1236 @@ -4070,7 +4070,7 @@ title: Danish NLU 🇩🇰 4096 True 3,136 ± 558 / 942 ± 290 - 3.19 + 3.18 23.87 ± 1.61 / 20.19 ± 1.60 31.21 ± 1.84 / 50.13 ± 2.79 2.04 ± 1.44 / 45.31 ± 3.73 @@ -4165,6 +4165,23 @@ title: Danish NLU 🇩🇰 12.1.0 12.4.0 + + mhenrichsen/danskgpt-tiny-chat (few-shot) + 1100 + 32 + 2048 + False + 1,745 ± 978 / 686 ± 159 + 3.25 + 22.31 ± 2.55 / 19.30 ± 2.14 + 34.05 ± 2.37 / 52.43 ± 2.46 + 0.70 ± 1.11 / 40.47 ± 3.04 + 41.82 ± 2.07 / 48.91 ± 2.47 + 9.1.2 + 9.1.2 + 9.1.2 + 12.4.0 + dbmdz/bert-medium-historic-multilingual-cased 42 @@ -4182,23 +4199,6 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 - - mhenrichsen/danskgpt-tiny-chat (few-shot) - 1100 - 32 - 2048 - False - 1,745 ± 978 / 686 ± 159 - 3.26 - 22.31 ± 2.55 / 19.30 ± 2.14 - 34.05 ± 2.37 / 52.43 ± 2.46 - 0.70 ± 1.11 / 40.47 ± 3.04 - 41.82 ± 2.07 / 48.91 ± 2.47 - 9.1.2 - 9.1.2 - 9.1.2 - 12.4.0 - dbmdz/bert-base-historic-multilingual-cased 111 @@ -4233,6 +4233,23 @@ title: Danish NLU 🇩🇰 9.2.0 12.5.1 + + NbAiLab/nb-llama-3.1-8B-Instruct (few-shot) + 8030 + 128 + 131072 + True + 1,296 ± 
335 / 246 ± 84 + 3.34 + 21.87 ± 4.65 / 18.10 ± 3.46 + 24.82 ± 6.87 / 35.81 ± 5.98 + 2.89 ± 2.02 / 41.50 ± 4.74 + 44.86 ± 1.12 / 52.94 ± 1.18 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + AI-Sweden-Models/gpt-sw3-6.7b (few-shot) 7111 @@ -4250,23 +4267,6 @@ title: Danish NLU 🇩🇰 11.0.0 11.0.0 - - NbAiLab/nb-llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131072 - True - 1,296 ± 335 / 246 ± 84 - 3.35 - 21.87 ± 4.65 / 18.10 ± 3.46 - 24.82 ± 6.87 / 35.81 ± 5.98 - 2.89 ± 2.02 / 41.50 ± 4.74 - 44.86 ± 1.12 / 52.94 ± 1.18 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - NbAiLab/nb-llama-3.1-8B (few-shot) 8030 @@ -4274,7 +4274,7 @@ title: Danish NLU 🇩🇰 131072 True 1,297 ± 338 / 245 ± 83 - 3.36 + 3.35 44.83 ± 1.57 / 29.75 ± 1.72 37.14 ± 1.61 / 44.74 ± 2.01 10.13 ± 5.10 / 38.99 ± 4.78 @@ -4335,6 +4335,23 @@ title: Danish NLU 🇩🇰 14.1.2 14.0.4 + + ibm-granite/granite-3.0-1b-a400m-instruct (few-shot) + 1335 + 49 + 4096 + True + 7,964 ± 2,255 / 1,299 ± 433 + 3.39 + 30.63 ± 2.60 / 20.55 ± 1.52 + 22.35 ± 2.26 / 44.97 ± 2.66 + 1.95 ± 1.48 / 46.94 ± 1.55 + 37.30 ± 0.69 / 43.95 ± 1.09 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot) 1445 @@ -4352,23 +4369,6 @@ title: Danish NLU 🇩🇰 12.1.0 12.4.0 - - ibm-granite/granite-3.0-1b-a400m-instruct (few-shot) - 1335 - 49 - 4096 - True - 7,964 ± 2,255 / 1,299 ± 433 - 3.40 - 30.63 ± 2.60 / 20.55 ± 1.52 - 22.35 ± 2.26 / 44.97 ± 2.66 - 1.95 ± 1.48 / 46.94 ± 1.55 - 37.30 ± 0.69 / 43.95 ± 1.09 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - Qwen/Qwen1.5-1.8B-Chat (few-shot) 1837 @@ -4488,6 +4488,23 @@ title: Danish NLU 🇩🇰 10.0.1 12.4.0 + + NorwAI/NorwAI-Mistral-7B-instruct (few-shot) + 7537 + 68 + 4096 + False + 3,027 ± 503 / 903 ± 296 + 3.50 + 13.78 ± 2.85 / 11.90 ± 2.13 + 42.16 ± 0.68 / 45.21 ± 0.45 + 3.52 ± 1.90 / 39.81 ± 3.18 + 20.02 ± 2.31 / 31.67 ± 2.19 + 12.10.5 + 12.10.4 + 12.10.5 + 12.10.5 + ibm-granite/granite-3.0-3b-a800m-base (few-shot) 3374 @@ -4505,23 +4522,6 @@ title: Danish NLU 🇩🇰 13.0.0 13.0.0 - - NorwAI/NorwAI-Mistral-7B-instruct (few-shot) - 7537 - 68 - 4096 - False - 3,027 ± 503 / 903 ± 296 - 3.51 - 13.78 ± 2.85 / 11.90 ± 2.13 - 42.16 ± 0.68 / 45.21 ± 0.45 - 3.52 ± 1.90 / 39.81 ± 3.18 - 20.02 ± 2.31 / 31.67 ± 2.19 - 12.10.5 - 12.10.4 - 12.10.5 - 12.10.5 - AI-Sweden-Models/gpt-sw3-356m (few-shot) 471 @@ -4692,6 +4692,23 @@ title: Danish NLU 🇩🇰 0.0.0 0.0.0 + + jannesg/bertsson + 124 + 50 + 512 + True + 15,314 ± 2,786 / 3,666 ± 1,201 + 3.71 + 32.63 ± 1.06 / 32.76 ± 1.02 + 24.11 ± 1.74 / 44.78 ± 2.78 + 2.91 ± 1.07 / 46.98 ± 3.78 + 15.37 ± 1.22 / 22.17 ± 1.24 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + state-spaces/mamba-2.8b-hf (few-shot) 2768 @@ -4709,23 +4726,6 @@ title: Danish NLU 🇩🇰 13.0.0 13.0.0 - - jannesg/bertsson - 124 - 50 - 512 - True - 15,314 ± 2,786 / 3,666 ± 1,201 - 3.72 - 32.63 ± 1.06 / 32.76 ± 1.02 - 24.11 ± 1.74 / 44.78 ± 2.78 - 2.91 ± 1.07 / 46.98 ± 3.78 - 15.37 ± 1.22 / 22.17 ± 1.24 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - NbAiLab/nb-llama-3.2-1B (few-shot) 1236 @@ -4733,7 +4733,7 @@ title: Danish NLU 🇩🇰 131072 True 3,424 ± 1,080 / 464 ± 158 - 3.74 + 3.73 9.20 ± 3.54 / 8.11 ± 3.24 32.94 ± 4.62 / 46.54 ± 5.68 1.59 ± 2.72 / 45.39 ± 3.81 @@ -4777,6 +4777,23 @@ title: Danish NLU 🇩🇰 12.6.1 12.6.1 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 3.79 + 20.03 ± 1.60 / 17.52 ± 1.50 + 15.96 ± 2.77 / 38.74 ± 2.88 + 0.86 ± 1.76 / 42.48 ± 3.82 + 28.98 ± 1.93 / 35.38 ± 2.10 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + PleIAs/Pleias-3b-Preview (few-shot) 3212 @@ -4852,7 +4869,7 @@ title: Danish NLU 🇩🇰 2051 True 8,536 
± 1,926 / 1,940 ± 619 - 3.90 + 3.89 13.39 ± 2.60 / 12.39 ± 2.46 17.94 ± 5.58 / 32.80 ± 3.63 -2.02 ± 2.28 / 40.63 ± 4.12 @@ -4886,7 +4903,7 @@ title: Danish NLU 🇩🇰 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.06 + 4.05 13.84 ± 1.95 / 13.12 ± 1.60 9.47 ± 3.30 / 25.66 ± 3.36 -0.36 ± 1.60 / 39.52 ± 3.19 @@ -5022,7 +5039,7 @@ title: Danish NLU 🇩🇰 512 True 2,214 ± 94 / 1,494 ± 229 - 4.23 + 4.22 16.04 ± 2.47 / 15.60 ± 2.62 17.37 ± 3.82 / 36.83 ± 4.86 1.34 ± 0.97 / 35.45 ± 3.20 @@ -5073,7 +5090,7 @@ title: Danish NLU 🇩🇰 512 True 7,840 ± 1,538 / 3,024 ± 438 - 4.28 + 4.27 12.87 ± 1.63 / 13.23 ± 1.55 18.61 ± 4.17 / 35.23 ± 3.90 0.30 ± 1.39 / 37.84 ± 3.88 @@ -5090,7 +5107,7 @@ title: Danish NLU 🇩🇰 8192 True 26,346 ± 7,812 / 4,082 ± 1,372 - 4.32 + 4.31 13.72 ± 1.83 / 13.41 ± 1.52 3.79 ± 3.11 / 21.06 ± 4.74 -0.45 ± 0.70 / 39.69 ± 4.95 @@ -5209,7 +5226,7 @@ title: Danish NLU 🇩🇰 512 True 1,373 ± 120 / 709 ± 172 - 4.72 + 4.71 0.00 ± 0.00 / 0.00 ± 0.00 4.81 ± 2.69 / 19.31 ± 1.01 0.00 ± 0.00 / 33.25 ± 0.23 diff --git a/dutch-nlg.csv b/dutch-nlg.csv index 40b19dc5..7f6e64a4 100644 --- a/dutch-nlg.csv +++ b/dutch-nlg.csv @@ -1,146 +1,147 @@ model_id,num_model_parameters,vocabulary_size,max_sequence_length,commercially_licensed,merge,speed,rank,conll_nl,dutch_social,scala_nl,squad_nl,wiki_lingua_nl,mmlu_nl,hellaswag_nl -"gpt-4-0613 (few-shot, val)",-1,100,8316,True,False,597,1.23,73.35,18.92,76.7,55.03,69.97,70.71,90.07 -"meta-llama/Meta-Llama-3-70B (few-shot, val)",70554,128,8317,True,False,312,1.41,72.91,19.08,54.33,63.99,70.41,65.33,71.06 -"gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,1.54,66.44,14.22,72.3,57.81,67.13,70.04,88.29 -"meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8317,True,False,1673,1.59,74.64,18.9,49.54,44.77,70.28,65.8,76.21 -meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,1.64,69.12,11.23,68.74,55.25,70.51,74.89,80.93 -"gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.68,76.75,10.8,56.26,55.55,66.86,73.11,92.69 -"152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32889,True,False,2126,1.78,67.0,15.33,55.48,61.02,68.95,54.36,70.26 +"gpt-4-0613 (few-shot, val)",-1,100,8316,True,False,597,1.22,73.35,18.92,76.7,55.03,69.97,70.71,90.07 +"meta-llama/Meta-Llama-3-70B (few-shot, val)",70554,128,8317,True,False,312,1.4,72.91,19.08,54.33,63.99,70.41,65.33,71.06 +"gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,1.53,66.44,14.22,72.3,57.81,67.13,70.04,88.29 +"meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8317,True,False,1673,1.58,74.64,18.9,49.54,44.77,70.28,65.8,76.21 +meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,1.63,69.12,11.23,68.74,55.25,70.51,74.89,80.93 +"gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.67,76.75,10.8,56.26,55.55,66.86,73.11,92.69 +"152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32889,True,False,2126,1.77,67.0,15.33,55.48,61.02,68.95,54.36,70.26 meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.8,68.82,11.41,61.66,55.43,69.51,68.61,69.72 -Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,1.81,71.32,9.12,63.96,58.36,66.59,67.82,78.26 +Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,1.82,71.32,9.12,63.96,58.36,66.59,67.82,78.26 google/gemma-2-27b-it (few-shot),27227,256,8320,True,False,1516,1.84,65.2,14.8,59.02,59.6,64.03,59.71,70.7 -"gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.85,69.12,12.36,58.88,45.88,64.37,70.81,84.34 
-meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.86,70.37,10.87,62.87,44.3,69.36,68.82,74.72 +"gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.84,69.12,12.36,58.88,45.88,64.37,70.81,84.34 +meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.85,70.37,10.87,62.87,44.3,69.36,68.82,74.72 Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.9,67.16,9.84,66.06,50.91,64.83,73.09,79.0 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.92,68.71,20.33,49.52,34.06,66.13,62.12,65.74 -"gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,1.97,55.72,11.13,67.28,54.2,64.09,67.45,72.71 +"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.91,68.71,20.33,49.52,34.06,66.13,62.12,65.74 +"gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,1.96,55.72,11.13,67.28,54.2,64.09,67.45,72.71 +CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.01,68.58,14.41,55.01,58.63,65.69,50.48,49.08 google/gemma-2-9b (few-shot),9242,256,8320,True,False,2038,2.01,57.13,17.43,31.39,59.33,66.59,53.71,53.26 -CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.02,68.58,14.41,55.01,58.63,65.69,50.48,49.08 -"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.05,64.15,12.67,62.44,45.65,64.26,56.45,62.09 -upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,2.05,65.37,11.93,41.67,67.75,68.83,40.18,65.34 -CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.07,62.07,13.7,35.14,49.15,74.4,46.76,60.36 +"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.04,64.15,12.67,62.44,45.65,64.26,56.45,62.09 +upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,2.04,65.37,11.93,41.67,67.75,68.83,40.18,65.34 +ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,2.08,68.17,10.56,56.89,53.05,64.01,58.74,65.18 google/gemma-2-9b-it (few-shot),9242,256,8320,True,False,2062,2.12,52.62,11.78,59.23,55.78,67.39,53.99,65.58 -ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,2.13,68.17,10.56,56.89,53.05,64.01,58.74,65.18 -"gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,2.17,68.96,8.81,58.95,55.57,69.13,42.28,61.52 -"meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4221,True,False,1892,2.2,66.5,7.82,49.55,65.26,67.28,49.41,54.5 -mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.23,66.51,11.91,34.46,59.23,68.87,40.81,60.68 -mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024128,True,False,7095,2.28,66.57,10.1,40.31,59.99,68.66,42.5,48.51 +"gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,2.12,68.96,8.81,58.95,55.57,69.13,42.28,61.52 +"meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4221,True,False,1892,2.22,66.5,7.82,49.55,65.26,67.28,49.41,54.5 +mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.27,63.3,11.82,32.2,59.45,68.87,40.81,60.68 +"meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4221,True,False,1979,2.29,64.0,13.3,30.88,54.14,68.71,45.47,42.91 +mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024128,True,False,7095,2.29,66.57,10.1,40.31,59.99,68.66,42.5,48.51 nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.29,55.08,10.8,61.31,49.8,52.72,68.15,72.5 -"meta-llama/Llama-2-70b-chat-hf (few-shot, 
val)",68977,32,4221,True,False,1979,2.3,64.0,13.3,30.88,54.14,68.71,45.47,42.91 -senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.34,64.25,13.66,28.59,49.64,68.66,35.37,47.5 -meta-llama/Llama-3.1-8B (few-shot),8030,128,131200,True,False,2986,2.37,64.79,11.95,32.97,63.89,66.29,38.44,30.88 -mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.38,58.8,12.5,45.22,47.03,65.95,43.41,43.33 -"mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.38,63.53,11.25,27.76,50.94,71.2,40.23,47.87 -CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.39,60.81,7.9,31.12,63.0,72.9,32.37,53.32 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.41,68.72,14.67,32.91,45.36,67.62,36.18,33.91 -utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.42,43.06,11.95,40.85,63.42,69.33,41.9,36.69 +CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.3,53.02,13.68,29.97,53.4,73.93,38.81,40.95 +senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.33,64.25,13.66,28.59,49.64,68.66,35.37,47.5 +mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.37,58.8,12.5,45.22,47.03,65.95,43.41,43.33 +"mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.37,63.53,11.25,27.76,50.94,71.2,40.23,47.87 +meta-llama/Llama-3.1-8B (few-shot),8030,128,131200,True,False,2986,2.38,64.79,11.95,32.97,63.89,66.29,38.44,30.88 +CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.4,60.81,7.9,31.12,63.0,72.9,32.37,53.32 +utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.41,43.06,11.95,40.85,63.42,69.33,41.9,36.69 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.42,68.72,14.67,32.91,45.36,67.62,36.18,33.91 mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.43,64.81,12.99,39.38,49.08,66.41,45.6,24.71 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131200,True,False,1005,2.5,69.76,9.09,37.58,41.26,68.84,40.98,39.98 +skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.49,62.16,8.92,32.76,56.87,68.39,46.17,33.31 +"mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.5,64.71,11.14,25.22,46.34,67.71,38.49,47.66 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.51,62.86,15.11,39.11,36.48,64.42,32.99,42.56 -"mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.51,64.71,11.14,25.22,46.34,67.71,38.49,47.66 -skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.52,62.16,8.92,32.76,56.87,68.39,46.17,33.31 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.53,62.26,10.45,30.3,62.99,65.17,36.38,28.33 +yhavinga/Boreas-7B-chat (few-shot),7242,32,32768,False,False,2913,2.53,60.22,11.97,30.94,52.19,66.6,35.34,31.47 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.54,61.21,9.03,38.15,44.91,67.58,41.03,44.57 -yhavinga/Boreas-7B-chat (few-shot),7242,32,32768,False,False,2913,2.56,60.22,11.97,30.94,52.19,66.6,35.34,31.47 -"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.58,62.41,12.64,74.06,35.77,64.25,27.77,14.21 -nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.58,66.29,12.71,31.39,48.33,64.1,37.55,29.69 -robinsmits/Qwen1.5-7B-Dutch-Chat-Sft-Bf16 (few-shot),7719,152,32768,False,False,2413,2.58,56.83,14.79,23.58,55.9,65.06,33.73,29.29 -meta-llama/Meta-Llama-3-8B 
(few-shot),8030,128,8192,True,False,1335,2.59,62.26,10.45,30.3,62.99,65.17,36.38,28.33 +nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.56,66.29,12.71,31.39,48.33,64.1,37.55,29.69 +"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.57,62.41,12.64,74.06,35.77,64.25,27.77,14.21 +robinsmits/Qwen1.5-7B-Dutch-Chat-Sft-Bf16 (few-shot),7719,152,32768,False,False,2413,2.57,56.83,14.79,23.58,55.9,65.06,33.73,29.29 +ReBatch/Reynaerde-7B-Instruct (few-shot),7248,33,32768,False,False,2562,2.61,59.16,10.39,19.5,60.96,68.19,32.93,28.04 +robinsmits/Qwen1.5-7B-Dutch-Chat (few-shot),7719,152,32768,False,False,4686,2.62,57.81,14.62,25.34,56.81,60.51,34.78,30.29 ReBatch/Reynaerde-7B-Chat (few-shot),7248,33,32768,False,False,2554,2.63,56.22,11.22,20.04,61.15,68.28,32.44,31.24 -robinsmits/Qwen1.5-7B-Dutch-Chat (few-shot),7719,152,32768,False,False,4686,2.63,57.81,14.62,25.34,56.81,60.51,34.78,30.29 -ReBatch/Reynaerde-7B-Instruct (few-shot),7248,33,32768,False,False,2562,2.66,59.16,10.39,19.5,60.96,68.19,32.93,28.04 mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.67,47.6,10.62,61.64,24.02,54.51,50.32,64.62 +ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,2.68,52.32,8.46,42.42,53.12,66.0,34.34,26.46 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.68,53.62,13.37,23.47,61.2,62.34,34.69,31.36 ReBatch/Llama-3-8B-dutch (few-shot),8030,128,8317,False,False,3800,2.69,60.14,11.07,15.67,59.93,64.49,36.13,28.25 -ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,2.72,52.26,8.46,42.42,53.11,66.01,34.35,31.89 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.72,61.68,8.97,36.57,33.88,64.8,37.45,42.45 +Rijgersberg/Mistral-7B-v0.1-chat-nl (few-shot),7242,32,32768,False,False,5907,2.73,56.73,11.08,19.41,58.91,68.05,31.39,16.49 +google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.73,47.75,7.68,28.28,61.49,65.6,42.19,31.45 NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.74,62.81,11.28,28.57,38.75,65.35,39.53,26.71 -Rijgersberg/Mistral-7B-v0.1-chat-nl (few-shot),7242,32,32768,False,False,5907,2.74,56.73,11.08,19.41,58.91,68.05,31.39,16.49 mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.76,55.56,12.37,21.5,50.77,67.99,22.86,24.8 -google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.77,47.75,7.68,28.28,61.49,65.6,42.19,31.45 -mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.78,58.15,7.94,25.41,62.56,64.24,35.49,19.88 -ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4224,True,False,10194,2.8,52.52,13.85,17.72,53.5,66.23,24.62,15.79 +ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4224,True,False,10194,2.79,52.52,13.85,17.72,53.5,66.23,24.62,15.79 +mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.8,58.15,7.94,25.41,62.56,64.24,35.49,19.88 ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4224,True,False,10187,2.81,47.28,12.12,12.74,60.36,64.26,26.88,34.05 -NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.83,62.76,13.83,24.44,26.17,63.34,38.5,20.26 +NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.82,62.76,13.83,24.44,26.17,63.34,38.5,20.26 +alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.83,56.76,7.11,23.55,61.89,64.33,34.92,23.87 BramVanroy/GEITje-7B-ultra (few-shot),7242,32,8192,False,False,2475,2.84,42.2,12.78,18.23,53.41,68.3,26.92,25.72 
-alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.85,56.76,7.11,23.55,61.89,64.33,34.92,23.87 -mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.87,56.52,7.02,23.41,61.9,64.37,34.93,23.73 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,2.88,50.31,12.58,14.72,56.19,64.4,28.31,21.25 +mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.84,56.52,7.02,23.41,61.9,64.37,34.93,23.73 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,2.87,50.31,12.58,14.72,56.19,64.4,28.31,21.25 neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,2.88,34.21,11.94,6.18,60.44,64.48,29.76,38.79 +meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.89,57.8,8.57,17.4,56.35,66.11,27.15,19.18 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131200,False,False,10424,2.89,43.66,12.87,17.94,47.77,66.74,33.8,21.02 +RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.9,57.66,8.41,16.93,56.29,66.22,25.7,17.92 Rijgersberg/GEITje-7B-chat-v2 (few-shot),7242,32,32768,False,False,5908,2.91,42.12,11.06,19.71,59.19,65.55,27.71,18.03 google/gemma-2-2b-it (few-shot),2614,256,8320,True,False,5374,2.91,40.58,11.17,19.63,49.3,66.83,29.3,31.43 -occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.91,53.78,7.78,16.23,63.09,66.46,28.37,15.25 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.92,63.29,13.81,8.16,56.64,63.08,16.86,6.24 -meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.93,57.8,8.57,17.4,56.35,66.11,27.15,19.18 -"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.94,61.15,12.71,35.26,41.27,64.93,8.55,10.69 -RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.95,57.66,8.41,16.93,56.29,66.22,25.7,17.92 -microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,2.95,44.27,12.84,10.44,56.4,62.6,28.63,22.58 -BramVanroy/fietje-2b-instruct (few-shot),2775,50,2048,False,False,4710,2.96,36.5,13.7,4.81,60.63,66.01,26.76,12.93 -google/gemma-7b-it (few-shot),8538,256,8317,False,False,1792,2.97,53.93,12.83,6.58,53.45,65.89,19.58,13.68 +"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.93,61.15,12.71,35.26,41.27,64.93,8.55,10.69 +occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.93,53.78,7.78,16.23,63.09,66.46,28.37,15.25 +microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,2.94,44.27,12.84,10.44,56.4,62.6,28.63,22.58 +BramVanroy/fietje-2b-instruct (few-shot),2775,50,2048,False,False,4710,2.95,36.5,13.7,4.81,60.63,66.01,26.76,12.93 +google/gemma-7b-it (few-shot),8538,256,8317,False,False,1792,2.96,53.93,12.83,6.58,53.45,65.89,19.58,13.68 +NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.98,69.37,14.35,29.13,0.34,52.69,50.9,34.78 +Rijgersberg/GEITje-7B-chat (few-shot),7242,32,32768,False,False,5920,2.98,50.69,8.16,20.45,54.48,66.92,24.89,9.84 BramVanroy/fietje-2b-chat (few-shot),2775,50,2048,False,False,4704,2.99,39.57,13.25,9.31,60.26,65.37,26.63,11.62 -meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,3.0,52.55,4.26,24.57,60.99,64.02,28.73,19.43 -NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,3.01,69.37,14.35,29.13,0.34,52.69,50.9,34.78 -Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,3.01,42.52,14.68,4.07,55.18,62.75,24.25,16.21 +Qwen/Qwen1.5-4B-Chat 
(few-shot),3950,152,32768,False,False,4347,3.0,42.52,14.68,4.07,55.18,62.75,24.25,16.21 +meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,3.01,52.55,4.26,24.57,60.99,64.02,28.73,19.43 Rijgersberg/GEITje-7B (few-shot),7242,32,32768,True,False,5887,3.02,47.53,4.36,30.67,56.55,67.58,28.12,11.7 -nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.02,60.11,11.12,32.68,0.0,64.01,41.0,39.2 -Rijgersberg/GEITje-7B-chat (few-shot),7242,32,32768,False,False,5920,3.03,50.69,8.16,20.45,54.48,66.92,24.89,9.84 -ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.03,49.25,9.45,11.87,54.2,64.77,21.52,22.32 -meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,3.03,50.23,10.07,14.73,53.42,67.59,20.19,11.42 -mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,3.05,52.72,7.91,18.14,52.75,64.77,26.06,14.26 -ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,3.06,60.72,12.38,10.96,51.2,63.23,15.26,4.03 -occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,3.06,51.31,7.41,13.04,59.28,64.66,27.12,13.99 +nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.03,60.11,11.12,32.68,0.0,64.01,41.0,39.2 +ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.04,49.25,9.45,11.87,54.2,64.77,21.52,22.32 +meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,3.04,50.23,10.07,14.73,53.42,67.59,20.19,11.42 +occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,3.04,51.31,7.41,13.04,59.28,64.66,27.12,13.99 +ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,3.05,60.72,12.38,10.96,51.2,63.23,15.26,4.03 +mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,3.07,52.72,7.91,18.14,52.75,64.77,26.06,14.26 BramVanroy/fietje-2b (few-shot),2780,51,2048,True,False,4804,3.09,33.92,13.39,6.75,58.57,61.49,24.66,4.78 -01-ai/Yi-1.5-6B (few-shot),6061,64,4224,True,False,2867,3.12,51.18,9.23,1.99,54.66,60.81,28.03,23.64 -BramVanroy/GEITje-7B-ultra-sft (few-shot),7242,32,8192,False,False,5979,3.15,39.41,7.0,16.1,53.02,67.74,25.8,12.49 -ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.15,42.52,9.91,0.69,56.95,63.71,20.93,24.42 -timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,3.2,54.56,8.43,10.99,55.91,57.88,25.12,10.65 -meta-llama/Llama-3.2-3B (few-shot),3213,128,131200,True,False,3713,3.21,47.4,7.9,3.1,56.53,62.27,29.18,10.34 -meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,3.28,40.49,7.1,18.66,59.92,62.58,17.36,6.68 -Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.3,35.74,12.55,0.23,51.3,58.9,22.2,13.11 +01-ai/Yi-1.5-6B (few-shot),6061,64,4224,True,False,2867,3.13,51.18,9.23,1.99,54.66,60.81,28.03,23.64 +BramVanroy/GEITje-7B-ultra-sft (few-shot),7242,32,8192,False,False,5979,3.13,39.41,7.0,16.1,53.02,67.74,25.8,12.49 +ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.16,42.52,9.91,0.69,56.95,63.71,20.93,24.42 +timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,3.16,54.56,8.43,10.99,55.91,57.88,25.12,10.65 +meta-llama/Llama-3.2-3B (few-shot),3213,128,131200,True,False,3713,3.24,47.4,7.9,3.1,56.53,62.27,29.18,10.34 +meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,3.26,40.49,7.1,18.66,59.92,62.58,17.36,6.68 +Qwen/Qwen1.5-4B 
(few-shot),3950,152,32768,True,False,3248,3.29,35.74,12.55,0.23,51.3,58.9,22.2,13.11 TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,3.3,48.24,11.37,10.73,54.83,64.32,0.72,-0.47 -allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,3.42,46.95,4.34,3.46,57.07,61.31,18.21,9.03 AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,3.43,35.3,15.67,1.76,45.05,59.15,6.24,0.47 -google/gemma-2-2b (few-shot),2614,256,8320,True,False,5235,3.47,22.63,8.11,8.04,52.39,61.9,22.76,8.5 -openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.47,39.24,4.25,11.48,54.18,65.55,13.37,9.13 +allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,3.43,46.95,4.34,3.46,57.07,61.31,18.21,9.03 +ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.43,48.53,10.15,4.88,45.38,59.56,7.38,2.69 +openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.46,39.24,4.25,11.48,54.18,65.55,13.37,9.13 ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.48,50.88,12.39,3.31,48.44,52.5,9.22,4.45 -ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.48,48.53,10.15,4.88,45.38,59.56,7.38,2.69 AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2048,True,False,1831,3.5,24.44,18.4,4.85,39.83,54.89,4.38,1.95 -meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131200,False,False,7436,3.53,42.01,9.15,1.11,40.04,58.72,17.71,6.98 -MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.55,36.61,8.77,3.52,59.51,54.5,14.9,7.26 -ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.55,33.73,7.45,3.78,43.6,64.37,13.67,11.51 -ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.59,37.39,7.51,3.11,49.6,57.63,14.52,6.6 +MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.5,36.61,8.77,3.52,59.51,54.5,14.9,7.26 +google/gemma-2-2b (few-shot),2614,256,8320,True,False,5235,3.5,22.63,8.11,8.04,52.39,61.9,22.76,8.5 +ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.53,33.73,7.45,3.78,43.6,64.37,13.67,11.51 +meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131200,False,False,7436,3.54,42.01,9.15,1.11,40.04,58.72,17.71,6.98 +ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.57,37.39,7.51,3.11,49.6,57.63,14.52,6.6 RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.6,35.49,11.36,2.52,37.49,62.24,5.41,0.15 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.63,42.35,0.78,-0.02,47.61,65.32,14.59,11.78 +openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.62,42.35,0.78,-0.02,47.61,65.32,14.59,11.78 ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.64,30.6,13.26,1.04,39.69,61.2,2.1,-0.52 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.66,32.45,7.03,5.58,51.18,61.96,2.53,1.04 +utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.64,32.45,7.03,5.58,51.18,61.96,2.53,1.04 google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.68,38.85,11.25,-2.27,45.95,51.99,10.97,1.45 -google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.73,16.9,9.95,0.41,49.15,58.61,10.94,3.29 -stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.77,36.58,6.32,4.01,52.81,52.24,10.24,6.75 -Qwen/Qwen1.5-1.8B-Chat 
(few-shot),1837,152,32768,False,False,8304,3.81,23.44,6.82,4.11,33.16,60.91,12.11,6.41 -NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,3.84,24.15,8.31,1.6,37.08,59.43,3.04,3.7 -Tweeties/tweety-7b-dutch-v24a (few-shot),7391,50,1024,True,False,2971,3.84,35.83,12.47,16.81,0.0,51.61,20.21,5.94 +stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.7,36.58,6.32,4.01,52.81,52.24,10.24,6.75 +google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.74,16.9,9.95,0.41,49.15,58.61,10.94,3.29 +NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,3.79,24.15,8.31,1.6,37.08,59.43,3.04,3.7 +Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.79,23.44,6.82,4.11,33.16,60.91,12.11,6.41 +Tweeties/tweety-7b-dutch-v24a (few-shot),7391,50,1024,True,False,2971,3.83,35.83,12.47,16.81,0.0,51.61,20.21,5.94 +HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.86,31.84,1.56,5.05,40.55,60.35,13.39,4.57 HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.86,22.84,4.6,2.55,40.33,58.31,14.32,3.87 -HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.87,31.84,1.56,5.05,40.55,60.35,13.39,4.57 -PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.95,31.13,7.24,1.23,32.13,56.85,1.79,-0.63 -Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.95,18.66,8.59,0.34,26.74,62.36,2.09,1.44 -LumiOpen/Viking-13B (few-shot),14030,131,4224,True,False,840,3.97,36.74,8.57,3.01,32.32,51.76,0.67,-0.3 -ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.97,12.76,9.35,0.69,37.71,56.45,-0.05,-0.85 -Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,4.02,11.66,5.2,2.89,34.6,55.0,13.56,5.89 -allenai/OLMo-7B (few-shot),6888,50,2176,True,False,5403,4.04,37.37,9.55,0.05,34.81,45.22,3.92,0.16 +Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.9,18.66,8.59,0.34,26.74,62.36,2.09,1.44 +LumiOpen/Viking-13B (few-shot),14030,131,4224,True,False,840,3.92,36.74,8.57,3.01,32.32,51.76,0.67,-0.3 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.93,31.13,7.24,1.23,32.13,56.85,1.79,-0.63 +ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.98,12.76,9.35,0.69,37.71,56.45,-0.05,-0.85 +Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,4.03,11.66,5.2,2.89,34.6,55.0,13.56,5.89 Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.05,28.3,4.54,-0.42,20.81,58.4,7.44,1.7 +allenai/OLMo-7B (few-shot),6888,50,2176,True,False,5403,4.05,37.37,9.55,0.05,34.81,45.22,3.92,0.16 meta-llama/Llama-3.2-1B (few-shot),1236,128,131200,True,False,7577,4.07,22.03,4.25,1.46,41.76,51.56,5.46,-0.58 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.08,23.58,7.9,1.79,26.11,53.77,-0.41,-0.45 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.11,38.22,4.99,1.85,27.77,49.31,-1.45,0.83 -NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,4.15,22.5,6.04,-0.61,26.96,56.55,0.27,1.47 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.22,20.95,6.84,-1.5,22.67,53.89,-0.45,-0.31 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.32,15.68,6.73,0.63,19.73,50.53,1.3,-0.36 -state-spaces/mamba-2.8b-hf (few-shot),2768,50,32896,True,False,2722,4.35,22.95,2.4,3.12,22.4,50.36,-0.48,1.28 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.1,23.58,7.9,1.79,26.11,53.77,-0.41,-0.45 +PleIAs/Pleias-1.2b-Preview 
(few-shot),1195,66,2048,True,False,10756,4.12,38.22,4.99,1.85,27.77,49.31,-1.45,0.83 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,4.15,28.25,3.73,0.76,19.08,57.2,5.45,2.34 +NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,4.16,22.5,6.04,-0.61,26.96,56.55,0.27,1.47 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.2,20.95,6.84,-1.5,22.67,53.89,-0.45,-0.31 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.3,15.68,6.73,0.63,19.73,50.53,1.3,-0.36 +state-spaces/mamba-2.8b-hf (few-shot),2768,50,32896,True,False,2722,4.34,22.95,2.4,3.12,22.4,50.36,-0.48,1.28 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2176,True,False,5484,4.37,18.7,3.7,2.19,38.08,43.79,0.65,-0.02 PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.43,21.32,4.37,-0.19,9.38,50.94,0.33,-0.81 -allenai/OLMo-1B (few-shot),1177,50,2176,True,False,8536,4.57,22.58,4.92,-1.27,6.64,43.43,-0.56,0.29 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.63,24.47,3.57,-2.03,10.18,44.43,-0.11,-0.01 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.65,15.82,-0.62,1.16,3.25,54.82,1.36,0.34 -RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.66,0.0,0.95,0.0,0.0,60.14,0.11,-0.13 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.67,17.49,2.01,-0.02,0.53,52.46,0.33,-0.1 -NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.95,3.8,0.97,-0.37,0.4,45.25,-0.98,-0.05 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,5.02,0.0,0.0,0.0,0.0,43.74,0.32,1.73 +allenai/OLMo-1B (few-shot),1177,50,2176,True,False,8536,4.58,22.58,4.92,-1.27,6.64,43.43,-0.56,0.29 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.62,24.47,3.57,-2.03,10.18,44.43,-0.11,-0.01 +HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.64,15.82,-0.62,1.16,3.25,54.82,1.36,0.34 +RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.65,0.0,0.95,0.0,0.0,60.14,0.11,-0.13 +HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.66,17.49,2.01,-0.02,0.53,52.46,0.33,-0.1 +NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.94,3.8,0.97,-0.37,0.4,45.25,-0.98,-0.05 +ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,5.01,0.0,0.0,0.0,0.0,43.74,0.32,1.73 ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,5.33,0.11,-0.67,-0.97,0.29,30.2,1.45,-0.56 diff --git a/dutch-nlg.md b/dutch-nlg.md index 779c56a9..f5b3d856 100644 --- a/dutch-nlg.md +++ b/dutch-nlg.md @@ -3,7 +3,7 @@ layout: leaderboard title: Dutch NLG 🇳🇱 --- -
-Last updated: 10/01/2025 12:30:37 CET
+Last updated: 11/01/2025 11:03:48 CET
@@ -47,7 +47,7 @@ title: Dutch NLG 🇳🇱 8316 True 597 ± 197 / 93 ± 33 - 1.23 + 1.22 73.35 ± 2.61 / 56.00 ± 2.82 18.92 ± 2.78 / 40.80 ± 2.43 76.70 ± 2.39 / 88.16 ± 1.21 @@ -70,7 +70,7 @@ title: Dutch NLG 🇳🇱 8317 True 312 ± 55 / 177 ± 51 - 1.41 + 1.40 72.91 ± 3.24 / 68.06 ± 4.62 19.08 ± 3.37 / 42.04 ± 2.31 54.33 ± 3.49 / 75.54 ± 2.31 @@ -93,7 +93,7 @@ title: Dutch NLG 🇳🇱 128000 True 576 ± 221 / 81 ± 28 - 1.54 + 1.53 66.44 ± 2.18 / 56.97 ± 2.87 14.22 ± 3.26 / 33.41 ± 3.24 72.30 ± 2.26 / 85.96 ± 1.13 @@ -116,7 +116,7 @@ title: Dutch NLG 🇳🇱 8317 True 1,673 ± 583 / 275 ± 85 - 1.59 + 1.58 74.64 ± 3.67 / 71.84 ± 4.01 18.90 ± 2.04 / 41.93 ± 1.60 49.54 ± 4.22 / 74.03 ± 2.52 @@ -139,7 +139,7 @@ title: Dutch NLG 🇳🇱 131072 True 799 ± 246 / 112 ± 38 - 1.64 + 1.63 69.12 ± 2.03 / 64.39 ± 2.33 11.23 ± 1.27 / 20.82 ± 0.81 68.74 ± 0.69 / 83.97 ± 0.59 @@ -162,7 +162,7 @@ title: Dutch NLG 🇳🇱 128000 True 916 ± 329 / 114 ± 38 - 1.68 + 1.67 76.75 ± 3.44 / 61.13 ± 4.40 10.80 ± 2.24 / 32.52 ± 2.18 56.26 ± 4.51 / 73.83 ± 3.26 @@ -185,7 +185,7 @@ title: Dutch NLG 🇳🇱 32889 True 2,126 ± 676 / 319 ± 104 - 1.78 + 1.77 67.00 ± 3.69 / 56.41 ± 4.29 15.33 ± 4.14 / 36.14 ± 2.91 55.48 ± 4.37 / 77.55 ± 2.24 @@ -231,7 +231,7 @@ title: Dutch NLG 🇳🇱 32768 True 2,258 ± 1,221 / 198 ± 67 - 1.81 + 1.82 71.32 ± 1.36 / 57.50 ± 3.60 9.12 ± 1.28 / 21.29 ± 0.75 63.96 ± 1.27 / 81.82 ± 0.72 @@ -277,7 +277,7 @@ title: Dutch NLG 🇳🇱 8191 True 637 ± 306 / 92 ± 31 - 1.85 + 1.84 69.12 ± 2.60 / 41.51 ± 2.79 12.36 ± 2.16 / 19.95 ± 1.34 58.88 ± 2.34 / 79.01 ± 1.24 @@ -300,7 +300,7 @@ title: Dutch NLG 🇳🇱 131072 True 1,353 ± 443 / 180 ± 61 - 1.86 + 1.85 70.37 ± 1.97 / 56.00 ± 2.79 10.87 ± 1.05 / 19.75 ± 0.82 62.87 ± 1.48 / 81.12 ± 0.82 @@ -346,7 +346,7 @@ title: Dutch NLG 🇳🇱 8191 True 784 ± 310 / 95 ± 28 - 1.92 + 1.91 68.71 ± 3.85 / 53.19 ± 4.36 20.33 ± 2.51 / 40.68 ± 1.79 49.52 ± 5.39 / 72.59 ± 3.64 @@ -369,7 +369,7 @@ title: Dutch NLG 🇳🇱 8191 True 436 ± 152 / 57 ± 21 - 1.97 + 1.96 55.72 ± 3.68 / 40.53 ± 2.95 11.13 ± 1.98 / 18.38 ± 1.21 67.28 ± 2.42 / 83.06 ± 1.36 @@ -385,6 +385,29 @@ title: Dutch NLG 🇳🇱 14.0.3 14.0.3 + + CohereForAI/c4ai-command-r-08-2024 (few-shot) + 32296 + 256 + 131072 + False + 1,909 ± 646 / 248 ± 84 + 2.01 + 68.58 ± 1.76 / 44.49 ± 2.32 + 14.41 ± 1.69 / 35.58 ± 1.84 + 55.01 ± 1.21 / 77.17 ± 0.63 + 58.63 ± 0.55 / 72.85 ± 0.32 + 65.69 ± 0.95 / 16.39 ± 0.64 + 50.48 ± 0.92 / 62.55 ± 0.71 + 49.08 ± 1.51 / 60.63 ± 1.34 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + google/gemma-2-9b (few-shot) 9242 @@ -408,29 +431,6 @@ title: Dutch NLG 🇳🇱 13.0.0 13.0.0 - - CohereForAI/c4ai-command-r-08-2024 (few-shot) - 32296 - 256 - 131072 - False - 1,909 ± 646 / 248 ± 84 - 2.02 - 68.58 ± 1.76 / 44.49 ± 2.32 - 14.41 ± 1.69 / 35.58 ± 1.84 - 55.01 ± 1.21 / 77.17 ± 0.63 - 58.63 ± 0.55 / 72.85 ± 0.32 - 65.69 ± 0.95 / 16.39 ± 0.64 - 50.48 ± 0.92 / 62.55 ± 0.71 - 49.08 ± 1.51 / 60.63 ± 1.34 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - gpt-4o-mini-2024-07-18 (zero-shot, val) unknown @@ -438,7 +438,7 @@ title: Dutch NLG 🇳🇱 8191 True 908 ± 303 / 96 ± 36 - 2.05 + 2.04 64.15 ± 2.49 / 38.77 ± 2.59 12.67 ± 2.02 / 18.63 ± 1.37 62.44 ± 3.24 / 80.58 ± 1.66 @@ -461,7 +461,7 @@ title: Dutch NLG 🇳🇱 4096 True 3,780 ± 906 / 799 ± 261 - 2.05 + 2.04 65.37 ± 1.61 / 46.10 ± 1.53 11.93 ± 1.80 / 34.67 ± 2.84 41.67 ± 1.53 / 69.81 ± 1.38 @@ -478,27 +478,27 @@ title: Dutch NLG 🇳🇱 12.5.3 - CohereForAI/aya-expanse-8b (few-shot) - 8028 + ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot) + 28411 256 - 8192 - False - 2,686 ± 685 / 491 ± 164 - 2.07 - 
62.07 ± 1.67 / 37.68 ± 1.28 - 13.70 ± 1.36 / 34.90 ± 0.68 - 35.14 ± 2.33 / 66.66 ± 1.50 - 49.15 ± 1.48 / 68.82 ± 0.68 - 74.40 ± 0.20 / 31.66 ± 0.46 - 46.76 ± 0.71 / 60.01 ± 0.54 - 60.36 ± 0.88 / 70.23 ± 0.64 - 13.0.0 + 4096 + True + 3,633 ± 1,236 / 777 ± 220 + 2.08 + 68.17 ± 1.75 / 51.61 ± 2.73 + 10.56 ± 1.04 / 19.29 ± 0.63 + 56.89 ± 0.82 / 78.31 ± 0.43 + 53.05 ± 2.18 / 71.82 ± 0.84 + 64.01 ± 0.23 / 15.23 ± 0.30 + 58.74 ± 0.88 / 68.94 ± 0.65 + 65.18 ± 0.73 / 73.34 ± 0.53 + 14.0.4 14.0.4 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 google/gemma-2-9b-it (few-shot) @@ -523,29 +523,6 @@ title: Dutch NLG 🇳🇱 13.0.0 13.0.0 - - ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot) - 28411 - 256 - 4096 - True - 3,633 ± 1,236 / 777 ± 220 - 2.13 - 68.17 ± 1.75 / 51.61 ± 2.73 - 10.56 ± 1.04 / 19.29 ± 0.63 - 56.89 ± 0.82 / 78.31 ± 0.43 - 53.05 ± 2.18 / 71.82 ± 0.84 - 64.01 ± 0.23 / 15.23 ± 0.30 - 58.74 ± 0.88 / 68.94 ± 0.65 - 65.18 ± 0.73 / 73.34 ± 0.53 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - gpt-3.5-turbo-0613 (few-shot, val) unknown @@ -553,7 +530,7 @@ title: Dutch NLG 🇳🇱 4095 True 921 ± 293 / 113 ± 37 - 2.17 + 2.12 68.96 ± 3.80 / 58.45 ± 3.71 8.81 ± 3.30 / 30.88 ± 2.25 58.95 ± 4.48 / 78.64 ± 2.32 @@ -576,7 +553,7 @@ title: Dutch NLG 🇳🇱 4221 True 1,892 ± 650 / 318 ± 105 - 2.20 + 2.22 66.50 ± 3.72 / 57.66 ± 3.78 7.82 ± 4.30 / 34.91 ± 2.53 49.55 ± 4.95 / 73.43 ± 3.38 @@ -599,22 +576,45 @@ title: Dutch NLG 🇳🇱 32768 True 1,302 ± 323 / 253 ± 86 - 2.23 - 66.51 ± 1.38 / 52.40 ± 2.62 - 11.91 ± 1.03 / 34.21 ± 1.08 - 34.46 ± 2.79 / 65.61 ± 2.58 - 59.23 ± 1.16 / 72.56 ± 0.80 + 2.27 + 63.30 ± 2.36 / 39.20 ± 2.16 + 11.82 ± 1.07 / 34.18 ± 1.11 + 32.20 ± 0.77 / 65.67 ± 0.69 + 59.45 ± 0.89 / 71.13 ± 0.60 68.87 ± 0.72 / 21.06 ± 1.13 40.81 ± 1.05 / 55.44 ± 0.80 60.68 ± 1.04 / 70.24 ± 0.85 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 13.0.0 13.0.0 13.0.0 + + meta-llama/Llama-2-70b-chat-hf (few-shot, val) + 68977 + 32 + 4221 + True + 1,979 ± 621 / 320 ± 105 + 2.29 + 64.00 ± 3.52 / 48.94 ± 3.83 + 13.30 ± 3.75 / 30.50 ± 2.48 + 30.88 ± 4.62 / 59.62 ± 4.50 + 54.14 ± 1.55 / 70.96 ± 1.01 + 68.71 ± 0.70 / 19.82 ± 0.90 + 45.47 ± 2.07 / 59.14 ± 1.69 + 42.91 ± 3.26 / 57.30 ± 2.39 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + mistralai/Mistral-Nemo-Instruct-2407 (few-shot) 12248 @@ -622,7 +622,7 @@ title: Dutch NLG 🇳🇱 1024128 True 7,095 ± 2,193 / 1,063 ± 344 - 2.28 + 2.29 66.57 ± 1.86 / 48.40 ± 2.67 10.10 ± 1.55 / 33.62 ± 2.04 40.31 ± 2.25 / 69.53 ± 1.51 @@ -662,27 +662,27 @@ title: Dutch NLG 🇳🇱 14.0.4 - meta-llama/Llama-2-70b-chat-hf (few-shot, val) - 68977 - 32 - 4221 - True - 1,979 ± 621 / 320 ± 105 + CohereForAI/aya-expanse-8b (few-shot) + 8028 + 256 + 8192 + False + 2,686 ± 685 / 491 ± 164 2.30 - 64.00 ± 3.52 / 48.94 ± 3.83 - 13.30 ± 3.75 / 30.50 ± 2.48 - 30.88 ± 4.62 / 59.62 ± 4.50 - 54.14 ± 1.55 / 70.96 ± 1.01 - 68.71 ± 0.70 / 19.82 ± 0.90 - 45.47 ± 2.07 / 59.14 ± 1.69 - 42.91 ± 3.26 / 57.30 ± 2.39 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 + 53.02 ± 1.86 / 30.09 ± 1.16 + 13.68 ± 1.32 / 34.87 ± 0.67 + 29.97 ± 2.13 / 64.01 ± 1.12 + 53.40 ± 1.34 / 69.31 ± 0.65 + 73.93 ± 0.30 / 30.81 ± 0.43 + 38.81 ± 1.13 / 53.87 ± 0.90 + 40.95 ± 1.72 / 55.25 ± 1.38 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 senseable/WestLake-7B-v2 (few-shot) @@ -691,7 +691,7 @@ title: Dutch NLG 🇳🇱 32768 False 5,993 ± 1,028 / 1,742 ± 561 - 2.34 + 2.33 64.25 ± 2.23 / 46.52 ± 1.72 13.66 ± 1.99 / 39.45 ± 1.52 
28.59 ± 1.48 / 61.24 ± 1.46 @@ -707,29 +707,6 @@ title: Dutch NLG 🇳🇱 12.6.1 12.6.1 - - meta-llama/Llama-3.1-8B (few-shot) - 8030 - 128 - 131200 - True - 2,986 ± 823 / 276 ± 94 - 2.37 - 64.79 ± 1.96 / 45.48 ± 2.24 - 11.95 ± 2.83 / 37.12 ± 2.19 - 32.97 ± 2.68 / 58.52 ± 2.92 - 63.89 ± 1.06 / 74.73 ± 1.02 - 66.29 ± 1.29 / 20.14 ± 1.64 - 38.44 ± 1.33 / 53.68 ± 1.01 - 30.88 ± 2.27 / 47.18 ± 1.81 - 12.11.0 - 12.11.0 - 12.11.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot) 46703 @@ -737,7 +714,7 @@ title: Dutch NLG 🇳🇱 32768 True 5,535 ± 1,837 / 760 ± 256 - 2.38 + 2.37 58.80 ± 3.37 / 37.95 ± 1.80 12.50 ± 1.74 / 33.57 ± 1.09 45.22 ± 1.65 / 71.80 ± 1.10 @@ -760,7 +737,7 @@ title: Dutch NLG 🇳🇱 8192 False 2,549 ± 472 / 784 ± 245 - 2.38 + 2.37 63.53 ± 3.80 / 50.43 ± 2.90 11.25 ± 4.22 / 39.00 ± 3.14 27.76 ± 4.44 / 62.44 ± 2.43 @@ -776,6 +753,29 @@ title: Dutch NLG 🇳🇱 9.3.2 9.3.2 + + meta-llama/Llama-3.1-8B (few-shot) + 8030 + 128 + 131200 + True + 2,986 ± 823 / 276 ± 94 + 2.38 + 64.79 ± 1.96 / 45.48 ± 2.24 + 11.95 ± 2.83 / 37.12 ± 2.19 + 32.97 ± 2.68 / 58.52 ± 2.92 + 63.89 ± 1.06 / 74.73 ± 1.02 + 66.29 ± 1.29 / 20.14 ± 1.64 + 38.44 ± 1.33 / 53.68 ± 1.01 + 30.88 ± 2.27 / 47.18 ± 1.81 + 12.11.0 + 12.11.0 + 12.11.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + CohereForAI/aya-23-8B (few-shot) 8028 @@ -783,7 +783,7 @@ title: Dutch NLG 🇳🇱 8192 False 2,707 ± 688 / 497 ± 166 - 2.39 + 2.40 60.81 ± 1.94 / 46.59 ± 3.32 7.90 ± 1.63 / 24.82 ± 0.95 31.12 ± 2.35 / 64.29 ± 1.88 @@ -799,29 +799,6 @@ title: Dutch NLG 🇳🇱 13.0.0 13.0.0 - - meta-llama/Meta-Llama-3-8B-Instruct (few-shot) - 8030 - 128 - 8192 - True - 1,007 ± 316 / 162 ± 45 - 2.41 - 68.72 ± 1.81 / 54.89 ± 2.10 - 14.67 ± 2.51 / 41.36 ± 2.04 - 32.91 ± 2.56 / 64.93 ± 1.97 - 45.36 ± 1.31 / 67.50 ± 0.69 - 67.62 ± 0.82 / 18.19 ± 1.14 - 36.18 ± 1.31 / 51.68 ± 1.05 - 33.91 ± 1.71 / 48.01 ± 1.47 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - utter-project/EuroLLM-9B-Instruct (few-shot) 9152 @@ -829,7 +806,7 @@ title: Dutch NLG 🇳🇱 4096 False 1,483 ± 321 / 379 ± 158 - 2.42 + 2.41 43.06 ± 1.89 / 30.50 ± 1.45 11.95 ± 2.25 / 33.40 ± 1.41 40.85 ± 3.31 / 68.94 ± 2.23 @@ -845,6 +822,29 @@ title: Dutch NLG 🇳🇱 13.1.0 13.1.0 + + meta-llama/Meta-Llama-3-8B-Instruct (few-shot) + 8030 + 128 + 8192 + True + 1,483 ± 377 / 287 ± 97 + 2.42 + 68.72 ± 1.81 / 54.89 ± 2.10 + 14.67 ± 2.51 / 41.36 ± 2.04 + 32.91 ± 2.56 / 64.93 ± 1.97 + 45.36 ± 1.31 / 67.50 ± 0.69 + 67.62 ± 0.82 / 18.19 ± 1.14 + 36.18 ± 1.31 / 51.68 ± 1.05 + 33.91 ± 1.71 / 48.01 ± 1.47 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + mistralai/Mixtral-8x7B-v0.1 (few-shot) 46703 @@ -869,28 +869,51 @@ title: Dutch NLG 🇳🇱 14.0.4 - meta-llama/Llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131200 - True - 1,005 ± 330 / 196 ± 74 - 2.50 - 69.76 ± 1.36 / 57.66 ± 1.36 - 9.09 ± 1.42 / 20.14 ± 0.84 - 37.58 ± 3.42 / 66.98 ± 2.22 - 41.26 ± 2.09 / 65.63 ± 0.90 - 68.84 ± 0.62 / 21.41 ± 1.20 - 40.98 ± 0.71 / 55.51 ± 0.59 - 39.98 ± 1.59 / 54.32 ± 1.27 + skole-gpt-mixtral (few-shot) + unknown + 32 + 32768 + False + 3,583 ± 977 / 686 ± 231 + 2.49 + 62.16 ± 1.09 / 45.76 ± 2.07 + 8.92 ± 1.08 / 24.28 ± 0.76 + 32.76 ± 2.94 / 65.17 ± 2.79 + 56.87 ± 0.92 / 72.57 ± 0.85 + 68.39 ± 0.73 / 20.83 ± 1.21 + 46.17 ± 0.64 / 59.57 ± 0.48 + 33.31 ± 1.83 / 49.12 ± 1.64 13.0.0 - 14.0.4 + 13.0.0 13.0.0 13.0.0 13.0.0 13.0.0 13.0.0 + + mlabonne/AlphaMonarch-7B (few-shot, val) + 7242 + 32 + 8192 + False + 5,340 ± 1,262 / 1,157 ± 375 + 2.50 + 64.71 ± 5.15 / 53.58 ± 3.82 + 11.14 ± 3.37 / 38.64 ± 2.36 + 
25.22 ± 5.45 / 61.28 ± 2.51 + 46.34 ± 1.07 / 66.56 ± 1.49 + 67.71 ± 0.59 / 17.76 ± 0.71 + 38.49 ± 2.59 / 53.52 ± 2.03 + 47.66 ± 2.91 / 60.31 ± 2.30 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + Nexusflow/Starling-LM-7B-beta (few-shot) 7242 @@ -914,51 +937,51 @@ title: Dutch NLG 🇳🇱 14.0.4 14.0.4 - - mlabonne/AlphaMonarch-7B (few-shot, val) - 7242 - 32 + + meta-llama/Meta-Llama-3-8B (few-shot) + 8030 + 128 8192 - False - 5,340 ± 1,262 / 1,157 ± 375 - 2.51 - 64.71 ± 5.15 / 53.58 ± 3.82 - 11.14 ± 3.37 / 38.64 ± 2.36 - 25.22 ± 5.45 / 61.28 ± 2.51 - 46.34 ± 1.07 / 66.56 ± 1.49 - 67.71 ± 0.59 / 17.76 ± 0.71 - 38.49 ± 2.59 / 53.52 ± 2.03 - 47.66 ± 2.91 / 60.31 ± 2.30 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 + True + 1,477 ± 376 / 285 ± 97 + 2.53 + 62.26 ± 2.20 / 42.41 ± 2.02 + 10.45 ± 2.69 / 33.45 ± 1.99 + 30.30 ± 3.94 / 62.28 ± 2.89 + 62.99 ± 1.00 / 73.73 ± 0.98 + 65.17 ± 1.24 / 18.63 ± 1.85 + 36.38 ± 0.86 / 52.08 ± 0.66 + 28.33 ± 2.31 / 45.29 ± 1.78 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 - skole-gpt-mixtral (few-shot) - unknown + yhavinga/Boreas-7B-chat (few-shot) + 7242 32 32768 False - 3,583 ± 977 / 686 ± 231 - 2.52 - 62.16 ± 1.09 / 45.76 ± 2.07 - 8.92 ± 1.08 / 24.28 ± 0.76 - 32.76 ± 2.94 / 65.17 ± 2.79 - 56.87 ± 0.92 / 72.57 ± 0.85 - 68.39 ± 0.73 / 20.83 ± 1.21 - 46.17 ± 0.64 / 59.57 ± 0.48 - 33.31 ± 1.83 / 49.12 ± 1.64 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 2,913 ± 459 / 1,129 ± 342 + 2.53 + 60.22 ± 1.55 / 38.72 ± 1.45 + 11.97 ± 1.80 / 35.17 ± 3.03 + 30.94 ± 4.81 / 62.66 ± 3.36 + 52.19 ± 1.96 / 67.52 ± 1.56 + 66.60 ± 1.93 / 20.00 ± 1.35 + 35.34 ± 1.32 / 51.45 ± 0.99 + 31.47 ± 2.90 / 47.64 ± 2.52 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 CohereForAI/c4ai-command-r-v01 (few-shot) @@ -984,27 +1007,27 @@ title: Dutch NLG 🇳🇱 14.1.1 - yhavinga/Boreas-7B-chat (few-shot) - 7242 - 32 - 32768 - False - 2,913 ± 459 / 1,129 ± 342 + nvidia/mistral-nemo-minitron-8b-base (few-shot) + 8414 + 131 + 8192 + True + 2,470 ± 836 / 326 ± 111 2.56 - 60.22 ± 1.55 / 38.72 ± 1.45 - 11.97 ± 1.80 / 35.17 ± 3.03 - 30.94 ± 4.81 / 62.66 ± 3.36 - 52.19 ± 1.96 / 67.52 ± 1.56 - 66.60 ± 1.93 / 20.00 ± 1.35 - 35.34 ± 1.32 / 51.45 ± 0.99 - 31.47 ± 2.90 / 47.64 ± 2.52 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 + 66.29 ± 2.06 / 47.69 ± 2.04 + 12.71 ± 2.10 / 37.65 ± 1.83 + 31.39 ± 1.27 / 63.48 ± 1.67 + 48.33 ± 1.71 / 58.63 ± 1.53 + 64.10 ± 0.62 / 17.65 ± 0.69 + 37.55 ± 1.03 / 52.98 ± 0.83 + 29.69 ± 2.55 / 44.05 ± 2.51 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 claude-3-5-sonnet-20241022 (zero-shot, val) @@ -1013,7 +1036,7 @@ title: Dutch NLG 🇳🇱 200000 True 193 ± 87 / 55 ± 19 - 2.58 + 2.57 62.41 ± 2.92 / 52.27 ± 2.69 12.64 ± 2.16 / 33.98 ± 1.59 74.06 ± 2.21 / 86.59 ± 1.26 @@ -1029,29 +1052,6 @@ title: Dutch NLG 🇳🇱 14.0.3 14.0.3 - - nvidia/mistral-nemo-minitron-8b-base (few-shot) - 8414 - 131 - 8192 - True - 2,470 ± 836 / 326 ± 111 - 2.58 - 66.29 ± 2.06 / 47.69 ± 2.04 - 12.71 ± 2.10 / 37.65 ± 1.83 - 31.39 ± 1.27 / 63.48 ± 1.67 - 48.33 ± 1.71 / 58.63 ± 1.53 - 64.10 ± 0.62 / 17.65 ± 0.69 - 37.55 ± 1.03 / 52.98 ± 0.83 - 29.69 ± 2.55 / 44.05 ± 2.51 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - robinsmits/Qwen1.5-7B-Dutch-Chat-Sft-Bf16 (few-shot) 7719 @@ -1059,7 +1059,7 @@ title: Dutch NLG 🇳🇱 32768 False 2,413 ± 463 / 700 ± 220 - 2.58 + 2.57 56.83 ± 2.31 / 46.81 ± 2.87 14.79 ± 1.96 / 41.48 ± 1.53 23.58 ± 2.69 / 50.85 ± 3.74 @@ -1076,43 +1076,20 @@ title: Dutch NLG 🇳🇱 
12.6.1 - meta-llama/Meta-Llama-3-8B (few-shot) - 8030 - 128 - 8192 - True - 1,335 ± 338 / 260 ± 88 - 2.59 - 62.26 ± 2.20 / 42.41 ± 2.02 - 10.45 ± 2.69 / 33.45 ± 1.99 - 30.30 ± 3.94 / 62.28 ± 2.89 - 62.99 ± 1.00 / 73.73 ± 0.98 - 65.17 ± 1.24 / 18.63 ± 1.85 - 36.38 ± 0.86 / 52.08 ± 0.66 - 28.33 ± 2.31 / 45.29 ± 1.78 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - - - ReBatch/Reynaerde-7B-Chat (few-shot) + ReBatch/Reynaerde-7B-Instruct (few-shot) 7248 33 32768 False - 2,554 ± 483 / 781 ± 247 - 2.63 - 56.22 ± 2.46 / 38.04 ± 1.69 - 11.22 ± 1.85 / 30.99 ± 1.36 - 20.04 ± 1.67 / 55.38 ± 3.62 - 61.15 ± 1.01 / 72.89 ± 0.88 - 68.28 ± 0.61 / 20.97 ± 0.82 - 32.44 ± 0.61 / 49.05 ± 0.48 - 31.24 ± 1.92 / 47.72 ± 1.69 + 2,562 ± 487 / 782 ± 247 + 2.61 + 59.16 ± 2.29 / 42.33 ± 2.15 + 10.39 ± 1.44 / 28.74 ± 1.05 + 19.50 ± 1.96 / 55.52 ± 3.92 + 60.96 ± 1.24 / 72.79 ± 0.95 + 68.19 ± 0.63 / 20.85 ± 0.78 + 32.93 ± 0.63 / 49.37 ± 0.55 + 28.04 ± 1.54 / 45.48 ± 1.35 13.0.0 13.0.0 13.0.0 @@ -1128,7 +1105,7 @@ title: Dutch NLG 🇳🇱 32768 False 4,686 ± 1,131 / 996 ± 326 - 2.63 + 2.62 57.81 ± 2.68 / 47.15 ± 2.77 14.62 ± 2.25 / 41.08 ± 1.81 25.34 ± 2.37 / 54.46 ± 3.43 @@ -1145,20 +1122,20 @@ title: Dutch NLG 🇳🇱 12.5.3 - ReBatch/Reynaerde-7B-Instruct (few-shot) + ReBatch/Reynaerde-7B-Chat (few-shot) 7248 33 32768 False - 2,562 ± 487 / 782 ± 247 - 2.66 - 59.16 ± 2.29 / 42.33 ± 2.15 - 10.39 ± 1.44 / 28.74 ± 1.05 - 19.50 ± 1.96 / 55.52 ± 3.92 - 60.96 ± 1.24 / 72.79 ± 0.95 - 68.19 ± 0.63 / 20.85 ± 0.78 - 32.93 ± 0.63 / 49.37 ± 0.55 - 28.04 ± 1.54 / 45.48 ± 1.35 + 2,554 ± 483 / 781 ± 247 + 2.63 + 56.22 ± 2.46 / 38.04 ± 1.69 + 11.22 ± 1.85 / 30.99 ± 1.36 + 20.04 ± 1.67 / 55.38 ± 3.62 + 61.15 ± 1.01 / 72.89 ± 0.88 + 68.28 ± 0.61 / 20.97 ± 0.82 + 32.44 ± 0.61 / 49.05 ± 0.48 + 31.24 ± 1.92 / 47.72 ± 1.69 13.0.0 13.0.0 13.0.0 @@ -1190,6 +1167,29 @@ title: Dutch NLG 🇳🇱 14.0.4 14.0.4 + + ibm-granite/granite-3.0-8b-base (few-shot) + 8171 + 49 + 4096 + True + 2,515 ± 625 / 476 ± 159 + 2.68 + 52.32 ± 1.98 / 41.98 ± 1.88 + 8.46 ± 1.09 / 21.30 ± 0.67 + 42.42 ± 3.42 / 68.81 ± 2.66 + 53.12 ± 1.81 / 63.79 ± 1.63 + 66.00 ± 0.61 / 18.47 ± 0.79 + 34.34 ± 0.87 / 50.51 ± 0.72 + 26.46 ± 3.88 / 43.33 ± 3.00 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + ibm-granite/granite-3.0-8b-instruct (few-shot) 8171 @@ -1237,50 +1237,27 @@ title: Dutch NLG 🇳🇱 12.7.0 - ibm-granite/granite-3.0-8b-base (few-shot) - 8171 - 49 - 4096 + meta-llama/Llama-3.1-8B-Instruct (few-shot) + 8030 + 128 + 131072 True - 2,515 ± 625 / 476 ± 159 + 1,473 ± 377 / 283 ± 96 2.72 - 52.26 ± 1.87 / 42.18 ± 1.90 - 8.46 ± 1.09 / 21.30 ± 0.67 - 42.42 ± 3.42 / 68.81 ± 2.66 - 53.11 ± 1.79 / 63.80 ± 1.61 - 66.01 ± 0.59 / 18.49 ± 0.79 - 34.35 ± 0.90 / 50.52 ± 0.73 - 31.89 ± 1.58 / 48.04 ± 1.12 - 14.0.4 + 61.68 ± 1.94 / 42.64 ± 1.85 + 8.97 ± 1.44 / 20.07 ± 0.82 + 36.57 ± 1.77 / 65.25 ± 1.94 + 33.88 ± 1.83 / 62.17 ± 0.91 + 64.80 ± 0.38 / 16.87 ± 0.29 + 37.45 ± 0.61 / 53.03 ± 0.48 + 42.45 ± 1.72 / 56.78 ± 1.31 + 14.1.2 14.1.2 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - - - NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) - 46998 - 68 - 32768 - True - 9,015 ± 2,966 / 1,121 ± 510 - 2.74 - 62.81 ± 3.06 / 43.07 ± 2.90 - 11.28 ± 2.13 / 32.44 ± 3.18 - 28.57 ± 2.14 / 60.59 ± 2.37 - 38.75 ± 5.65 / 58.21 ± 3.41 - 65.35 ± 0.98 / 18.66 ± 0.83 - 39.53 ± 0.99 / 54.24 ± 0.73 - 26.71 ± 3.11 / 44.58 ± 2.46 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 Rijgersberg/Mistral-7B-v0.1-chat-nl (few-shot) @@ -1289,7 +1266,7 @@ title: 
Dutch NLG 🇳🇱 32768 False 5,907 ± 1,028 / 1,695 ± 549 - 2.74 + 2.73 56.73 ± 1.95 / 38.97 ± 1.84 11.08 ± 1.46 / 32.20 ± 1.43 19.41 ± 2.55 / 57.17 ± 2.38 @@ -1305,29 +1282,6 @@ title: Dutch NLG 🇳🇱 12.5.2 12.5.2 - - mistralai/Mistral-7B-Instruct-v0.2 (few-shot) - 7242 - 32 - 32768 - False - 2,370 ± 416 / 711 ± 242 - 2.76 - 55.56 ± 2.66 / 39.56 ± 2.13 - 12.37 ± 1.64 / 37.37 ± 1.35 - 21.50 ± 1.70 / 59.10 ± 1.32 - 50.77 ± 0.95 / 66.54 ± 0.79 - 67.99 ± 0.49 / 19.54 ± 0.55 - 22.86 ± 1.89 / 41.71 ± 1.45 - 24.80 ± 1.77 / 42.93 ± 1.38 - 9.3.1 - 9.2.0 - 9.3.1 - 12.4.0 - 12.4.0 - 9.3.2 - 9.3.2 - google/gemma-7b (few-shot) 8538 @@ -1335,7 +1289,7 @@ title: Dutch NLG 🇳🇱 8192 True 1,378 ± 260 / 387 ± 119 - 2.77 + 2.73 47.75 ± 2.33 / 35.64 ± 1.89 7.68 ± 0.61 / 26.25 ± 1.18 28.28 ± 2.48 / 62.81 ± 1.70 @@ -1352,27 +1306,50 @@ title: Dutch NLG 🇳🇱 12.10.0 - mistralai/Mistral-7B-v0.1 (few-shot) + NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) + 46998 + 68 + 32768 + True + 9,015 ± 2,966 / 1,121 ± 510 + 2.74 + 62.81 ± 3.06 / 43.07 ± 2.90 + 11.28 ± 2.13 / 32.44 ± 3.18 + 28.57 ± 2.14 / 60.59 ± 2.37 + 38.75 ± 5.65 / 58.21 ± 3.41 + 65.35 ± 0.98 / 18.66 ± 0.83 + 39.53 ± 0.99 / 54.24 ± 0.73 + 26.71 ± 3.11 / 44.58 ± 2.46 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + + + mistralai/Mistral-7B-Instruct-v0.2 (few-shot) 7242 32 32768 - True - 1,446 ± 354 / 295 ± 100 - 2.78 - 58.15 ± 1.14 / 40.78 ± 1.91 - 7.94 ± 1.25 / 31.02 ± 3.45 - 25.41 ± 3.46 / 61.11 ± 2.36 - 62.56 ± 1.10 / 73.16 ± 0.93 - 64.24 ± 0.91 / 17.54 ± 1.10 - 35.49 ± 0.57 / 51.51 ± 0.42 - 19.88 ± 1.80 / 39.13 ± 1.56 - 9.1.2 - 9.1.2 - 9.1.2 - 12.5.1 - 11.0.0 - 9.2.0 - 9.2.0 + False + 2,370 ± 416 / 711 ± 242 + 2.76 + 55.56 ± 2.66 / 39.56 ± 2.13 + 12.37 ± 1.64 / 37.37 ± 1.35 + 21.50 ± 1.70 / 59.10 ± 1.32 + 50.77 ± 0.95 / 66.54 ± 0.79 + 67.99 ± 0.49 / 19.54 ± 0.55 + 22.86 ± 1.89 / 41.71 ± 1.45 + 24.80 ± 1.77 / 42.93 ± 1.38 + 9.3.1 + 9.2.0 + 9.3.1 + 12.4.0 + 12.4.0 + 9.3.2 + 9.3.2 ibm-granite/granite-3.0-2b-instruct (few-shot) @@ -1381,7 +1358,7 @@ title: Dutch NLG 🇳🇱 4224 True 10,194 ± 2,403 / 2,193 ± 731 - 2.80 + 2.79 52.52 ± 1.62 / 44.69 ± 2.23 13.85 ± 1.90 / 36.43 ± 2.11 17.72 ± 1.86 / 57.31 ± 1.52 @@ -1397,6 +1374,29 @@ title: Dutch NLG 🇳🇱 13.0.0 13.0.0 + + mistralai/Mistral-7B-v0.1 (few-shot) + 7242 + 32 + 32768 + True + 1,446 ± 354 / 295 ± 100 + 2.80 + 58.15 ± 1.14 / 40.78 ± 1.91 + 7.94 ± 1.25 / 31.02 ± 3.45 + 25.41 ± 3.46 / 61.11 ± 2.36 + 62.56 ± 1.10 / 73.16 ± 0.93 + 64.24 ± 0.91 / 17.54 ± 1.10 + 35.49 ± 0.57 / 51.51 ± 0.42 + 19.88 ± 1.80 / 39.13 ± 1.56 + 9.1.2 + 9.1.2 + 9.1.2 + 12.5.1 + 11.0.0 + 9.2.0 + 9.2.0 + ibm-granite/granite-3.0-2b-base (few-shot) 2534 @@ -1427,7 +1427,7 @@ title: Dutch NLG 🇳🇱 32768 True 2,368 ± 793 / 317 ± 108 - 2.83 + 2.82 62.76 ± 3.54 / 40.29 ± 1.82 13.83 ± 1.32 / 37.70 ± 1.94 24.44 ± 2.86 / 58.02 ± 2.28 @@ -1443,6 +1443,29 @@ title: Dutch NLG 🇳🇱 14.0.4 14.0.4 + + alpindale/Mistral-7B-v0.2-hf (few-shot) + 7242 + 32 + 32768 + True + 1,841 ± 297 / 651 ± 193 + 2.83 + 56.76 ± 1.52 / 42.03 ± 1.98 + 7.11 ± 1.17 / 26.36 ± 2.97 + 23.55 ± 2.76 / 59.14 ± 3.18 + 61.89 ± 1.10 / 72.41 ± 1.08 + 64.33 ± 0.72 / 17.66 ± 0.87 + 34.92 ± 1.07 / 51.02 ± 0.85 + 23.87 ± 1.80 / 42.03 ± 1.58 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + BramVanroy/GEITje-7B-ultra (few-shot) 7242 @@ -1466,29 +1489,6 @@ title: Dutch NLG 🇳🇱 10.0.1 10.0.1 - - alpindale/Mistral-7B-v0.2-hf (few-shot) - 7242 - 32 - 32768 - True - 1,841 ± 297 / 651 ± 193 - 2.85 - 56.76 ± 1.52 / 42.03 ± 1.98 - 7.11 ± 1.17 / 26.36 ± 2.97 - 23.55 ± 2.76 / 59.14 ± 
3.18 - 61.89 ± 1.10 / 72.41 ± 1.08 - 64.33 ± 0.72 / 17.66 ± 0.87 - 34.92 ± 1.07 / 51.02 ± 0.85 - 23.87 ± 1.80 / 42.03 ± 1.58 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - mistralai/Mistral-7B-v0.3 (few-shot) 7248 @@ -1496,7 +1496,7 @@ title: Dutch NLG 🇳🇱 32768 True 1,364 ± 343 / 266 ± 90 - 2.87 + 2.84 56.52 ± 1.42 / 41.84 ± 1.84 7.02 ± 1.21 / 26.40 ± 2.96 23.41 ± 2.91 / 59.14 ± 3.11 @@ -1518,8 +1518,8 @@ title: Dutch NLG 🇳🇱 32 4096 True - 3,194 ± 687 / 650 ± 216 - 2.88 + 8,681 ± 1,650 / 2,177 ± 717 + 2.87 50.31 ± 1.94 / 41.54 ± 2.19 12.58 ± 1.62 / 36.56 ± 1.79 14.72 ± 1.84 / 50.23 ± 3.10 @@ -1558,6 +1558,29 @@ title: Dutch NLG 🇳🇱 13.2.0 13.2.0 + + meta-llama/Llama-2-13b-chat-hf (few-shot) + 13016 + 32 + 4096 + True + 2,849 ± 622 / 723 ± 229 + 2.89 + 57.80 ± 1.53 / 39.43 ± 1.53 + 8.57 ± 1.27 / 29.84 ± 2.35 + 17.40 ± 1.54 / 57.26 ± 1.96 + 56.35 ± 0.85 / 69.69 ± 0.76 + 66.11 ± 0.58 / 18.31 ± 0.61 + 27.15 ± 1.09 / 44.95 ± 0.78 + 19.18 ± 1.46 / 38.84 ± 1.26 + 12.11.0 + 12.10.4 + 12.10.4 + 12.11.0 + 12.11.0 + 12.10.4 + 12.10.4 + meta-llama/Llama-3.2-3B-Instruct (few-shot) 3213 @@ -1581,6 +1604,29 @@ title: Dutch NLG 🇳🇱 13.0.0 13.0.0 + + RuterNorway/Llama-2-13b-chat-norwegian (few-shot) + unknown + 32 + 4096 + False + 3,254 ± 1,068 / 484 ± 173 + 2.90 + 57.66 ± 1.29 / 43.77 ± 2.78 + 8.41 ± 1.47 / 25.59 ± 1.30 + 16.93 ± 2.60 / 55.72 ± 3.35 + 56.29 ± 1.11 / 68.94 ± 0.81 + 66.22 ± 0.50 / 19.03 ± 0.51 + 25.70 ± 1.05 / 44.15 ± 0.82 + 17.92 ± 1.69 / 37.64 ± 1.43 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + Rijgersberg/GEITje-7B-chat-v2 (few-shot) 7242 @@ -1627,29 +1673,6 @@ title: Dutch NLG 🇳🇱 13.0.0 13.0.0 - - occiglot/occiglot-7b-eu5-instruct (few-shot) - 7242 - 32 - 32768 - False - 2,088 ± 352 / 706 ± 214 - 2.91 - 53.78 ± 1.86 / 41.29 ± 2.07 - 7.78 ± 1.43 / 24.33 ± 1.57 - 16.23 ± 2.49 / 55.09 ± 3.18 - 63.09 ± 1.18 / 73.88 ± 0.72 - 66.46 ± 0.67 / 19.49 ± 0.92 - 28.37 ± 0.99 / 45.81 ± 0.88 - 15.25 ± 1.71 / 35.83 ± 1.42 - 12.5.2 - 12.2.0 - 12.3.1 - 12.4.0 - 12.4.0 - 12.3.1 - 12.3.1 - ibm-granite/granite-8b-code-base-4k (few-shot) 8055 @@ -1673,29 +1696,6 @@ title: Dutch NLG 🇳🇱 13.0.0 13.0.0 - - meta-llama/Llama-2-13b-chat-hf (few-shot) - 13016 - 32 - 4096 - True - 2,849 ± 622 / 723 ± 229 - 2.93 - 57.80 ± 1.53 / 39.43 ± 1.53 - 8.57 ± 1.27 / 29.84 ± 2.35 - 17.40 ± 1.54 / 57.26 ± 1.96 - 56.35 ± 0.85 / 69.69 ± 0.76 - 66.11 ± 0.58 / 18.31 ± 0.61 - 27.15 ± 1.09 / 44.95 ± 0.78 - 19.18 ± 1.46 / 38.84 ± 1.26 - 12.11.0 - 12.10.4 - 12.10.4 - 12.11.0 - 12.11.0 - 12.10.4 - 12.10.4 - claude-3-5-haiku-20241022 (zero-shot, val) unknown @@ -1703,7 +1703,7 @@ title: Dutch NLG 🇳🇱 200000 True 277 ± 77 / 70 ± 25 - 2.94 + 2.93 61.15 ± 3.04 / 43.60 ± 2.00 12.71 ± 2.21 / 30.22 ± 1.53 35.26 ± 2.46 / 59.55 ± 1.69 @@ -1720,27 +1720,27 @@ title: Dutch NLG 🇳🇱 14.0.3 - RuterNorway/Llama-2-13b-chat-norwegian (few-shot) - unknown + occiglot/occiglot-7b-eu5-instruct (few-shot) + 7242 32 - 4096 + 32768 False - 3,254 ± 1,068 / 484 ± 173 - 2.95 - 57.66 ± 1.29 / 43.77 ± 2.78 - 8.41 ± 1.47 / 25.59 ± 1.30 - 16.93 ± 2.60 / 55.72 ± 3.35 - 56.29 ± 1.11 / 68.94 ± 0.81 - 66.22 ± 0.50 / 19.03 ± 0.51 - 25.70 ± 1.05 / 44.15 ± 0.82 - 17.92 ± 1.69 / 37.64 ± 1.43 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 + 2,088 ± 352 / 706 ± 214 + 2.93 + 53.78 ± 1.86 / 41.29 ± 2.07 + 7.78 ± 1.43 / 24.33 ± 1.57 + 16.23 ± 2.49 / 55.09 ± 3.18 + 63.09 ± 1.18 / 73.88 ± 0.72 + 66.46 ± 0.67 / 19.49 ± 0.92 + 28.37 ± 0.99 / 45.81 ± 0.88 + 15.25 ± 1.71 / 35.83 ± 1.42 + 12.5.2 + 12.2.0 + 12.3.1 + 12.4.0 + 12.4.0 + 12.3.1 + 12.3.1 
microsoft/Phi-3-mini-128k-instruct (few-shot) @@ -1749,7 +1749,7 @@ title: Dutch NLG 🇳🇱 131072 True 7,312 ± 1,668 / 1,609 ± 525 - 2.95 + 2.94 44.27 ± 2.14 / 34.47 ± 2.48 12.84 ± 1.82 / 36.64 ± 1.13 10.44 ± 1.58 / 48.93 ± 3.41 @@ -1772,7 +1772,7 @@ title: Dutch NLG 🇳🇱 2048 False 4,710 ± 1,040 / 1,188 ± 383 - 2.96 + 2.95 36.50 ± 2.72 / 28.73 ± 1.41 13.70 ± 2.29 / 40.77 ± 1.77 4.81 ± 2.20 / 43.19 ± 1.45 @@ -1795,7 +1795,7 @@ title: Dutch NLG 🇳🇱 8317 False 1,792 ± 249 / 668 ± 203 - 2.97 + 2.96 53.93 ± 2.71 / 47.48 ± 2.09 12.83 ± 2.37 / 34.00 ± 2.26 6.58 ± 3.36 / 48.51 ± 3.35 @@ -1811,52 +1811,6 @@ title: Dutch NLG 🇳🇱 12.10.0 12.10.0 - - BramVanroy/fietje-2b-chat (few-shot) - 2775 - 50 - 2048 - False - 4,704 ± 1,015 / 1,185 ± 375 - 2.99 - 39.57 ± 2.74 / 31.81 ± 1.55 - 13.25 ± 2.12 / 39.92 ± 1.66 - 9.31 ± 1.92 / 50.99 ± 2.59 - 60.26 ± 0.62 / 71.03 ± 0.65 - 65.37 ± 0.53 / 17.63 ± 0.74 - 26.63 ± 1.33 / 44.86 ± 1.00 - 11.62 ± 1.28 / 32.71 ± 1.00 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - - - meta-llama/Llama-2-13b-hf (few-shot) - 13016 - 32 - 4096 - True - 2,898 ± 637 / 736 ± 236 - 3.00 - 52.55 ± 1.64 / 43.32 ± 1.70 - 4.26 ± 2.09 / 28.32 ± 2.68 - 24.57 ± 3.54 / 54.94 ± 5.33 - 60.99 ± 0.95 / 72.74 ± 0.78 - 64.02 ± 0.84 / 16.97 ± 0.98 - 28.73 ± 0.90 / 45.62 ± 0.64 - 19.43 ± 1.53 / 38.57 ± 1.39 - 12.10.5 - 12.10.4 - 12.10.4 - 12.10.5 - 12.10.5 - 12.10.4 - 12.10.4 - NbAiLab/nb-llama-3.1-70B (few-shot) 70554 @@ -1864,7 +1818,7 @@ title: Dutch NLG 🇳🇱 131072 True 1,220 ± 411 / 158 ± 53 - 3.01 + 2.98 69.37 ± 2.25 / 59.14 ± 2.55 14.35 ± 1.64 / 31.18 ± 2.02 29.13 ± 3.95 / 58.71 ± 5.17 @@ -1880,6 +1834,52 @@ title: Dutch NLG 🇳🇱 14.0.4 14.0.4 + + Rijgersberg/GEITje-7B-chat (few-shot) + 7242 + 32 + 32768 + False + 5,920 ± 1,028 / 1,696 ± 550 + 2.98 + 50.69 ± 1.67 / 35.96 ± 2.63 + 8.16 ± 1.68 / 27.37 ± 1.95 + 20.45 ± 2.12 / 59.00 ± 1.21 + 54.48 ± 0.86 / 66.71 ± 0.59 + 66.92 ± 0.50 / 20.19 ± 0.69 + 24.89 ± 1.55 / 42.34 ± 1.14 + 9.84 ± 1.87 / 30.90 ± 1.58 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + + + BramVanroy/fietje-2b-chat (few-shot) + 2775 + 50 + 2048 + False + 4,704 ± 1,015 / 1,185 ± 375 + 2.99 + 39.57 ± 2.74 / 31.81 ± 1.55 + 13.25 ± 2.12 / 39.92 ± 1.66 + 9.31 ± 1.92 / 50.99 ± 2.59 + 60.26 ± 0.62 / 71.03 ± 0.65 + 65.37 ± 0.53 / 17.63 ± 0.74 + 26.63 ± 1.33 / 44.86 ± 1.00 + 11.62 ± 1.28 / 32.71 ± 1.00 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + Qwen/Qwen1.5-4B-Chat (few-shot) 3950 @@ -1887,7 +1887,7 @@ title: Dutch NLG 🇳🇱 32768 False 4,347 ± 893 / 1,135 ± 365 - 3.01 + 3.00 42.52 ± 2.25 / 37.46 ± 3.08 14.68 ± 1.40 / 40.53 ± 1.64 4.07 ± 2.16 / 35.24 ± 1.77 @@ -1903,6 +1903,29 @@ title: Dutch NLG 🇳🇱 12.1.0 12.1.0 + + meta-llama/Llama-2-13b-hf (few-shot) + 13016 + 32 + 4096 + True + 2,898 ± 637 / 736 ± 236 + 3.01 + 52.55 ± 1.64 / 43.32 ± 1.70 + 4.26 ± 2.09 / 28.32 ± 2.68 + 24.57 ± 3.54 / 54.94 ± 5.33 + 60.99 ± 0.95 / 72.74 ± 0.78 + 64.02 ± 0.84 / 16.97 ± 0.98 + 28.73 ± 0.90 / 45.62 ± 0.64 + 19.43 ± 1.53 / 38.57 ± 1.39 + 12.10.5 + 12.10.4 + 12.10.4 + 12.10.5 + 12.10.5 + 12.10.4 + 12.10.4 + Rijgersberg/GEITje-7B (few-shot) 7242 @@ -1933,7 +1956,7 @@ title: Dutch NLG 🇳🇱 8192 True 3,161 ± 676 / 1,247 ± 481 - 3.02 + 3.03 60.11 ± 1.96 / 37.17 ± 1.69 11.12 ± 1.15 / 29.33 ± 1.27 32.68 ± 1.80 / 64.15 ± 1.14 @@ -1949,29 +1972,6 @@ title: Dutch NLG 🇳🇱 14.1.1 14.1.1 - - Rijgersberg/GEITje-7B-chat (few-shot) - 7242 - 32 - 32768 - False - 5,920 ± 1,028 / 1,696 ± 550 - 3.03 - 50.69 ± 1.67 / 35.96 ± 2.63 - 8.16 ± 1.68 / 27.37 ± 1.95 - 20.45 ± 2.12 / 59.00 ± 1.21 - 
54.48 ± 0.86 / 66.71 ± 0.59 - 66.92 ± 0.50 / 20.19 ± 0.69 - 24.89 ± 1.55 / 42.34 ± 1.14 - 9.84 ± 1.87 / 30.90 ± 1.58 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) 3374 @@ -1979,7 +1979,7 @@ title: Dutch NLG 🇳🇱 4096 True 10,246 ± 3,021 / 1,629 ± 550 - 3.03 + 3.04 49.25 ± 2.57 / 36.48 ± 2.14 9.45 ± 1.76 / 39.66 ± 1.14 11.87 ± 2.68 / 47.32 ± 3.85 @@ -2002,7 +2002,7 @@ title: Dutch NLG 🇳🇱 4096 False 2,643 ± 455 / 800 ± 247 - 3.03 + 3.04 50.23 ± 2.34 / 37.12 ± 3.30 10.07 ± 1.84 / 35.66 ± 2.24 14.73 ± 1.62 / 54.59 ± 2.24 @@ -2019,27 +2019,27 @@ title: Dutch NLG 🇳🇱 9.3.1 - mistralai/Mistral-7B-Instruct-v0.1 (few-shot) + occiglot/occiglot-7b-eu5 (few-shot) 7242 32 32768 - False - 634 ± 179 / 110 ± 35 - 3.05 - 52.72 ± 2.58 / 33.51 ± 1.22 - 7.91 ± 2.16 / 27.82 ± 1.97 - 18.14 ± 2.10 / 55.42 ± 3.05 - 52.75 ± 0.88 / 67.15 ± 1.08 - 64.77 ± 0.97 / 16.55 ± 0.81 - 26.06 ± 0.77 / 44.08 ± 0.51 - 14.26 ± 1.48 / 35.14 ± 1.18 - 9.3.1 - 9.3.1 - 9.3.1 - 12.4.0 - 12.4.0 - 9.3.1 - 9.3.1 + True + 2,219 ± 427 / 717 ± 224 + 3.04 + 51.31 ± 2.32 / 42.95 ± 2.58 + 7.41 ± 1.24 / 26.93 ± 1.56 + 13.04 ± 1.93 / 53.54 ± 2.70 + 59.28 ± 1.15 / 69.67 ± 0.95 + 64.66 ± 0.74 / 18.22 ± 0.85 + 27.12 ± 0.86 / 44.36 ± 0.75 + 13.99 ± 2.04 / 34.45 ± 1.89 + 12.5.2 + 12.1.0 + 12.1.0 + 12.1.0 + 12.1.0 + 12.2.0 + 12.2.0 ibm-granite/granite-8b-code-instruct-4k (few-shot) @@ -2048,7 +2048,7 @@ title: Dutch NLG 🇳🇱 4096 True 5,617 ± 995 / 1,623 ± 540 - 3.06 + 3.05 60.72 ± 2.14 / 45.52 ± 2.46 12.38 ± 1.62 / 29.91 ± 1.91 10.96 ± 1.47 / 47.97 ± 3.45 @@ -2065,27 +2065,27 @@ title: Dutch NLG 🇳🇱 13.0.0 - occiglot/occiglot-7b-eu5 (few-shot) + mistralai/Mistral-7B-Instruct-v0.1 (few-shot) 7242 32 32768 - True - 2,219 ± 427 / 717 ± 224 - 3.06 - 51.31 ± 2.32 / 42.95 ± 2.58 - 7.41 ± 1.24 / 26.93 ± 1.56 - 13.04 ± 1.93 / 53.54 ± 2.70 - 59.28 ± 1.15 / 69.67 ± 0.95 - 64.66 ± 0.74 / 18.22 ± 0.85 - 27.12 ± 0.86 / 44.36 ± 0.75 - 13.99 ± 2.04 / 34.45 ± 1.89 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - 12.2.0 - 12.2.0 + False + 634 ± 179 / 110 ± 35 + 3.07 + 52.72 ± 2.58 / 33.51 ± 1.22 + 7.91 ± 2.16 / 27.82 ± 1.97 + 18.14 ± 2.10 / 55.42 ± 3.05 + 52.75 ± 0.88 / 67.15 ± 1.08 + 64.77 ± 0.97 / 16.55 ± 0.81 + 26.06 ± 0.77 / 44.08 ± 0.51 + 14.26 ± 1.48 / 35.14 ± 1.18 + 9.3.1 + 9.3.1 + 9.3.1 + 12.4.0 + 12.4.0 + 9.3.1 + 9.3.1 BramVanroy/fietje-2b (few-shot) @@ -2117,7 +2117,7 @@ title: Dutch NLG 🇳🇱 4224 True 2,867 ± 550 / 793 ± 253 - 3.12 + 3.13 51.18 ± 1.62 / 35.45 ± 1.88 9.23 ± 2.84 / 19.38 ± 4.37 1.99 ± 2.56 / 34.69 ± 1.59 @@ -2140,7 +2140,7 @@ title: Dutch NLG 🇳🇱 8192 False 5,979 ± 1,044 / 1,724 ± 559 - 3.15 + 3.13 39.41 ± 2.93 / 30.59 ± 1.59 7.00 ± 3.04 / 35.01 ± 3.72 16.10 ± 2.34 / 52.05 ± 3.60 @@ -2163,7 +2163,7 @@ title: Dutch NLG 🇳🇱 4096 True 10,504 ± 3,028 / 1,678 ± 559 - 3.15 + 3.16 42.52 ± 3.31 / 33.08 ± 2.70 9.91 ± 1.71 / 35.24 ± 2.62 0.69 ± 2.82 / 36.10 ± 2.58 @@ -2186,7 +2186,7 @@ title: Dutch NLG 🇳🇱 32768 True 5,054 ± 1,200 / 1,056 ± 339 - 3.20 + 3.16 54.56 ± 2.96 / 37.86 ± 2.49 8.43 ± 1.27 / 24.23 ± 0.94 10.99 ± 2.55 / 50.46 ± 4.17 @@ -2209,7 +2209,7 @@ title: Dutch NLG 🇳🇱 131200 True 3,713 ± 877 / 836 ± 267 - 3.21 + 3.24 47.40 ± 3.29 / 33.11 ± 2.04 7.90 ± 1.98 / 30.71 ± 1.89 3.10 ± 1.93 / 34.24 ± 0.73 @@ -2232,7 +2232,7 @@ title: Dutch NLG 🇳🇱 4096 True 930 ± 310 / 128 ± 43 - 3.28 + 3.26 40.49 ± 4.32 / 30.86 ± 2.27 7.10 ± 1.85 / 27.42 ± 1.76 18.66 ± 2.39 / 55.25 ± 3.77 @@ -2255,7 +2255,7 @@ title: Dutch NLG 🇳🇱 32768 True 3,248 ± 739 / 761 ± 252 - 3.30 + 3.29 35.74 ± 3.22 / 31.74 ± 2.24 
12.55 ± 1.39 / 39.80 ± 1.38 0.23 ± 0.44 / 33.35 ± 0.31 @@ -2294,6 +2294,29 @@ title: Dutch NLG 🇳🇱 13.0.0 13.0.0 + + AI-Sweden-Models/gpt-sw3-20b (few-shot) + 20918 + 64 + 2048 + True + 1,875 ± 673 / 261 ± 91 + 3.43 + 35.30 ± 3.76 / 33.68 ± 1.80 + 15.67 ± 2.21 / 31.30 ± 4.51 + 1.76 ± 2.37 / 47.60 ± 1.68 + 45.05 ± 1.68 / 55.38 ± 1.66 + 59.15 ± 1.54 / 14.60 ± 0.72 + 6.24 ± 1.54 / 29.02 ± 1.30 + 0.47 ± 1.20 / 24.89 ± 0.59 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + allenai/OLMo-1.7-7B-hf (few-shot) 6888 @@ -2301,7 +2324,7 @@ title: Dutch NLG 🇳🇱 4096 True 3,371 ± 876 / 561 ± 184 - 3.42 + 3.43 46.95 ± 2.32 / 36.13 ± 1.88 4.34 ± 2.10 / 19.37 ± 2.08 3.46 ± 1.91 / 41.32 ± 3.08 @@ -2318,43 +2341,20 @@ title: Dutch NLG 🇳🇱 12.10.4 - AI-Sweden-Models/gpt-sw3-20b (few-shot) - 20918 - 64 + ibm-granite/granite-3b-code-instruct-2k (few-shot) + 3483 + 49 2048 True - 1,875 ± 673 / 261 ± 91 + 9,059 ± 1,947 / 2,201 ± 728 3.43 - 35.30 ± 3.76 / 33.68 ± 1.80 - 15.67 ± 2.21 / 31.30 ± 4.51 - 1.76 ± 2.37 / 47.60 ± 1.68 - 45.05 ± 1.68 / 55.38 ± 1.66 - 59.15 ± 1.54 / 14.60 ± 0.72 - 6.24 ± 1.54 / 29.02 ± 1.30 - 0.47 ± 1.20 / 24.89 ± 0.59 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - - - google/gemma-2-2b (few-shot) - 2614 - 256 - 8320 - True - 5,235 ± 1,226 / 1,154 ± 366 - 3.47 - 22.63 ± 4.98 / 22.71 ± 2.86 - 8.11 ± 1.55 / 28.07 ± 1.80 - 8.04 ± 1.79 / 48.95 ± 2.97 - 52.39 ± 2.14 / 65.22 ± 1.11 - 61.90 ± 1.04 / 17.09 ± 0.93 - 22.76 ± 0.76 / 41.67 ± 0.80 - 8.50 ± 1.32 / 30.16 ± 1.11 + 48.53 ± 3.89 / 38.20 ± 2.92 + 10.15 ± 1.55 / 22.01 ± 1.44 + 4.88 ± 2.27 / 38.78 ± 3.56 + 45.38 ± 0.93 / 56.09 ± 1.05 + 59.56 ± 1.09 / 15.20 ± 0.56 + 7.38 ± 1.06 / 29.60 ± 0.71 + 2.69 ± 0.56 / 25.85 ± 0.33 13.0.0 13.0.0 13.0.0 @@ -2370,7 +2370,7 @@ title: Dutch NLG 🇳🇱 4096 False 1,254 ± 328 / 243 ± 83 - 3.47 + 3.46 39.24 ± 2.43 / 24.25 ± 1.34 4.25 ± 1.28 / 18.15 ± 3.08 11.48 ± 1.02 / 52.86 ± 2.43 @@ -2409,29 +2409,6 @@ title: Dutch NLG 🇳🇱 13.0.0 13.0.0 - - ibm-granite/granite-3b-code-instruct-2k (few-shot) - 3483 - 49 - 2048 - True - 9,059 ± 1,947 / 2,201 ± 728 - 3.48 - 48.53 ± 3.89 / 38.20 ± 2.92 - 10.15 ± 1.55 / 22.01 ± 1.44 - 4.88 ± 2.27 / 38.78 ± 3.56 - 45.38 ± 0.93 / 56.09 ± 1.05 - 59.56 ± 1.09 / 15.20 ± 0.56 - 7.38 ± 1.06 / 29.60 ± 0.71 - 2.69 ± 0.56 / 25.85 ± 0.33 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot) 20918 @@ -2456,20 +2433,20 @@ title: Dutch NLG 🇳🇱 9.3.1 - meta-llama/Llama-3.2-1B-Instruct (few-shot) - 1236 - 128 - 131200 - False - 7,436 ± 1,846 / 1,508 ± 479 - 3.53 - 42.01 ± 2.06 / 37.16 ± 1.98 - 9.15 ± 1.70 / 32.55 ± 2.69 - 1.11 ± 2.15 / 36.71 ± 3.89 - 40.04 ± 1.61 / 53.75 ± 1.10 - 58.72 ± 1.05 / 12.47 ± 0.62 - 17.71 ± 0.69 / 38.01 ± 0.61 - 6.98 ± 1.11 / 28.05 ± 1.00 + MaLA-LM/emma-500-llama2-7b (few-shot) + 6738 + 32 + 4096 + True + 6,275 ± 1,193 / 1,755 ± 578 + 3.50 + 36.61 ± 3.37 / 31.91 ± 2.20 + 8.77 ± 1.80 / 25.31 ± 1.42 + 3.52 ± 2.07 / 35.34 ± 1.61 + 59.51 ± 0.97 / 70.33 ± 0.64 + 54.50 ± 5.78 / 15.01 ± 1.59 + 14.90 ± 1.44 / 35.20 ± 1.09 + 7.26 ± 1.56 / 29.91 ± 1.18 13.0.0 13.0.0 13.0.0 @@ -2479,20 +2456,20 @@ title: Dutch NLG 🇳🇱 13.0.0 - MaLA-LM/emma-500-llama2-7b (few-shot) - 6738 - 32 - 4096 + google/gemma-2-2b (few-shot) + 2614 + 256 + 8320 True - 6,275 ± 1,193 / 1,755 ± 578 - 3.55 - 36.61 ± 3.37 / 31.91 ± 2.20 - 8.77 ± 1.80 / 25.31 ± 1.42 - 3.52 ± 2.07 / 35.34 ± 1.61 - 59.51 ± 0.97 / 70.33 ± 0.64 - 54.50 ± 5.78 / 15.01 ± 1.59 - 14.90 ± 1.44 / 35.20 ± 1.09 - 7.26 ± 1.56 / 29.91 ± 1.18 + 5,235 ± 1,226 / 1,154 ± 366 + 3.50 + 
22.63 ± 4.98 / 22.71 ± 2.86 + 8.11 ± 1.55 / 28.07 ± 1.80 + 8.04 ± 1.79 / 48.95 ± 2.97 + 52.39 ± 2.14 / 65.22 ± 1.11 + 61.90 ± 1.04 / 17.09 ± 0.93 + 22.76 ± 0.76 / 41.67 ± 0.80 + 8.50 ± 1.32 / 30.16 ± 1.11 13.0.0 13.0.0 13.0.0 @@ -2508,7 +2485,7 @@ title: Dutch NLG 🇳🇱 4096 True 3,136 ± 558 / 942 ± 290 - 3.55 + 3.53 33.73 ± 2.02 / 30.41 ± 1.57 7.45 ± 1.77 / 22.28 ± 1.35 3.78 ± 2.04 / 50.30 ± 1.51 @@ -2524,6 +2501,29 @@ title: Dutch NLG 🇳🇱 13.2.0 13.2.0 + + meta-llama/Llama-3.2-1B-Instruct (few-shot) + 1236 + 128 + 131200 + False + 7,436 ± 1,846 / 1,508 ± 479 + 3.54 + 42.01 ± 2.06 / 37.16 ± 1.98 + 9.15 ± 1.70 / 32.55 ± 2.69 + 1.11 ± 2.15 / 36.71 ± 3.89 + 40.04 ± 1.61 / 53.75 ± 1.10 + 58.72 ± 1.05 / 12.47 ± 0.62 + 17.71 ± 0.69 / 38.01 ± 0.61 + 6.98 ± 1.11 / 28.05 ± 1.00 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + ibm-granite/granite-7b-base (few-shot) 6738 @@ -2531,7 +2531,7 @@ title: Dutch NLG 🇳🇱 2048 True 4,405 ± 1,098 / 1,032 ± 345 - 3.59 + 3.57 37.39 ± 3.37 / 32.77 ± 1.97 7.51 ± 1.57 / 19.22 ± 1.72 3.11 ± 0.88 / 50.54 ± 0.90 @@ -2577,7 +2577,7 @@ title: Dutch NLG 🇳🇱 4096 True 1,438 ± 410 / 233 ± 79 - 3.63 + 3.62 42.35 ± 2.49 / 29.29 ± 1.66 0.78 ± 0.93 / 8.63 ± 0.29 -0.02 ± 1.29 / 38.46 ± 1.55 @@ -2623,7 +2623,7 @@ title: Dutch NLG 🇳🇱 4096 True 15,009 ± 4,072 / 2,702 ± 878 - 3.66 + 3.64 32.45 ± 2.17 / 30.83 ± 2.31 7.03 ± 2.08 / 34.16 ± 1.83 5.58 ± 1.32 / 44.79 ± 3.28 @@ -2662,29 +2662,6 @@ title: Dutch NLG 🇳🇱 12.1.0 12.1.0 - - google/gemma-2b (few-shot) - 2506 - 256 - 8192 - True - 6,087 ± 1,046 / 1,902 ± 563 - 3.73 - 16.90 ± 4.91 / 17.38 ± 4.30 - 9.95 ± 0.78 / 27.94 ± 1.43 - 0.41 ± 1.03 / 33.54 ± 0.32 - 49.15 ± 1.55 / 59.16 ± 1.44 - 58.61 ± 2.22 / 13.67 ± 1.15 - 10.94 ± 0.50 / 31.87 ± 0.76 - 3.29 ± 0.95 / 26.85 ± 0.49 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - stabilityai/stablelm-2-1_6b (few-shot) 1645 @@ -2692,7 +2669,7 @@ title: Dutch NLG 🇳🇱 4096 True 7,259 ± 2,120 / 1,240 ± 432 - 3.77 + 3.70 36.58 ± 3.88 / 33.82 ± 2.87 6.32 ± 1.30 / 24.04 ± 1.14 4.01 ± 2.01 / 36.03 ± 1.61 @@ -2709,25 +2686,25 @@ title: Dutch NLG 🇳🇱 12.10.8 - Qwen/Qwen1.5-1.8B-Chat (few-shot) - 1837 - 152 - 32768 - False - 8,304 ± 1,846 / 1,933 ± 617 - 3.81 - 23.44 ± 5.09 / 25.00 ± 2.33 - 6.82 ± 1.82 / 30.97 ± 2.65 - 4.11 ± 1.73 / 43.70 ± 3.47 - 33.16 ± 1.61 / 46.66 ± 1.27 - 60.91 ± 0.99 / 12.65 ± 0.41 - 12.11 ± 0.90 / 33.62 ± 0.61 - 6.41 ± 1.13 / 29.73 ± 0.82 + google/gemma-2b (few-shot) + 2506 + 256 + 8192 + True + 6,087 ± 1,046 / 1,902 ± 563 + 3.74 + 16.90 ± 4.91 / 17.38 ± 4.30 + 9.95 ± 0.78 / 27.94 ± 1.43 + 0.41 ± 1.03 / 33.54 ± 0.32 + 49.15 ± 1.55 / 59.16 ± 1.44 + 58.61 ± 2.22 / 13.67 ± 1.15 + 10.94 ± 0.50 / 31.87 ± 0.76 + 3.29 ± 0.95 / 26.85 ± 0.49 12.5.2 - 11.0.0 + 12.1.0 12.1.0 - 12.5.0 - 12.5.0 + 12.1.0 + 12.1.0 12.1.0 12.1.0 @@ -2738,7 +2715,7 @@ title: Dutch NLG 🇳🇱 4096 True 3,035 ± 503 / 911 ± 300 - 3.84 + 3.79 24.15 ± 5.73 / 26.49 ± 4.13 8.31 ± 1.56 / 20.06 ± 1.06 1.60 ± 1.71 / 41.51 ± 3.60 @@ -2754,6 +2731,29 @@ title: Dutch NLG 🇳🇱 12.10.4 12.10.4 + + Qwen/Qwen1.5-1.8B-Chat (few-shot) + 1837 + 152 + 32768 + False + 8,304 ± 1,846 / 1,933 ± 617 + 3.79 + 23.44 ± 5.09 / 25.00 ± 2.33 + 6.82 ± 1.82 / 30.97 ± 2.65 + 4.11 ± 1.73 / 43.70 ± 3.47 + 33.16 ± 1.61 / 46.66 ± 1.27 + 60.91 ± 0.99 / 12.65 ± 0.41 + 12.11 ± 0.90 / 33.62 ± 0.61 + 6.41 ± 1.13 / 29.73 ± 0.82 + 12.5.2 + 11.0.0 + 12.1.0 + 12.5.0 + 12.5.0 + 12.1.0 + 12.1.0 + Tweeties/tweety-7b-dutch-v24a (few-shot) 7391 @@ -2761,7 +2761,7 @@ title: Dutch NLG 🇳🇱 1024 True 2,971 ± 423 / 1,351 ± 410 - 3.84 + 3.83 35.83 ± 3.06 
/ 29.15 ± 2.80 12.47 ± 2.51 / 40.41 ± 2.26 16.81 ± 3.31 / 53.38 ± 5.08 @@ -2778,20 +2778,20 @@ title: Dutch NLG 🇳🇱 12.6.1 - HuggingFaceTB/SmolLM2-1.7B (few-shot) + HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot) 1711 49 8192 True - 16,249 ± 3,690 / 3,689 ± 1,226 + 15,971 ± 3,654 / 3,609 ± 1,197 3.86 - 22.84 ± 5.42 / 25.11 ± 3.52 - 4.60 ± 2.12 / 29.94 ± 1.50 - 2.55 ± 1.41 / 40.88 ± 3.15 - 40.33 ± 1.19 / 48.35 ± 1.31 - 58.31 ± 1.50 / 13.08 ± 0.51 - 14.32 ± 0.71 / 35.65 ± 0.50 - 3.87 ± 1.02 / 27.14 ± 0.66 + 31.84 ± 3.39 / 28.66 ± 1.77 + 1.56 ± 3.25 / 28.78 ± 2.60 + 5.05 ± 1.34 / 43.99 ± 4.14 + 40.55 ± 0.77 / 48.56 ± 0.95 + 60.35 ± 1.16 / 13.63 ± 0.63 + 13.39 ± 0.96 / 34.55 ± 0.65 + 4.57 ± 0.96 / 27.42 ± 0.73 13.1.0 13.1.0 13.1.0 @@ -2801,20 +2801,20 @@ title: Dutch NLG 🇳🇱 13.1.0 - HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot) + HuggingFaceTB/SmolLM2-1.7B (few-shot) 1711 49 8192 True - 15,971 ± 3,654 / 3,609 ± 1,197 - 3.87 - 31.84 ± 3.39 / 28.66 ± 1.77 - 1.56 ± 3.25 / 28.78 ± 2.60 - 5.05 ± 1.34 / 43.99 ± 4.14 - 40.55 ± 0.77 / 48.56 ± 0.95 - 60.35 ± 1.16 / 13.63 ± 0.63 - 13.39 ± 0.96 / 34.55 ± 0.65 - 4.57 ± 0.96 / 27.42 ± 0.73 + 16,249 ± 3,690 / 3,689 ± 1,226 + 3.86 + 22.84 ± 5.42 / 25.11 ± 3.52 + 4.60 ± 2.12 / 29.94 ± 1.50 + 2.55 ± 1.41 / 40.88 ± 3.15 + 40.33 ± 1.19 / 48.35 ± 1.31 + 58.31 ± 1.50 / 13.08 ± 0.51 + 14.32 ± 0.71 / 35.65 ± 0.50 + 3.87 ± 1.02 / 27.14 ± 0.66 13.1.0 13.1.0 13.1.0 @@ -2823,29 +2823,6 @@ title: Dutch NLG 🇳🇱 13.1.0 13.1.0 - - PleIAs/Pleias-3b-Preview (few-shot) - 3212 - 66 - 4096 - True - 6,513 ± 1,241 / 1,282 ± 644 - 3.95 - 31.13 ± 3.71 / 29.34 ± 2.26 - 7.24 ± 2.08 / 29.45 ± 4.63 - 1.23 ± 1.73 / 44.71 ± 3.28 - 32.13 ± 0.85 / 39.28 ± 0.76 - 56.85 ± 0.66 / 11.52 ± 0.17 - 1.79 ± 1.11 / 26.12 ± 0.72 - -0.63 ± 1.13 / 23.93 ± 0.71 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - Qwen/Qwen1.5-0.5B-Chat (few-shot) 620 @@ -2853,7 +2830,7 @@ title: Dutch NLG 🇳🇱 32768 False 11,740 ± 3,000 / 2,209 ± 721 - 3.95 + 3.90 18.66 ± 4.43 / 17.56 ± 4.28 8.59 ± 3.20 / 29.65 ± 5.10 0.34 ± 2.02 / 43.92 ± 3.15 @@ -2876,7 +2853,7 @@ title: Dutch NLG 🇳🇱 4224 True 840 ± 79 / 400 ± 124 - 3.97 + 3.92 36.74 ± 3.36 / 32.36 ± 1.39 8.57 ± 2.44 / 34.17 ± 2.59 3.01 ± 1.94 / 46.03 ± 4.19 @@ -2892,6 +2869,29 @@ title: Dutch NLG 🇳🇱 12.5.2 12.5.2 + + PleIAs/Pleias-3b-Preview (few-shot) + 3212 + 66 + 4096 + True + 6,513 ± 1,241 / 1,282 ± 644 + 3.93 + 31.13 ± 3.71 / 29.34 ± 2.26 + 7.24 ± 2.08 / 29.45 ± 4.63 + 1.23 ± 1.73 / 44.71 ± 3.28 + 32.13 ± 0.85 / 39.28 ± 0.76 + 56.85 ± 0.66 / 11.52 ± 0.17 + 1.79 ± 1.11 / 26.12 ± 0.72 + -0.63 ± 1.13 / 23.93 ± 0.71 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + ibm-granite/granite-3.0-1b-a400m-base (few-shot) 1385 @@ -2899,7 +2899,7 @@ title: Dutch NLG 🇳🇱 4096 True 7,808 ± 2,183 / 1,289 ± 428 - 3.97 + 3.98 12.76 ± 7.37 / 14.65 ± 5.86 9.35 ± 1.70 / 31.57 ± 6.23 0.69 ± 1.52 / 44.03 ± 3.52 @@ -2922,7 +2922,7 @@ title: Dutch NLG 🇳🇱 32768 True 5,666 ± 1,328 / 1,256 ± 408 - 4.02 + 4.03 11.66 ± 6.46 / 15.15 ± 4.38 5.20 ± 1.78 / 35.43 ± 2.14 2.89 ± 1.91 / 41.36 ± 4.63 @@ -2938,29 +2938,6 @@ title: Dutch NLG 🇳🇱 12.1.0 12.1.0 - - allenai/OLMo-7B (few-shot) - 6888 - 50 - 2176 - True - 5,403 ± 1,133 / 1,294 ± 423 - 4.04 - 37.37 ± 2.22 / 30.45 ± 2.45 - 9.55 ± 1.82 / 23.90 ± 1.53 - 0.05 ± 1.35 / 35.78 ± 2.30 - 34.81 ± 1.54 / 46.37 ± 1.51 - 45.22 ± 0.39 / 8.61 ± 0.17 - 3.92 ± 1.04 / 27.22 ± 0.63 - 0.16 ± 0.55 / 25.29 ± 0.61 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - Qwen/Qwen1.5-0.5B (few-shot) 620 @@ -2984,6 +2961,29 @@ title: Dutch 
NLG 🇳🇱 12.1.0 12.1.0 + + allenai/OLMo-7B (few-shot) + 6888 + 50 + 2176 + True + 5,403 ± 1,133 / 1,294 ± 423 + 4.05 + 37.37 ± 2.22 / 30.45 ± 2.45 + 9.55 ± 1.82 / 23.90 ± 1.53 + 0.05 ± 1.35 / 35.78 ± 2.30 + 34.81 ± 1.54 / 46.37 ± 1.51 + 45.22 ± 0.39 / 8.61 ± 0.17 + 3.92 ± 1.04 / 27.22 ± 0.63 + 0.16 ± 0.55 / 25.29 ± 0.61 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + meta-llama/Llama-3.2-1B (few-shot) 1236 @@ -3014,7 +3014,7 @@ title: Dutch NLG 🇳🇱 2048 True 2,519 ± 841 / 323 ± 104 - 4.08 + 4.10 23.58 ± 4.08 / 26.01 ± 4.43 7.90 ± 3.56 / 33.04 ± 3.49 1.79 ± 1.38 / 40.53 ± 3.24 @@ -3037,7 +3037,7 @@ title: Dutch NLG 🇳🇱 2048 True 10,756 ± 3,589 / 1,157 ± 670 - 4.11 + 4.12 38.22 ± 3.45 / 35.62 ± 3.90 4.99 ± 3.86 / 29.17 ± 2.70 1.85 ± 1.45 / 40.34 ± 3.41 @@ -3053,6 +3053,29 @@ title: Dutch NLG 🇳🇱 14.0.4 14.0.4 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 4.15 + 28.25 ± 3.03 / 25.24 ± 2.38 + 3.73 ± 1.83 / 15.20 ± 2.26 + 0.76 ± 1.10 / 33.57 ± 0.34 + 19.08 ± 2.27 / 28.16 ± 2.64 + 57.20 ± 1.38 / 10.23 ± 0.61 + 5.45 ± 0.96 / 28.87 ± 0.86 + 2.34 ± 1.16 / 26.19 ± 0.80 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + NorwAI/NorwAI-Llama2-7B (few-shot) 7033 @@ -3060,7 +3083,7 @@ title: Dutch NLG 🇳🇱 4096 True 4,438 ± 1,128 / 1,028 ± 346 - 4.15 + 4.16 22.50 ± 2.27 / 24.09 ± 2.40 6.04 ± 1.51 / 18.08 ± 2.09 -0.61 ± 1.30 / 46.51 ± 2.55 @@ -3083,7 +3106,7 @@ title: Dutch NLG 🇳🇱 8192 True 22,023 ± 6,203 / 3,675 ± 1,231 - 4.22 + 4.20 20.95 ± 2.02 / 25.63 ± 1.96 6.84 ± 1.76 / 27.74 ± 5.49 -1.50 ± 1.30 / 34.07 ± 0.45 @@ -3106,7 +3129,7 @@ title: Dutch NLG 🇳🇱 8192 True 21,777 ± 6,115 / 3,617 ± 1,211 - 4.32 + 4.30 15.68 ± 5.54 / 22.21 ± 5.42 6.73 ± 2.20 / 27.67 ± 4.00 0.63 ± 1.05 / 43.48 ± 2.98 @@ -3129,7 +3152,7 @@ title: Dutch NLG 🇳🇱 32896 True 2,722 ± 495 / 766 ± 250 - 4.35 + 4.34 22.95 ± 1.99 / 23.05 ± 1.55 2.40 ± 1.70 / 22.89 ± 4.81 3.12 ± 1.51 / 45.58 ± 4.56 @@ -3198,7 +3221,7 @@ title: Dutch NLG 🇳🇱 2176 True 8,536 ± 1,926 / 1,940 ± 619 - 4.57 + 4.58 22.58 ± 5.05 / 26.82 ± 3.69 4.92 ± 2.71 / 19.51 ± 4.22 -1.27 ± 1.85 / 41.38 ± 3.59 @@ -3221,7 +3244,7 @@ title: Dutch NLG 🇳🇱 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.63 + 4.62 24.47 ± 2.03 / 26.64 ± 2.70 3.57 ± 2.03 / 16.42 ± 3.18 -2.03 ± 1.35 / 39.46 ± 4.09 @@ -3244,7 +3267,7 @@ title: Dutch NLG 🇳🇱 8192 True 25,602 ± 7,583 / 3,953 ± 1,325 - 4.65 + 4.64 15.82 ± 3.13 / 16.46 ± 2.61 -0.62 ± 1.55 / 16.18 ± 1.88 1.16 ± 1.38 / 34.30 ± 1.27 @@ -3267,7 +3290,7 @@ title: Dutch NLG 🇳🇱 512 True 5,847 ± 1,029 / 1,640 ± 525 - 4.66 + 4.65 0.00 ± 0.00 / 0.00 ± 0.00 0.95 ± 1.17 / 9.87 ± 0.86 0.00 ± 0.00 / 33.34 ± 0.31 @@ -3290,7 +3313,7 @@ title: Dutch NLG 🇳🇱 8192 True 26,346 ± 7,812 / 4,082 ± 1,372 - 4.67 + 4.66 17.49 ± 2.94 / 18.59 ± 2.66 2.01 ± 1.88 / 15.88 ± 1.32 -0.02 ± 0.15 / 34.86 ± 2.12 @@ -3313,7 +3336,7 @@ title: Dutch NLG 🇳🇱 4096 True 3,024 ± 496 / 909 ± 301 - 4.95 + 4.94 3.80 ± 1.23 / 4.24 ± 1.19 0.97 ± 1.50 / 13.00 ± 2.52 -0.37 ± 0.55 / 33.40 ± 0.35 @@ -3336,7 +3359,7 @@ title: Dutch NLG 🇳🇱 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 5.02 + 5.01 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 8.62 ± 0.30 0.00 ± 0.00 / 33.34 ± 0.31 diff --git a/dutch-nlu.csv b/dutch-nlu.csv index f622bbfe..5e24c4a4 100644 --- a/dutch-nlu.csv +++ b/dutch-nlu.csv @@ -14,166 +14,167 @@ DTAI-KULeuven/robbertje-1-gb-non-shuffled,74,40,512,True,False,21007,2.12,74.5,3 DTAI-KULeuven/robbert-2023-dutch-base,124,50,512,True,False,11230,2.18,82.22,28.2,55.12,9.74 
DTAI-KULeuven/robbertje-1-gb-merged,74,40,512,True,False,21027,2.21,72.51,32.26,50.0,5.97 "meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8317,True,False,1673,2.21,74.64,18.9,49.54,44.77 +FacebookAI/xlm-roberta-large,559,250,512,True,False,17897,2.24,83.49,8.82,64.8,50.72 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.26,68.58,14.41,55.01,58.63 -meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,2.26,69.12,11.23,68.74,55.25 google/gemma-2-27b-it (few-shot),27227,256,8320,True,False,1516,2.27,65.2,14.8,59.02,59.6 jhu-clsp/bernice,277,250,128,True,False,5567,2.27,78.74,22.58,55.39,5.95 -FacebookAI/xlm-roberta-large,559,250,512,True,False,17897,2.29,83.49,8.82,64.8,50.72 -Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,2.3,71.32,9.12,63.96,58.36 +meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,2.27,69.12,11.23,68.74,55.25 "gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,2.31,76.75,10.8,56.26,55.55 DTAI-KULeuven/robbertje-1-gb-shuffled,74,40,512,True,False,20616,2.32,73.55,26.02,57.03,6.64 +Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,2.32,71.32,9.12,63.96,58.36 meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,2.34,68.82,11.41,61.66,55.43 -microsoft/mdeberta-v3-base,278,251,512,True,False,20637,2.37,84.47,5.16,71.23,46.43 +microsoft/mdeberta-v3-base,278,251,512,True,False,20637,2.38,84.47,5.16,71.23,46.43 "gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.39,68.71,20.33,49.52,34.06 -google/rembert,575,250,512,True,False,11736,2.42,75.49,4.79,66.47,55.7 "gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,2.42,69.12,12.36,58.88,45.88 -Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,2.43,67.16,9.84,66.06,50.91 +google/rembert,575,250,512,True,False,11736,2.43,75.49,4.79,66.47,55.7 "gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.43,64.15,12.67,62.44,45.65 upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,2.43,65.37,11.93,41.67,67.75 DTAI-KULeuven/robbert-2023-dutch-large,354,50,512,True,False,5444,2.44,81.05,16.35,65.18,11.44 +"gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,2.44,68.96,8.81,58.95,55.57 +Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,2.45,67.16,9.84,66.06,50.91 meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,2.45,70.37,10.87,62.87,44.3 +ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,2.46,68.17,10.56,56.89,53.05 "gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,2.46,55.72,11.13,67.28,54.2 "claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.47,62.41,12.64,74.06,35.77 -"gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,2.48,68.96,8.81,58.95,55.57 cardiffnlp/twitter-xlm-roberta-base,277,250,512,True,False,34475,2.49,77.15,18.78,56.72,14.61 -ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,2.51,68.17,10.56,56.89,53.05 -"meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4221,True,False,1892,2.51,66.5,7.82,49.55,65.26 sentence-transformers/paraphrase-xlm-r-multilingual-v1,277,250,512,True,False,20154,2.52,70.59,21.37,45.86,5.2 -meta-llama/Llama-3.1-8B (few-shot),8030,128,131200,True,False,2986,2.53,64.79,11.95,32.97,63.89 -google/gemma-2-9b (few-shot),9242,256,8320,True,False,2038,2.57,57.13,17.43,31.39,59.33 
-google/gemma-2-9b-it (few-shot),9242,256,8320,True,False,2062,2.58,52.62,11.78,59.23,55.78 -nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.58,55.08,10.8,61.31,49.8 -mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024128,True,False,7095,2.59,66.57,10.1,40.31,59.99 +"meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4221,True,False,1892,2.54,66.5,7.82,49.55,65.26 +google/gemma-2-9b (few-shot),9242,256,8320,True,False,2038,2.56,57.13,17.43,31.39,59.33 +meta-llama/Llama-3.1-8B (few-shot),8030,128,131200,True,False,2986,2.56,64.79,11.95,32.97,63.89 +google/gemma-2-9b-it (few-shot),9242,256,8320,True,False,2062,2.59,52.62,11.78,59.23,55.78 +nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.59,55.08,10.8,61.31,49.8 +mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024128,True,False,7095,2.61,66.57,10.1,40.31,59.99 DTAI-KULeuven/robbertje-1-gb-bort,45,40,512,True,False,31087,2.62,66.74,24.93,37.19,5.23 -mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.62,66.51,11.91,34.46,59.23 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.65,68.72,14.67,32.91,45.36 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.67,62.26,10.45,30.3,62.99 +"meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4221,True,False,1979,2.68,64.0,13.3,30.88,54.14 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.68,68.72,14.67,32.91,45.36 mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.68,64.81,12.99,39.38,49.08 -"meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4221,True,False,1979,2.69,64.0,13.3,30.88,54.14 -CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.7,62.07,13.7,35.14,49.15 +mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.69,63.3,11.82,32.2,59.45 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.7,62.86,15.11,39.11,36.48 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.7,58.8,12.5,45.22,47.03 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.72,62.26,10.45,30.3,62.99 nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.72,66.29,12.71,31.39,48.33 senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.74,64.25,13.66,28.59,49.64 utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.75,43.06,11.95,40.85,63.42 robinsmits/Qwen1.5-7B-Dutch-Chat (few-shot),7719,152,32768,False,False,4686,2.77,57.81,14.62,25.34,56.81 yhavinga/Boreas-7B-chat (few-shot),7242,32,32768,False,False,2913,2.77,60.22,11.97,30.94,52.19 -CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.79,60.81,7.9,31.12,63.0 robinsmits/Qwen1.5-7B-Dutch-Chat-Sft-Bf16 (few-shot),7719,152,32768,False,False,2413,2.79,56.83,14.79,23.58,55.9 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131200,True,False,1005,2.82,69.76,9.09,37.58,41.26 +CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.8,53.02,13.68,29.97,53.4 +skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.81,62.16,8.92,32.76,56.87 +CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.82,60.81,7.9,31.12,63.0 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.83,53.62,13.37,23.47,61.2 sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,2.83,67.89,23.25,21.36,4.5 
-skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.83,62.16,8.92,32.76,56.87 +"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.84,61.15,12.71,35.26,41.27 +ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,2.84,52.32,8.46,42.42,53.12 "mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.84,63.53,11.25,27.76,50.94 -"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.86,61.15,12.71,35.26,41.27 -CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.87,61.21,9.03,38.15,44.91 +ReBatch/Reynaerde-7B-Instruct (few-shot),7248,33,32768,False,False,2562,2.85,59.16,10.39,19.5,60.96 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.87,63.29,13.81,8.16,56.64 -ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,2.88,52.26,8.46,42.42,53.11 -mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.89,58.15,7.94,25.41,62.56 +CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.89,61.21,9.03,38.15,44.91 ReBatch/Llama-3-8B-dutch (few-shot),8030,128,8317,False,False,3800,2.9,60.14,11.07,15.67,59.93 ReBatch/Reynaerde-7B-Chat (few-shot),7248,33,32768,False,False,2554,2.9,56.22,11.22,20.04,61.15 -ReBatch/Reynaerde-7B-Instruct (few-shot),7248,33,32768,False,False,2562,2.9,59.16,10.39,19.5,60.96 -Rijgersberg/Mistral-7B-v0.1-chat-nl (few-shot),7242,32,32768,False,False,5907,2.9,56.73,11.08,19.41,58.91 microsoft/xlm-align-base,277,250,512,True,False,14744,2.9,78.85,11.8,14.56,42.08 +Rijgersberg/Mistral-7B-v0.1-chat-nl (few-shot),7242,32,32768,False,False,5907,2.91,56.73,11.08,19.41,58.91 +mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.92,58.15,7.94,25.41,62.56 "mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.92,64.71,11.14,25.22,46.34 ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4224,True,False,10194,2.95,52.52,13.85,17.72,53.5 -sentence-transformers/stsb-xlm-r-multilingual,278,250,512,True,False,15040,2.96,66.85,20.56,35.56,5.04 -mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.97,47.6,10.62,61.64,24.02 +sentence-transformers/stsb-xlm-r-multilingual,278,250,512,True,False,15040,2.95,66.85,20.56,35.56,5.04 mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.97,55.56,12.37,21.5,50.77 -alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,3.0,56.76,7.11,23.55,61.89 +alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.98,56.76,7.11,23.55,61.89 +mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.98,47.6,10.62,61.64,24.02 ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,3.0,60.72,12.38,10.96,51.2 +mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,3.0,56.52,7.02,23.41,61.9 NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,3.01,62.81,11.28,28.57,38.75 -mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,3.02,56.52,7.02,23.41,61.9 -ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4224,True,False,10187,3.03,47.28,12.12,12.74,60.36 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,3.05,50.31,12.58,14.72,56.19 -occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,3.06,53.78,7.78,16.23,63.09 -Geotrend/distilbert-base-25lang-cased,108,85,512,True,False,26099,3.07,75.02,7.45,45.28,20.18 
+RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,3.04,57.66,8.41,16.93,56.29 +ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4224,True,False,10187,3.04,47.28,12.12,12.74,60.36 +meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,3.04,57.8,8.57,17.4,56.35 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,3.04,61.68,8.97,36.57,33.88 +Geotrend/distilbert-base-25lang-cased,108,85,512,True,False,26099,3.05,75.02,7.45,45.28,20.18 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,3.05,50.31,12.58,14.72,56.19 +google/gemma-7b (few-shot),8538,256,8192,True,False,1378,3.07,47.75,7.68,28.28,61.49 NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,3.08,62.76,13.83,24.44,26.17 Rijgersberg/GEITje-7B-chat-v2 (few-shot),7242,32,32768,False,False,5908,3.08,42.12,11.06,19.71,59.19 -RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,3.09,57.66,8.41,16.93,56.29 google/gemma-7b-it (few-shot),8538,256,8317,False,False,1792,3.09,53.93,12.83,6.58,53.45 -google/gemma-7b (few-shot),8538,256,8192,True,False,1378,3.09,47.75,7.68,28.28,61.49 -meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,3.09,57.8,8.57,17.4,56.35 -meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,3.1,52.55,4.26,24.57,60.99 +occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,3.09,53.78,7.78,16.23,63.09 +Rijgersberg/GEITje-7B-chat (few-shot),7242,32,32768,False,False,5920,3.12,50.69,8.16,20.45,54.48 +meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,3.12,52.55,4.26,24.57,60.99 BramVanroy/GEITje-7B-ultra (few-shot),7242,32,8192,False,False,2475,3.13,42.2,12.78,18.23,53.41 +timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,3.14,54.56,8.43,10.99,55.91 Rijgersberg/GEITje-7B (few-shot),7242,32,32768,True,False,5887,3.15,47.53,4.36,30.67,56.55 microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,3.15,44.27,12.84,10.44,56.4 BramVanroy/fietje-2b-chat (few-shot),2775,50,2048,False,False,4704,3.16,39.57,13.25,9.31,60.26 -Rijgersberg/GEITje-7B-chat (few-shot),7242,32,32768,False,False,5920,3.16,50.69,8.16,20.45,54.48 EuropeanParliament/EUBERT,93,66,512,True,False,20070,3.18,49.54,14.86,27.9,20.65 -mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,3.18,52.72,7.91,18.14,52.75 +occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,3.18,51.31,7.41,13.04,59.28 TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,3.19,48.24,11.37,10.73,54.83 -Twitter/twhin-bert-base,278,250,512,True,False,11514,3.19,74.03,9.53,39.12,7.71 -meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,3.19,50.23,10.07,14.73,53.42 -timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,3.19,54.56,8.43,10.99,55.91 -BramVanroy/fietje-2b (few-shot),2780,51,2048,True,False,4804,3.2,33.92,13.39,6.75,58.57 +Twitter/twhin-bert-large,560,250,512,True,False,9707,3.19,77.35,6.55,18.25,28.37 Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,3.2,42.52,14.68,4.07,55.18 +Twitter/twhin-bert-base,278,250,512,True,False,11514,3.2,74.03,9.53,39.12,7.71 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131200,False,False,10424,3.2,43.66,12.87,17.94,47.77 -occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,3.2,51.31,7.41,13.04,59.28 
-Twitter/twhin-bert-large,560,250,512,True,False,9707,3.21,77.35,6.55,18.25,28.37 -ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.21,49.25,9.45,11.87,54.2 +BramVanroy/fietje-2b (few-shot),2780,51,2048,True,False,4804,3.21,33.92,13.39,6.75,58.57 +meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,3.21,50.23,10.07,14.73,53.42 +mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,3.21,52.72,7.91,18.14,52.75 BramVanroy/fietje-2b-instruct (few-shot),2775,50,2048,False,False,4710,3.22,36.5,13.7,4.81,60.63 +ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.22,49.25,9.45,11.87,54.2 NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,3.24,69.37,14.35,29.13,0.34 google/gemma-2-2b-it (few-shot),2614,256,8320,True,False,5374,3.25,40.58,11.17,19.63,49.3 neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,3.25,34.21,11.94,6.18,60.44 ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.26,50.88,12.39,3.31,48.44 -01-ai/Yi-1.5-6B (few-shot),6061,64,4224,True,False,2867,3.3,51.18,9.23,1.99,54.66 -meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,3.31,40.49,7.1,18.66,59.92 +meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,3.29,40.49,7.1,18.66,59.92 +01-ai/Yi-1.5-6B (few-shot),6061,64,4224,True,False,2867,3.32,51.18,9.23,1.99,54.66 AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,3.33,35.3,15.67,1.76,45.05 +ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.35,48.53,10.15,4.88,45.38 sentence-transformers/distiluse-base-multilingual-cased-v1,135,120,512,True,False,34042,3.36,58.67,17.82,9.27,2.17 -meta-llama/Llama-3.2-3B (few-shot),3213,128,131200,True,False,3713,3.37,47.4,7.9,3.1,56.53 AI-Sweden-Models/roberta-large-1350k,354,50,512,True,False,5744,3.38,73.03,3.65,2.0,42.85 -ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.38,42.52,9.91,0.69,56.95 -BramVanroy/GEITje-7B-ultra-sft (few-shot),7242,32,8192,False,False,5979,3.4,39.41,7.0,16.1,53.02 -ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.4,48.53,10.15,4.88,45.38 +BramVanroy/GEITje-7B-ultra-sft (few-shot),7242,32,8192,False,False,5979,3.38,39.41,7.0,16.1,53.02 +ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.4,42.52,9.91,0.69,56.95 +meta-llama/Llama-3.2-3B (few-shot),3213,128,131200,True,False,3713,3.4,47.4,7.9,3.1,56.53 AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2048,True,False,1831,3.42,24.44,18.4,4.85,39.83 +MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.42,36.61,8.77,3.52,59.51 AI-Sweden-Models/roberta-large-1160k,354,50,512,True,False,14014,3.44,70.92,3.5,2.06,41.4 Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.44,35.74,12.55,0.23,51.3 -MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.46,36.61,8.77,3.52,59.51 -allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,3.47,46.95,4.34,3.46,57.07 +allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,3.48,46.95,4.34,3.46,57.07 nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.49,60.11,11.12,32.68,0.0 -google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.53,38.85,11.25,-2.27,45.95 sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,3.53,65.04,17.4,-0.95,3.94 
-meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131200,False,False,7436,3.58,42.01,9.15,1.11,40.04 +google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.54,38.85,11.25,-2.27,45.95 +stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.57,36.58,6.32,4.01,52.81 openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.59,39.24,4.25,11.48,54.18 -dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,3.61,56.69,9.29,3.02,22.14 -ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.61,37.39,7.51,3.11,49.6 -RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.63,35.49,11.36,2.52,37.49 -stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.64,36.58,6.32,4.01,52.81 -sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.65,56.98,9.66,19.37,3.11 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.66,32.45,7.03,5.58,51.18 -google/gemma-2-2b (few-shot),2614,256,8320,True,False,5235,3.67,22.63,8.11,8.04,52.39 +ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.6,37.39,7.51,3.11,49.6 +meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131200,False,False,7436,3.6,42.01,9.15,1.11,40.04 +dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,3.63,56.69,9.29,3.02,22.14 +RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.64,35.49,11.36,2.52,37.49 +utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.64,32.45,7.03,5.58,51.18 ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.67,30.6,13.26,1.04,39.69 -allenai/OLMo-7B (few-shot),6888,50,2176,True,False,5403,3.74,37.37,9.55,0.05,34.81 -ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.75,33.73,7.45,3.78,43.6 -jpostma/DagoBERT,116,40,512,True,False,11241,3.76,42.28,8.01,31.21,3.65 -LumiOpen/Viking-13B (few-shot),14030,131,4224,True,False,840,3.81,36.74,8.57,3.01,32.32 -google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.82,16.9,9.95,0.41,49.15 +sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.67,56.98,9.66,19.37,3.11 +google/gemma-2-2b (few-shot),2614,256,8320,True,False,5235,3.7,22.63,8.11,8.04,52.39 +ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.74,33.73,7.45,3.78,43.6 +allenai/OLMo-7B (few-shot),6888,50,2176,True,False,5403,3.76,37.37,9.55,0.05,34.81 +LumiOpen/Viking-13B (few-shot),14030,131,4224,True,False,840,3.77,36.74,8.57,3.01,32.32 +jpostma/DagoBERT,116,40,512,True,False,11241,3.79,42.28,8.01,31.21,3.65 openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.83,42.35,0.78,-0.02,47.61 +google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.84,16.9,9.95,0.41,49.15 +NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,3.87,24.15,8.31,1.6,37.08 Tweeties/tweety-7b-dutch-v24a (few-shot),7391,50,1024,True,False,2971,3.88,35.83,12.47,16.81,0.0 -NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,3.91,24.15,8.31,1.6,37.08 -PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.91,31.13,7.24,1.23,32.13 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.93,38.22,4.99,1.85,27.77 -meta-llama/Llama-3.2-1B (few-shot),1236,128,131200,True,False,7577,3.95,22.03,4.25,1.46,41.76 -HuggingFaceTB/SmolLM2-1.7B 
(few-shot),1711,49,8192,True,False,16249,3.97,22.84,4.6,2.55,40.33 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.89,31.13,7.24,1.23,32.13 +PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.94,38.22,4.99,1.85,27.77 +meta-llama/Llama-3.2-1B (few-shot),1236,128,131200,True,False,7577,3.96,22.03,4.25,1.46,41.76 +HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.98,22.84,4.6,2.55,40.33 +Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.98,23.44,6.82,4.11,33.16 HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,4.0,31.84,1.56,5.05,40.55 -Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,4.0,23.44,6.82,4.11,33.16 -ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,4.02,12.76,9.35,0.69,37.71 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.04,23.58,7.9,1.79,26.11 -3ebdola/Dialectal-Arabic-XLM-R-Base,277,250,512,True,False,12783,4.08,44.46,8.39,2.07,4.3 -dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,4.08,41.38,8.45,1.55,4.4 -Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.1,18.66,8.59,0.34,26.74 +3ebdola/Dialectal-Arabic-XLM-R-Base,277,250,512,True,False,12783,4.03,44.46,8.39,2.07,4.3 +dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,4.03,41.38,8.45,1.55,4.4 +ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,4.04,12.76,9.35,0.69,37.71 +Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.06,18.66,8.59,0.34,26.74 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.07,23.58,7.9,1.79,26.11 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2176,True,False,5484,4.14,18.7,3.7,2.19,38.08 sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,4.15,49.82,2.7,6.6,2.13 -Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.17,28.3,4.54,-0.42,20.81 -NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,4.18,22.5,6.04,-0.61,26.96 -Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,4.19,11.66,5.2,2.89,34.6 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.24,20.95,6.84,-1.5,22.67 +Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.18,28.3,4.54,-0.42,20.81 +NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,4.19,22.5,6.04,-0.61,26.96 +Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,4.2,11.66,5.2,2.89,34.6 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.22,20.95,6.84,-1.5,22.67 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,4.26,59.61,0.0,-0.04,3.28 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.29,15.68,6.73,0.63,19.73 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,4.26,28.25,3.73,0.76,19.08 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.28,15.68,6.73,0.63,19.73 state-spaces/mamba-2.8b-hf (few-shot),2768,50,32896,True,False,2722,4.29,22.95,2.4,3.12,22.4 -PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.43,21.32,4.37,-0.19,9.38 -allenai/OLMo-1B (few-shot),1177,50,2176,True,False,8536,4.45,22.58,4.92,-1.27,6.64 +PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.44,21.32,4.37,-0.19,9.38 +allenai/OLMo-1B (few-shot),1177,50,2176,True,False,8536,4.46,22.58,4.92,-1.27,6.64 PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.5,24.47,3.57,-2.03,10.18 
HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.76,17.49,2.01,-0.02,0.53 fresh-xlm-roberta-base,277,250,512,True,False,2214,4.79,13.09,0.92,1.93,0.26 diff --git a/dutch-nlu.md b/dutch-nlu.md index de3893be..37b899eb 100644 --- a/dutch-nlu.md +++ b/dutch-nlu.md @@ -3,7 +3,7 @@ layout: leaderboard title: Dutch NLU 🇳🇱 --- -
Last updated: 10/01/2025 12:30:35 CET
+Last updated: 11/01/2025 11:03:45 CET
@@ -289,6 +289,23 @@ title: Dutch NLU 🇳🇱 12.7.0 12.7.0 + + FacebookAI/xlm-roberta-large + 559 + 250 + 512 + True + 17,897 ± 3,921 / 3,463 ± 1,141 + 2.24 + 83.49 ± 1.51 / 86.12 ± 1.21 + 8.82 ± 7.93 / 30.82 ± 4.71 + 64.80 ± 8.79 / 80.93 ± 6.29 + 50.72 ± 1.20 / 61.66 ± 1.16 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + CohereForAI/c4ai-command-r-08-2024 (few-shot) 32296 @@ -306,23 +323,6 @@ title: Dutch NLU 🇳🇱 14.0.4 14.0.4 - - meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot) - 405869 - 128 - 131072 - True - 799 ± 246 / 112 ± 38 - 2.26 - 69.12 ± 2.03 / 64.39 ± 2.33 - 11.23 ± 1.27 / 20.82 ± 0.81 - 68.74 ± 0.69 / 83.97 ± 0.59 - 55.25 ± 3.26 / 73.34 ± 1.13 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - google/gemma-2-27b-it (few-shot) 27227 @@ -358,34 +358,17 @@ title: Dutch NLU 🇳🇱 0.0.0 - FacebookAI/xlm-roberta-large - 559 - 250 - 512 - True - 17,897 ± 3,921 / 3,463 ± 1,141 - 2.29 - 83.49 ± 1.51 / 86.12 ± 1.21 - 8.82 ± 7.93 / 30.82 ± 4.71 - 64.80 ± 8.79 / 80.93 ± 6.29 - 50.72 ± 1.20 / 61.66 ± 1.16 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - - - Qwen/QwQ-32B-Preview (few-shot) - 32764 - 152 - 32768 + meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot) + 405869 + 128 + 131072 True - 2,258 ± 1,221 / 198 ± 67 - 2.30 - 71.32 ± 1.36 / 57.50 ± 3.60 - 9.12 ± 1.28 / 21.29 ± 0.75 - 63.96 ± 1.27 / 81.82 ± 0.72 - 58.36 ± 1.16 / 73.62 ± 0.47 + 799 ± 246 / 112 ± 38 + 2.27 + 69.12 ± 2.03 / 64.39 ± 2.33 + 11.23 ± 1.27 / 20.82 ± 0.81 + 68.74 ± 0.69 / 83.97 ± 0.59 + 55.25 ± 3.26 / 73.34 ± 1.13 14.0.4 14.0.4 14.0.4 @@ -425,6 +408,23 @@ title: Dutch NLU 🇳🇱 0.0.0 0.0.0 + + Qwen/QwQ-32B-Preview (few-shot) + 32764 + 152 + 32768 + True + 2,258 ± 1,221 / 198 ± 67 + 2.32 + 71.32 ± 1.36 / 57.50 ± 3.60 + 9.12 ± 1.28 / 21.29 ± 0.75 + 63.96 ± 1.27 / 81.82 ± 0.72 + 58.36 ± 1.16 / 73.62 ± 0.47 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + meta-llama/Llama-3.1-70B-Instruct (few-shot) 70554 @@ -449,7 +449,7 @@ title: Dutch NLU 🇳🇱 512 True 20,637 ± 3,925 / 4,497 ± 1,502 - 2.37 + 2.38 84.47 ± 1.84 / 87.98 ± 1.21 5.16 ± 5.21 / 27.85 ± 3.29 71.23 ± 1.62 / 85.45 ± 0.83 @@ -476,23 +476,6 @@ title: Dutch NLU 🇳🇱 14.0.0 14.0.0 - - google/rembert - 575 - 250 - 512 - True - 11,736 ± 2,822 / 2,102 ± 677 - 2.42 - 75.49 ± 1.75 / 81.37 ± 1.31 - 4.79 ± 3.93 / 27.49 ± 2.37 - 66.47 ± 2.04 / 83.16 ± 1.01 - 55.70 ± 1.62 / 68.38 ± 1.47 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - gpt-4o-2024-05-13 (zero-shot, val) unknown @@ -511,21 +494,21 @@ title: Dutch NLU 🇳🇱 14.0.3 - Qwen/Qwen2.5-72B-Instruct (few-shot) - 72706 - 152 - 32768 + google/rembert + 575 + 250 + 512 True - 1,219 ± 412 / 158 ± 53 + 11,736 ± 2,822 / 2,102 ± 677 2.43 - 67.16 ± 1.61 / 44.06 ± 2.06 - 9.84 ± 1.35 / 19.81 ± 0.75 - 66.06 ± 1.20 / 82.87 ± 0.67 - 50.91 ± 2.59 / 71.45 ± 0.89 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 + 75.49 ± 1.75 / 81.37 ± 1.31 + 4.79 ± 3.93 / 27.49 ± 2.37 + 66.47 ± 2.04 / 83.16 ± 1.01 + 55.70 ± 1.62 / 68.38 ± 1.47 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 gpt-4o-mini-2024-07-18 (zero-shot, val) @@ -578,6 +561,40 @@ title: Dutch NLU 🇳🇱 0.0.0 0.0.0 + + gpt-3.5-turbo-0613 (few-shot, val) + unknown + 100 + 4095 + True + 921 ± 293 / 113 ± 37 + 2.44 + 68.96 ± 3.80 / 58.45 ± 3.71 + 8.81 ± 3.30 / 30.88 ± 2.25 + 58.95 ± 4.48 / 78.64 ± 2.32 + 55.57 ± 2.33 / 68.26 ± 1.85 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + + + Qwen/Qwen2.5-72B-Instruct (few-shot) + 72706 + 152 + 32768 + True + 1,219 ± 412 / 158 ± 53 + 2.45 + 67.16 ± 1.61 / 44.06 ± 2.06 + 9.84 ± 1.35 / 19.81 ± 0.75 + 66.06 ± 1.20 / 82.87 ± 0.67 + 50.91 ± 2.59 / 71.45 ± 0.89 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + meta-llama/Llama-3.3-70B-Instruct (few-shot) 70554 @@ -595,6 +612,23 
@@ title: Dutch NLU 🇳🇱 14.0.3 14.0.3 + + ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot) + 28411 + 256 + 4096 + True + 3,633 ± 1,236 / 777 ± 220 + 2.46 + 68.17 ± 1.75 / 51.61 ± 2.73 + 10.56 ± 1.04 / 19.29 ± 0.63 + 56.89 ± 0.82 / 78.31 ± 0.43 + 53.05 ± 2.18 / 71.82 ± 0.84 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + gpt-4-1106-preview (zero-shot, val) unknown @@ -629,23 +663,6 @@ title: Dutch NLU 🇳🇱 14.0.3 14.0.3 - - gpt-3.5-turbo-0613 (few-shot, val) - unknown - 100 - 4095 - True - 921 ± 293 / 113 ± 37 - 2.48 - 68.96 ± 3.80 / 58.45 ± 3.71 - 8.81 ± 3.30 / 30.88 ± 2.25 - 58.95 ± 4.48 / 78.64 ± 2.32 - 55.57 ± 2.33 / 68.26 ± 1.85 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - cardiffnlp/twitter-xlm-roberta-base 277 @@ -664,21 +681,21 @@ title: Dutch NLU 🇳🇱 0.0.0 - ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot) - 28411 - 256 - 4096 + sentence-transformers/paraphrase-xlm-r-multilingual-v1 + 277 + 250 + 512 True - 3,633 ± 1,236 / 777 ± 220 - 2.51 - 68.17 ± 1.75 / 51.61 ± 2.73 - 10.56 ± 1.04 / 19.29 ± 0.63 - 56.89 ± 0.82 / 78.31 ± 0.43 - 53.05 ± 2.18 / 71.82 ± 0.84 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 + 20,154 ± 4,438 / 3,890 ± 1,256 + 2.52 + 70.59 ± 1.60 / 78.25 ± 1.22 + 21.37 ± 8.79 / 40.62 ± 7.64 + 45.86 ± 2.06 / 71.32 ± 1.40 + 5.20 ± 0.30 / 10.40 ± 0.38 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 meta-llama/Llama-2-70b-hf (few-shot, val) @@ -687,7 +704,7 @@ title: Dutch NLU 🇳🇱 4221 True 1,892 ± 650 / 318 ± 105 - 2.51 + 2.54 66.50 ± 3.72 / 57.66 ± 3.78 7.82 ± 4.30 / 34.91 ± 2.53 49.55 ± 4.95 / 73.43 ± 3.38 @@ -698,21 +715,21 @@ title: Dutch NLU 🇳🇱 12.7.0 - sentence-transformers/paraphrase-xlm-r-multilingual-v1 - 277 - 250 - 512 + google/gemma-2-9b (few-shot) + 9242 + 256 + 8320 True - 20,154 ± 4,438 / 3,890 ± 1,256 - 2.52 - 70.59 ± 1.60 / 78.25 ± 1.22 - 21.37 ± 8.79 / 40.62 ± 7.64 - 45.86 ± 2.06 / 71.32 ± 1.40 - 5.20 ± 0.30 / 10.40 ± 0.38 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 + 2,038 ± 406 / 566 ± 172 + 2.56 + 57.13 ± 2.73 / 36.21 ± 1.71 + 17.43 ± 2.17 / 40.83 ± 1.50 + 31.39 ± 5.53 / 56.70 ± 5.97 + 59.33 ± 1.35 / 73.56 ± 0.52 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 meta-llama/Llama-3.1-8B (few-shot) @@ -721,7 +738,7 @@ title: Dutch NLU 🇳🇱 131200 True 2,986 ± 823 / 276 ± 94 - 2.53 + 2.56 64.79 ± 1.96 / 45.48 ± 2.24 11.95 ± 2.83 / 37.12 ± 2.19 32.97 ± 2.68 / 58.52 ± 2.92 @@ -732,30 +749,13 @@ title: Dutch NLU 🇳🇱 13.0.0 - google/gemma-2-9b (few-shot) - 9242 - 256 - 8320 - True - 2,038 ± 406 / 566 ± 172 - 2.57 - 57.13 ± 2.73 / 36.21 ± 1.71 - 17.43 ± 2.17 / 40.83 ± 1.50 - 31.39 ± 5.53 / 56.70 ± 5.97 - 59.33 ± 1.35 / 73.56 ± 0.52 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - - - google/gemma-2-9b-it (few-shot) + google/gemma-2-9b-it (few-shot) 9242 256 8320 True 2,062 ± 397 / 589 ± 178 - 2.58 + 2.59 52.62 ± 2.15 / 39.41 ± 1.72 11.78 ± 1.31 / 32.80 ± 0.87 59.23 ± 1.58 / 79.42 ± 0.88 @@ -772,7 +772,7 @@ title: Dutch NLU 🇳🇱 131072 True 1,208 ± 412 / 156 ± 53 - 2.58 + 2.59 55.08 ± 2.19 / 30.07 ± 1.17 10.80 ± 1.40 / 20.34 ± 0.88 61.31 ± 1.10 / 80.58 ± 0.56 @@ -789,7 +789,7 @@ title: Dutch NLU 🇳🇱 1024128 True 7,095 ± 2,193 / 1,063 ± 344 - 2.59 + 2.61 66.57 ± 1.86 / 48.40 ± 2.67 10.10 ± 1.55 / 33.62 ± 2.04 40.31 ± 2.25 / 69.53 ± 1.51 @@ -817,21 +817,38 @@ title: Dutch NLU 🇳🇱 0.0.0 - mistralai/Ministral-8B-Instruct-2410 (few-shot) - 8020 - 131 - 32768 + meta-llama/Meta-Llama-3-8B (few-shot) + 8030 + 128 + 8192 True - 1,302 ± 323 / 253 ± 86 - 2.62 - 66.51 ± 1.38 / 52.40 ± 2.62 - 11.91 ± 1.03 / 34.21 ± 1.08 - 34.46 ± 2.79 / 65.61 ± 2.58 - 59.23 ± 1.16 / 72.56 ± 0.80 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 + 1,477 ± 376 / 285 ± 97 + 2.67 + 62.26 ± 2.20 / 42.41 ± 
2.02 + 10.45 ± 2.69 / 33.45 ± 1.99 + 30.30 ± 3.94 / 62.28 ± 2.89 + 62.99 ± 1.00 / 73.73 ± 0.98 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + + + meta-llama/Llama-2-70b-chat-hf (few-shot, val) + 68977 + 32 + 4221 + True + 1,979 ± 621 / 320 ± 105 + 2.68 + 64.00 ± 3.52 / 48.94 ± 3.83 + 13.30 ± 3.75 / 30.50 ± 2.48 + 30.88 ± 4.62 / 59.62 ± 4.50 + 54.14 ± 1.55 / 70.96 ± 1.01 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 meta-llama/Meta-Llama-3-8B-Instruct (few-shot) @@ -839,8 +856,8 @@ title: Dutch NLU 🇳🇱 128 8192 True - 1,007 ± 316 / 162 ± 45 - 2.65 + 1,483 ± 377 / 287 ± 97 + 2.68 68.72 ± 1.81 / 54.89 ± 2.10 14.67 ± 2.51 / 41.36 ± 2.04 32.91 ± 2.56 / 64.93 ± 1.97 @@ -868,38 +885,21 @@ title: Dutch NLU 🇳🇱 14.0.4 - meta-llama/Llama-2-70b-chat-hf (few-shot, val) - 68977 - 32 - 4221 + mistralai/Ministral-8B-Instruct-2410 (few-shot) + 8020 + 131 + 32768 True - 1,979 ± 621 / 320 ± 105 + 1,302 ± 323 / 253 ± 86 2.69 - 64.00 ± 3.52 / 48.94 ± 3.83 - 13.30 ± 3.75 / 30.50 ± 2.48 - 30.88 ± 4.62 / 59.62 ± 4.50 - 54.14 ± 1.55 / 70.96 ± 1.01 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - - - CohereForAI/aya-expanse-8b (few-shot) - 8028 - 256 - 8192 - False - 2,686 ± 685 / 491 ± 164 - 2.70 - 62.07 ± 1.67 / 37.68 ± 1.28 - 13.70 ± 1.36 / 34.90 ± 0.68 - 35.14 ± 2.33 / 66.66 ± 1.50 - 49.15 ± 1.48 / 68.82 ± 0.68 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 + 63.30 ± 2.36 / 39.20 ± 2.16 + 11.82 ± 1.07 / 34.18 ± 1.11 + 32.20 ± 0.77 / 65.67 ± 0.69 + 59.45 ± 0.89 / 71.13 ± 0.60 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 Nexusflow/Starling-LM-7B-beta (few-shot) @@ -935,23 +935,6 @@ title: Dutch NLU 🇳🇱 14.0.4 14.0.4 - - meta-llama/Meta-Llama-3-8B (few-shot) - 8030 - 128 - 8192 - True - 1,335 ± 338 / 260 ± 88 - 2.72 - 62.26 ± 2.20 / 42.41 ± 2.02 - 10.45 ± 2.69 / 33.45 ± 1.99 - 30.30 ± 3.94 / 62.28 ± 2.89 - 62.99 ± 1.00 / 73.73 ± 0.98 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - nvidia/mistral-nemo-minitron-8b-base (few-shot) 8414 @@ -1037,23 +1020,6 @@ title: Dutch NLU 🇳🇱 12.6.1 12.6.1 - - CohereForAI/aya-23-8B (few-shot) - 8028 - 256 - 8192 - False - 2,707 ± 688 / 497 ± 166 - 2.79 - 60.81 ± 1.94 / 46.59 ± 3.32 - 7.90 ± 1.63 / 24.82 ± 0.95 - 31.12 ± 2.35 / 64.29 ± 1.88 - 63.00 ± 1.23 / 74.60 ± 0.67 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - robinsmits/Qwen1.5-7B-Dutch-Chat-Sft-Bf16 (few-shot) 7719 @@ -1072,19 +1038,53 @@ title: Dutch NLU 🇳🇱 12.6.1 - meta-llama/Llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131200 - True - 1,005 ± 330 / 196 ± 74 + CohereForAI/aya-expanse-8b (few-shot) + 8028 + 256 + 8192 + False + 2,686 ± 685 / 491 ± 164 + 2.80 + 53.02 ± 1.86 / 30.09 ± 1.16 + 13.68 ± 1.32 / 34.87 ± 0.67 + 29.97 ± 2.13 / 64.01 ± 1.12 + 53.40 ± 1.34 / 69.31 ± 0.65 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + + + skole-gpt-mixtral (few-shot) + unknown + 32 + 32768 + False + 3,583 ± 977 / 686 ± 231 + 2.81 + 62.16 ± 1.09 / 45.76 ± 2.07 + 8.92 ± 1.08 / 24.28 ± 0.76 + 32.76 ± 2.94 / 65.17 ± 2.79 + 56.87 ± 0.92 / 72.57 ± 0.85 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + + + CohereForAI/aya-23-8B (few-shot) + 8028 + 256 + 8192 + False + 2,707 ± 688 / 497 ± 166 2.82 - 69.76 ± 1.36 / 57.66 ± 1.36 - 9.09 ± 1.42 / 20.14 ± 0.84 - 37.58 ± 3.42 / 66.98 ± 2.22 - 41.26 ± 2.09 / 65.63 ± 0.90 + 60.81 ± 1.94 / 46.59 ± 3.32 + 7.90 ± 1.63 / 24.82 ± 0.95 + 31.12 ± 2.35 / 64.29 ± 1.88 + 63.00 ± 1.23 / 74.60 ± 0.67 13.0.0 - 14.0.4 + 13.0.0 13.0.0 13.0.0 @@ -1123,21 +1123,38 @@ title: Dutch NLU 🇳🇱 0.0.0 - skole-gpt-mixtral (few-shot) + claude-3-5-haiku-20241022 (zero-shot, val) unknown - 32 - 32768 - False - 3,583 ± 977 / 686 ± 231 - 2.83 - 62.16 ± 1.09 / 45.76 ± 2.07 - 8.92 ± 1.08 / 24.28 ± 0.76 - 32.76 ± 2.94 / 
65.17 ± 2.79 - 56.87 ± 0.92 / 72.57 ± 0.85 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + unknown + 200000 + True + 277 ± 77 / 70 ± 25 + 2.84 + 61.15 ± 3.04 / 43.60 ± 2.00 + 12.71 ± 2.21 / 30.22 ± 1.53 + 35.26 ± 2.46 / 59.55 ± 1.69 + 41.27 ± 1.25 / 68.96 ± 1.30 + 14.0.3 + 14.0.2 + 14.0.3 + 14.0.3 + + + ibm-granite/granite-3.0-8b-base (few-shot) + 8171 + 49 + 4096 + True + 2,515 ± 625 / 476 ± 159 + 2.84 + 52.32 ± 1.98 / 41.98 ± 1.88 + 8.46 ± 1.09 / 21.30 ± 0.67 + 42.42 ± 3.42 / 68.81 ± 2.66 + 53.12 ± 1.81 / 63.79 ± 1.63 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 mlabonne/NeuralBeagle14-7B (few-shot, val) @@ -1157,38 +1174,21 @@ title: Dutch NLU 🇳🇱 12.5.2 - claude-3-5-haiku-20241022 (zero-shot, val) - unknown - unknown - 200000 - True - 277 ± 77 / 70 ± 25 - 2.86 - 61.15 ± 3.04 / 43.60 ± 2.00 - 12.71 ± 2.21 / 30.22 ± 1.53 - 35.26 ± 2.46 / 59.55 ± 1.69 - 41.27 ± 1.25 / 68.96 ± 1.30 - 14.0.3 - 14.0.2 - 14.0.3 - 14.0.3 - - - CohereForAI/c4ai-command-r-v01 (few-shot) - 34981 - 256 - 8192 + ReBatch/Reynaerde-7B-Instruct (few-shot) + 7248 + 33 + 32768 False - 1,919 ± 645 / 248 ± 83 - 2.87 - 61.21 ± 2.15 / 41.76 ± 1.84 - 9.03 ± 1.38 / 29.15 ± 1.45 - 38.15 ± 2.37 / 65.60 ± 2.16 - 44.91 ± 3.07 / 63.57 ± 1.72 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 + 2,562 ± 487 / 782 ± 247 + 2.85 + 59.16 ± 2.29 / 42.33 ± 2.15 + 10.39 ± 1.44 / 28.74 ± 1.05 + 19.50 ± 1.96 / 55.52 ± 3.92 + 60.96 ± 1.24 / 72.79 ± 0.95 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 ibm-granite/granite-8b-code-base-4k (few-shot) @@ -1208,38 +1208,21 @@ title: Dutch NLU 🇳🇱 13.0.0 - ibm-granite/granite-3.0-8b-base (few-shot) - 8171 - 49 - 4096 - True - 2,515 ± 625 / 476 ± 159 - 2.88 - 52.26 ± 1.87 / 42.18 ± 1.90 - 8.46 ± 1.09 / 21.30 ± 0.67 - 42.42 ± 3.42 / 68.81 ± 2.66 - 53.11 ± 1.79 / 63.80 ± 1.61 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - - - mistralai/Mistral-7B-v0.1 (few-shot) - 7242 - 32 - 32768 - True - 1,446 ± 354 / 295 ± 100 + CohereForAI/c4ai-command-r-v01 (few-shot) + 34981 + 256 + 8192 + False + 1,919 ± 645 / 248 ± 83 2.89 - 58.15 ± 1.14 / 40.78 ± 1.91 - 7.94 ± 1.25 / 31.02 ± 3.45 - 25.41 ± 3.46 / 61.11 ± 2.36 - 62.56 ± 1.10 / 73.16 ± 0.93 - 9.1.2 - 9.1.2 - 9.1.2 - 12.5.1 + 61.21 ± 2.15 / 41.76 ± 1.84 + 9.03 ± 1.38 / 29.15 ± 1.45 + 38.15 ± 2.37 / 65.60 ± 2.16 + 44.91 ± 3.07 / 63.57 ± 1.72 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 ReBatch/Llama-3-8B-dutch (few-shot) @@ -1276,21 +1259,21 @@ title: Dutch NLU 🇳🇱 13.0.0 - ReBatch/Reynaerde-7B-Instruct (few-shot) - 7248 - 33 - 32768 - False - 2,562 ± 487 / 782 ± 247 + microsoft/xlm-align-base + 277 + 250 + 512 + True + 14,744 ± 2,870 / 3,265 ± 1,053 2.90 - 59.16 ± 2.29 / 42.33 ± 2.15 - 10.39 ± 1.44 / 28.74 ± 1.05 - 19.50 ± 1.96 / 55.52 ± 3.92 - 60.96 ± 1.24 / 72.79 ± 0.95 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 78.85 ± 2.48 / 83.35 ± 2.28 + 11.80 ± 7.64 / 33.49 ± 6.73 + 14.56 ± 8.02 / 53.64 ± 5.14 + 42.08 ± 7.94 / 51.94 ± 9.08 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 Rijgersberg/Mistral-7B-v0.1-chat-nl (few-shot) @@ -1299,7 +1282,7 @@ title: Dutch NLU 🇳🇱 32768 False 5,907 ± 1,028 / 1,695 ± 549 - 2.90 + 2.91 56.73 ± 1.95 / 38.97 ± 1.84 11.08 ± 1.46 / 32.20 ± 1.43 19.41 ± 2.55 / 57.17 ± 2.38 @@ -1310,21 +1293,21 @@ title: Dutch NLU 🇳🇱 12.5.2 - microsoft/xlm-align-base - 277 - 250 - 512 + mistralai/Mistral-7B-v0.1 (few-shot) + 7242 + 32 + 32768 True - 14,744 ± 2,870 / 3,265 ± 1,053 - 2.90 - 78.85 ± 2.48 / 83.35 ± 2.28 - 11.80 ± 7.64 / 33.49 ± 6.73 - 14.56 ± 8.02 / 53.64 ± 5.14 - 42.08 ± 7.94 / 51.94 ± 9.08 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 + 1,446 ± 354 / 295 ± 100 + 2.92 + 58.15 ± 1.14 / 40.78 ± 1.91 + 7.94 ± 1.25 / 31.02 ± 3.45 + 25.41 ± 
3.46 / 61.11 ± 2.36 + 62.56 ± 1.10 / 73.16 ± 0.93 + 9.1.2 + 9.1.2 + 9.1.2 + 12.5.1 mlabonne/AlphaMonarch-7B (few-shot, val) @@ -1367,7 +1350,7 @@ title: Dutch NLU 🇳🇱 512 True 15,040 ± 2,953 / 3,417 ± 1,100 - 2.96 + 2.95 66.85 ± 1.32 / 72.84 ± 0.82 20.56 ± 1.44 / 39.67 ± 0.86 35.56 ± 1.76 / 66.00 ± 1.15 @@ -1377,23 +1360,6 @@ title: Dutch NLU 🇳🇱 12.6.1 12.6.1 - - mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot) - 341029 - 256 - 4096 - True - 1,904 ± 475 / 361 ± 121 - 2.97 - 47.60 ± 2.59 / 29.38 ± 1.71 - 10.62 ± 2.33 / 34.10 ± 2.72 - 61.64 ± 9.03 / 78.16 ± 6.70 - 24.02 ± 6.42 / 38.65 ± 10.84 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - mistralai/Mistral-7B-Instruct-v0.2 (few-shot) 7242 @@ -1418,7 +1384,7 @@ title: Dutch NLU 🇳🇱 32768 True 1,841 ± 297 / 651 ± 193 - 3.00 + 2.98 56.76 ± 1.52 / 42.03 ± 1.98 7.11 ± 1.17 / 26.36 ± 2.97 23.55 ± 2.76 / 59.14 ± 3.18 @@ -1428,6 +1394,23 @@ title: Dutch NLU 🇳🇱 12.5.2 12.5.2 + + mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot) + 341029 + 256 + 4096 + True + 1,904 ± 475 / 361 ± 121 + 2.98 + 47.60 ± 2.59 / 29.38 ± 1.71 + 10.62 ± 2.33 / 34.10 ± 2.72 + 61.64 ± 9.03 / 78.16 ± 6.70 + 24.02 ± 6.42 / 38.65 ± 10.84 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + ibm-granite/granite-8b-code-instruct-4k (few-shot) 8055 @@ -1445,6 +1428,23 @@ title: Dutch NLU 🇳🇱 13.0.0 13.0.0 + + mistralai/Mistral-7B-v0.3 (few-shot) + 7248 + 33 + 32768 + True + 1,364 ± 343 / 266 ± 90 + 3.00 + 56.52 ± 1.42 / 41.84 ± 1.84 + 7.02 ± 1.21 / 26.40 ± 2.96 + 23.41 ± 2.91 / 59.14 ± 3.11 + 61.90 ± 1.07 / 72.49 ± 1.05 + 12.10.4 + 12.10.4 + 12.10.4 + 12.10.5 + NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) 46998 @@ -1463,21 +1463,21 @@ title: Dutch NLU 🇳🇱 14.0.4 - mistralai/Mistral-7B-v0.3 (few-shot) - 7248 - 33 - 32768 - True - 1,364 ± 343 / 266 ± 90 - 3.02 - 56.52 ± 1.42 / 41.84 ± 1.84 - 7.02 ± 1.21 / 26.40 ± 2.96 - 23.41 ± 2.91 / 59.14 ± 3.11 - 61.90 ± 1.07 / 72.49 ± 1.05 - 12.10.4 - 12.10.4 - 12.10.4 - 12.10.5 + RuterNorway/Llama-2-13b-chat-norwegian (few-shot) + unknown + 32 + 4096 + False + 3,254 ± 1,068 / 484 ± 173 + 3.04 + 57.66 ± 1.29 / 43.77 ± 2.78 + 8.41 ± 1.47 / 25.59 ± 1.30 + 16.93 ± 2.60 / 55.72 ± 3.35 + 56.29 ± 1.11 / 68.94 ± 0.81 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 ibm-granite/granite-3.0-2b-base (few-shot) @@ -1486,7 +1486,7 @@ title: Dutch NLU 🇳🇱 4224 True 10,187 ± 2,363 / 2,204 ± 737 - 3.03 + 3.04 47.28 ± 1.57 / 36.12 ± 1.72 12.12 ± 1.92 / 35.44 ± 1.80 12.74 ± 2.68 / 52.69 ± 2.88 @@ -1497,38 +1497,38 @@ title: Dutch NLU 🇳🇱 13.0.0 - microsoft/Phi-3-mini-4k-instruct (few-shot) - 3821 + meta-llama/Llama-2-13b-chat-hf (few-shot) + 13016 32 4096 True - 3,194 ± 687 / 650 ± 216 - 3.05 - 50.31 ± 1.94 / 41.54 ± 2.19 - 12.58 ± 1.62 / 36.56 ± 1.79 - 14.72 ± 1.84 / 50.23 ± 3.10 - 56.19 ± 0.80 / 66.72 ± 0.92 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 + 2,849 ± 622 / 723 ± 229 + 3.04 + 57.80 ± 1.53 / 39.43 ± 1.53 + 8.57 ± 1.27 / 29.84 ± 2.35 + 17.40 ± 1.54 / 57.26 ± 1.96 + 56.35 ± 0.85 / 69.69 ± 0.76 + 12.11.0 + 12.10.4 + 12.10.4 + 12.11.0 - occiglot/occiglot-7b-eu5-instruct (few-shot) - 7242 - 32 - 32768 - False - 2,088 ± 352 / 706 ± 214 - 3.06 - 53.78 ± 1.86 / 41.29 ± 2.07 - 7.78 ± 1.43 / 24.33 ± 1.57 - 16.23 ± 2.49 / 55.09 ± 3.18 - 63.09 ± 1.18 / 73.88 ± 0.72 - 12.5.2 - 12.2.0 - 12.3.1 - 12.4.0 + meta-llama/Llama-3.1-8B-Instruct (few-shot) + 8030 + 128 + 131072 + True + 1,473 ± 377 / 283 ± 96 + 3.04 + 61.68 ± 1.94 / 42.64 ± 1.85 + 8.97 ± 1.44 / 20.07 ± 0.82 + 36.57 ± 1.77 / 65.25 ± 1.94 + 33.88 ± 1.83 / 62.17 ± 0.91 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 Geotrend/distilbert-base-25lang-cased @@ -1537,7 +1537,7 @@ 
title: Dutch NLU 🇳🇱 512 True 26,099 ± 5,881 / 5,178 ± 1,665 - 3.07 + 3.05 75.02 ± 1.48 / 81.57 ± 0.76 7.45 ± 2.99 / 29.70 ± 1.94 45.28 ± 0.55 / 71.89 ± 0.59 @@ -1547,6 +1547,40 @@ title: Dutch NLU 🇳🇱 0.0.0 0.0.0 + + microsoft/Phi-3-mini-4k-instruct (few-shot) + 3821 + 32 + 4096 + True + 8,681 ± 1,650 / 2,177 ± 717 + 3.05 + 50.31 ± 1.94 / 41.54 ± 2.19 + 12.58 ± 1.62 / 36.56 ± 1.79 + 14.72 ± 1.84 / 50.23 ± 3.10 + 56.19 ± 0.80 / 66.72 ± 0.92 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + + + google/gemma-7b (few-shot) + 8538 + 256 + 8192 + True + 1,378 ± 260 / 387 ± 119 + 3.07 + 47.75 ± 2.33 / 35.64 ± 1.89 + 7.68 ± 0.61 / 26.25 ± 1.18 + 28.28 ± 2.48 / 62.81 ± 1.70 + 61.49 ± 1.15 / 73.19 ± 0.81 + 12.9.1 + 12.9.1 + 12.9.1 + 12.9.1 + NorwAI/NorwAI-Mixtral-8x7B (few-shot) 46998 @@ -1582,27 +1616,10 @@ title: Dutch NLU 🇳🇱 12.5.2 - RuterNorway/Llama-2-13b-chat-norwegian (few-shot) - unknown - 32 - 4096 - False - 3,254 ± 1,068 / 484 ± 173 - 3.09 - 57.66 ± 1.29 / 43.77 ± 2.78 - 8.41 ± 1.47 / 25.59 ± 1.30 - 16.93 ± 2.60 / 55.72 ± 3.35 - 56.29 ± 1.11 / 68.94 ± 0.81 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - - - google/gemma-7b-it (few-shot) - 8538 - 256 - 8317 + google/gemma-7b-it (few-shot) + 8538 + 256 + 8317 False 1,792 ± 249 / 668 ± 203 3.09 @@ -1616,38 +1633,38 @@ title: Dutch NLU 🇳🇱 12.10.0 - google/gemma-7b (few-shot) - 8538 - 256 - 8192 - True - 1,378 ± 260 / 387 ± 119 + occiglot/occiglot-7b-eu5-instruct (few-shot) + 7242 + 32 + 32768 + False + 2,088 ± 352 / 706 ± 214 3.09 - 47.75 ± 2.33 / 35.64 ± 1.89 - 7.68 ± 0.61 / 26.25 ± 1.18 - 28.28 ± 2.48 / 62.81 ± 1.70 - 61.49 ± 1.15 / 73.19 ± 0.81 - 12.9.1 - 12.9.1 - 12.9.1 - 12.9.1 + 53.78 ± 1.86 / 41.29 ± 2.07 + 7.78 ± 1.43 / 24.33 ± 1.57 + 16.23 ± 2.49 / 55.09 ± 3.18 + 63.09 ± 1.18 / 73.88 ± 0.72 + 12.5.2 + 12.2.0 + 12.3.1 + 12.4.0 - meta-llama/Llama-2-13b-chat-hf (few-shot) - 13016 + Rijgersberg/GEITje-7B-chat (few-shot) + 7242 32 - 4096 - True - 2,849 ± 622 / 723 ± 229 - 3.09 - 57.80 ± 1.53 / 39.43 ± 1.53 - 8.57 ± 1.27 / 29.84 ± 2.35 - 17.40 ± 1.54 / 57.26 ± 1.96 - 56.35 ± 0.85 / 69.69 ± 0.76 - 12.11.0 - 12.10.4 - 12.10.4 - 12.11.0 + 32768 + False + 5,920 ± 1,028 / 1,696 ± 550 + 3.12 + 50.69 ± 1.67 / 35.96 ± 2.63 + 8.16 ± 1.68 / 27.37 ± 1.95 + 20.45 ± 2.12 / 59.00 ± 1.21 + 54.48 ± 0.86 / 66.71 ± 0.59 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 meta-llama/Llama-2-13b-hf (few-shot) @@ -1656,7 +1673,7 @@ title: Dutch NLU 🇳🇱 4096 True 2,898 ± 637 / 736 ± 236 - 3.10 + 3.12 52.55 ± 1.64 / 43.32 ± 1.70 4.26 ± 2.09 / 28.32 ± 2.68 24.57 ± 3.54 / 54.94 ± 5.33 @@ -1683,6 +1700,23 @@ title: Dutch NLU 🇳🇱 10.0.1 12.4.0 + + timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot) + 7242 + 32 + 32768 + True + 5,054 ± 1,200 / 1,056 ± 339 + 3.14 + 54.56 ± 2.96 / 37.86 ± 2.49 + 8.43 ± 1.27 / 24.23 ± 0.94 + 10.99 ± 2.55 / 50.46 ± 4.17 + 55.91 ± 1.08 / 66.78 ± 1.13 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + Rijgersberg/GEITje-7B (few-shot) 7242 @@ -1734,23 +1768,6 @@ title: Dutch NLU 🇳🇱 12.6.1 12.6.1 - - Rijgersberg/GEITje-7B-chat (few-shot) - 7242 - 32 - 32768 - False - 5,920 ± 1,028 / 1,696 ± 550 - 3.16 - 50.69 ± 1.67 / 35.96 ± 2.63 - 8.16 ± 1.68 / 27.37 ± 1.95 - 20.45 ± 2.12 / 59.00 ± 1.21 - 54.48 ± 0.86 / 66.71 ± 0.59 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - EuropeanParliament/EUBERT 93 @@ -1769,21 +1786,21 @@ title: Dutch NLU 🇳🇱 0.0.0 - mistralai/Mistral-7B-Instruct-v0.1 (few-shot) + occiglot/occiglot-7b-eu5 (few-shot) 7242 32 32768 - False - 634 ± 179 / 110 ± 35 + True + 2,219 ± 427 / 717 ± 224 3.18 - 52.72 ± 2.58 / 33.51 ± 1.22 - 7.91 ± 2.16 / 27.82 ± 1.97 - 18.14 ± 2.10 / 55.42 ± 3.05 - 52.75 ± 
0.88 / 67.15 ± 1.08 - 9.3.1 - 9.3.1 - 9.3.1 - 12.4.0 + 51.31 ± 2.32 / 42.95 ± 2.58 + 7.41 ± 1.24 / 26.93 ± 1.56 + 13.04 ± 1.93 / 53.54 ± 2.70 + 59.28 ± 1.15 / 69.67 ± 0.95 + 12.5.2 + 12.1.0 + 12.1.0 + 12.1.0 TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) @@ -1803,73 +1820,22 @@ title: Dutch NLU 🇳🇱 13.0.0 - Twitter/twhin-bert-base - 278 + Twitter/twhin-bert-large + 560 250 512 True - 11,514 ± 2,041 / 2,862 ± 918 + 9,707 ± 1,664 / 2,549 ± 831 3.19 - 74.03 ± 3.05 / 80.59 ± 2.24 - 9.53 ± 5.28 / 32.06 ± 4.17 - 39.12 ± 12.90 / 68.36 ± 6.85 - 7.71 ± 0.42 / 12.90 ± 0.39 + 77.35 ± 2.80 / 82.50 ± 1.87 + 6.55 ± 5.33 / 28.68 ± 3.64 + 18.25 ± 8.41 / 54.00 ± 5.57 + 28.37 ± 4.84 / 36.84 ± 5.92 0.0.0 0.0.0 0.0.0 0.0.0 - - meta-llama/Llama-2-7b-chat-hf (few-shot) - 6738 - 32 - 4096 - False - 2,643 ± 455 / 800 ± 247 - 3.19 - 50.23 ± 2.34 / 37.12 ± 3.30 - 10.07 ± 1.84 / 35.66 ± 2.24 - 14.73 ± 1.62 / 54.59 ± 2.24 - 53.42 ± 0.80 / 66.24 ± 0.84 - 9.3.1 - 9.3.1 - 9.3.1 - 12.4.0 - - - timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot) - 7242 - 32 - 32768 - True - 5,054 ± 1,200 / 1,056 ± 339 - 3.19 - 54.56 ± 2.96 / 37.86 ± 2.49 - 8.43 ± 1.27 / 24.23 ± 0.94 - 10.99 ± 2.55 / 50.46 ± 4.17 - 55.91 ± 1.08 / 66.78 ± 1.13 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - - - BramVanroy/fietje-2b (few-shot) - 2780 - 51 - 2048 - True - 4,804 ± 1,045 / 1,220 ± 392 - 3.20 - 33.92 ± 3.43 / 28.63 ± 2.42 - 13.39 ± 1.64 / 41.03 ± 1.87 - 6.75 ± 2.55 / 41.28 ± 2.37 - 58.57 ± 1.03 / 69.39 ± 0.79 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - Qwen/Qwen1.5-4B-Chat (few-shot) 3950 @@ -1887,6 +1853,23 @@ title: Dutch NLU 🇳🇱 12.1.0 12.5.2 + + Twitter/twhin-bert-base + 278 + 250 + 512 + True + 11,514 ± 2,041 / 2,862 ± 918 + 3.20 + 74.03 ± 3.05 / 80.59 ± 2.24 + 9.53 ± 5.28 / 32.06 ± 4.17 + 39.12 ± 12.90 / 68.36 ± 6.85 + 7.71 ± 0.42 / 12.90 ± 0.39 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + meta-llama/Llama-3.2-3B-Instruct (few-shot) 3213 @@ -1905,55 +1888,55 @@ title: Dutch NLU 🇳🇱 13.0.0 - occiglot/occiglot-7b-eu5 (few-shot) - 7242 - 32 - 32768 + BramVanroy/fietje-2b (few-shot) + 2780 + 51 + 2048 True - 2,219 ± 427 / 717 ± 224 - 3.20 - 51.31 ± 2.32 / 42.95 ± 2.58 - 7.41 ± 1.24 / 26.93 ± 1.56 - 13.04 ± 1.93 / 53.54 ± 2.70 - 59.28 ± 1.15 / 69.67 ± 0.95 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 + 4,804 ± 1,045 / 1,220 ± 392 + 3.21 + 33.92 ± 3.43 / 28.63 ± 2.42 + 13.39 ± 1.64 / 41.03 ± 1.87 + 6.75 ± 2.55 / 41.28 ± 2.37 + 58.57 ± 1.03 / 69.39 ± 0.79 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 - Twitter/twhin-bert-large - 560 - 250 - 512 - True - 9,707 ± 1,664 / 2,549 ± 831 + meta-llama/Llama-2-7b-chat-hf (few-shot) + 6738 + 32 + 4096 + False + 2,643 ± 455 / 800 ± 247 3.21 - 77.35 ± 2.80 / 82.50 ± 1.87 - 6.55 ± 5.33 / 28.68 ± 3.64 - 18.25 ± 8.41 / 54.00 ± 5.57 - 28.37 ± 4.84 / 36.84 ± 5.92 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 + 50.23 ± 2.34 / 37.12 ± 3.30 + 10.07 ± 1.84 / 35.66 ± 2.24 + 14.73 ± 1.62 / 54.59 ± 2.24 + 53.42 ± 0.80 / 66.24 ± 0.84 + 9.3.1 + 9.3.1 + 9.3.1 + 12.4.0 - ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) - 3374 - 49 - 4096 - True - 10,246 ± 3,021 / 1,629 ± 550 + mistralai/Mistral-7B-Instruct-v0.1 (few-shot) + 7242 + 32 + 32768 + False + 634 ± 179 / 110 ± 35 3.21 - 49.25 ± 2.57 / 36.48 ± 2.14 - 9.45 ± 1.76 / 39.66 ± 1.14 - 11.87 ± 2.68 / 47.32 ± 3.85 - 54.20 ± 1.44 / 67.04 ± 0.75 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 52.72 ± 2.58 / 33.51 ± 1.22 + 7.91 ± 2.16 / 27.82 ± 1.97 + 18.14 ± 2.10 / 55.42 ± 3.05 + 52.75 ± 0.88 / 67.15 ± 1.08 + 9.3.1 + 9.3.1 + 9.3.1 + 12.4.0 BramVanroy/fietje-2b-instruct (few-shot) @@ -1972,6 +1955,23 @@ title: Dutch NLU 🇳🇱 12.6.1 12.6.1 + + 
ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) + 3374 + 49 + 4096 + True + 10,246 ± 3,021 / 1,629 ± 550 + 3.22 + 49.25 ± 2.57 / 36.48 ± 2.14 + 9.45 ± 1.76 / 39.66 ± 1.14 + 11.87 ± 2.68 / 47.32 ± 3.85 + 54.20 ± 1.44 / 67.04 ± 0.75 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + NbAiLab/nb-llama-3.1-70B (few-shot) 70554 @@ -2040,23 +2040,6 @@ title: Dutch NLU 🇳🇱 13.0.0 13.0.0 - - 01-ai/Yi-1.5-6B (few-shot) - 6061 - 64 - 4224 - True - 2,867 ± 550 / 793 ± 253 - 3.30 - 51.18 ± 1.62 / 35.45 ± 1.88 - 9.23 ± 2.84 / 19.38 ± 4.37 - 1.99 ± 2.56 / 34.69 ± 1.59 - 54.66 ± 1.25 / 65.31 ± 1.06 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - meta-llama/Llama-2-7b-hf (few-shot) 6738 @@ -2064,7 +2047,7 @@ title: Dutch NLU 🇳🇱 4096 True 930 ± 310 / 128 ± 43 - 3.31 + 3.29 40.49 ± 4.32 / 30.86 ± 2.27 7.10 ± 1.85 / 27.42 ± 1.76 18.66 ± 2.39 / 55.25 ± 3.77 @@ -2074,6 +2057,23 @@ title: Dutch NLU 🇳🇱 9.2.0 12.5.1 + + 01-ai/Yi-1.5-6B (few-shot) + 6061 + 64 + 4224 + True + 2,867 ± 550 / 793 ± 253 + 3.32 + 51.18 ± 1.62 / 35.45 ± 1.88 + 9.23 ± 2.84 / 19.38 ± 4.37 + 1.99 ± 2.56 / 34.69 ± 1.59 + 54.66 ± 1.25 / 65.31 ± 1.06 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + AI-Sweden-Models/gpt-sw3-20b (few-shot) 20918 @@ -2091,6 +2091,23 @@ title: Dutch NLU 🇳🇱 9.3.1 9.3.1 + + ibm-granite/granite-3b-code-instruct-2k (few-shot) + 3483 + 49 + 2048 + True + 9,059 ± 1,947 / 2,201 ± 728 + 3.35 + 48.53 ± 3.89 / 38.20 ± 2.92 + 10.15 ± 1.55 / 22.01 ± 1.44 + 4.88 ± 2.27 / 38.78 ± 3.56 + 45.38 ± 0.93 / 56.09 ± 1.05 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + sentence-transformers/distiluse-base-multilingual-cased-v1 135 @@ -2108,23 +2125,6 @@ title: Dutch NLU 🇳🇱 0.0.0 0.0.0 - - meta-llama/Llama-3.2-3B (few-shot) - 3213 - 128 - 131200 - True - 3,713 ± 877 / 836 ± 267 - 3.37 - 47.40 ± 3.29 / 33.11 ± 2.04 - 7.90 ± 1.98 / 30.71 ± 1.89 - 3.10 ± 1.93 / 34.24 ± 0.73 - 56.53 ± 1.48 / 68.47 ± 1.35 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - AI-Sweden-Models/roberta-large-1350k 354 @@ -2142,23 +2142,6 @@ title: Dutch NLU 🇳🇱 10.0.1 10.0.1 - - ibm-granite/granite-3.0-3b-a800m-base (few-shot) - 3374 - 49 - 4096 - True - 10,504 ± 3,028 / 1,678 ± 559 - 3.38 - 42.52 ± 3.31 / 33.08 ± 2.70 - 9.91 ± 1.71 / 35.24 ± 2.62 - 0.69 ± 2.82 / 36.10 ± 2.58 - 56.95 ± 1.18 / 66.87 ± 1.37 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - BramVanroy/GEITje-7B-ultra-sft (few-shot) 7242 @@ -2166,7 +2149,7 @@ title: Dutch NLU 🇳🇱 8192 False 5,979 ± 1,044 / 1,724 ± 559 - 3.40 + 3.38 39.41 ± 2.93 / 30.59 ± 1.59 7.00 ± 3.04 / 35.01 ± 3.72 16.10 ± 2.34 / 52.05 ± 3.60 @@ -2177,17 +2160,34 @@ title: Dutch NLU 🇳🇱 12.5.2 - ibm-granite/granite-3b-code-instruct-2k (few-shot) - 3483 + ibm-granite/granite-3.0-3b-a800m-base (few-shot) + 3374 49 - 2048 + 4096 True - 9,059 ± 1,947 / 2,201 ± 728 + 10,504 ± 3,028 / 1,678 ± 559 3.40 - 48.53 ± 3.89 / 38.20 ± 2.92 - 10.15 ± 1.55 / 22.01 ± 1.44 - 4.88 ± 2.27 / 38.78 ± 3.56 - 45.38 ± 0.93 / 56.09 ± 1.05 + 42.52 ± 3.31 / 33.08 ± 2.70 + 9.91 ± 1.71 / 35.24 ± 2.62 + 0.69 ± 2.82 / 36.10 ± 2.58 + 56.95 ± 1.18 / 66.87 ± 1.37 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + + + meta-llama/Llama-3.2-3B (few-shot) + 3213 + 128 + 131200 + True + 3,713 ± 877 / 836 ± 267 + 3.40 + 47.40 ± 3.29 / 33.11 ± 2.04 + 7.90 ± 1.98 / 30.71 ± 1.89 + 3.10 ± 1.93 / 34.24 ± 0.73 + 56.53 ± 1.48 / 68.47 ± 1.35 13.0.0 13.0.0 13.0.0 @@ -2210,6 +2210,23 @@ title: Dutch NLU 🇳🇱 9.3.1 9.3.1 + + MaLA-LM/emma-500-llama2-7b (few-shot) + 6738 + 32 + 4096 + True + 6,275 ± 1,193 / 1,755 ± 578 + 3.42 + 36.61 ± 3.37 / 31.91 ± 2.20 + 8.77 ± 1.80 / 25.31 ± 1.42 + 3.52 ± 2.07 / 35.34 ± 1.61 + 59.51 ± 0.97 / 70.33 ± 0.64 + 13.0.0 + 13.0.0 + 
13.0.0 + 13.0.0 + AI-Sweden-Models/roberta-large-1160k 354 @@ -2244,23 +2261,6 @@ title: Dutch NLU 🇳🇱 12.1.0 12.1.0 - - MaLA-LM/emma-500-llama2-7b (few-shot) - 6738 - 32 - 4096 - True - 6,275 ± 1,193 / 1,755 ± 578 - 3.46 - 36.61 ± 3.37 / 31.91 ± 2.20 - 8.77 ± 1.80 / 25.31 ± 1.42 - 3.52 ± 2.07 / 35.34 ± 1.61 - 59.51 ± 0.97 / 70.33 ± 0.64 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - allenai/OLMo-1.7-7B-hf (few-shot) 6888 @@ -2268,7 +2268,7 @@ title: Dutch NLU 🇳🇱 4096 True 3,371 ± 876 / 561 ± 184 - 3.47 + 3.48 46.95 ± 2.32 / 36.13 ± 1.88 4.34 ± 2.10 / 19.37 ± 2.08 3.46 ± 1.91 / 41.32 ± 3.08 @@ -2295,23 +2295,6 @@ title: Dutch NLU 🇳🇱 14.1.1 14.1.1 - - google/gemma-2b-it (few-shot) - 2506 - 256 - 8192 - False - 6,471 ± 1,142 / 1,961 ± 584 - 3.53 - 38.85 ± 3.77 / 32.18 ± 2.49 - 11.25 ± 1.90 / 28.36 ± 1.81 - -2.27 ± 1.37 / 37.91 ± 2.26 - 45.95 ± 1.11 / 56.54 ± 0.95 - 12.5.2 - 12.1.0 - 12.1.0 - 12.4.0 - sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking 135 @@ -2330,21 +2313,38 @@ title: Dutch NLU 🇳🇱 12.6.1 - meta-llama/Llama-3.2-1B-Instruct (few-shot) - 1236 - 128 - 131200 + google/gemma-2b-it (few-shot) + 2506 + 256 + 8192 False - 7,436 ± 1,846 / 1,508 ± 479 - 3.58 - 42.01 ± 2.06 / 37.16 ± 1.98 - 9.15 ± 1.70 / 32.55 ± 2.69 - 1.11 ± 2.15 / 36.71 ± 3.89 - 40.04 ± 1.61 / 53.75 ± 1.10 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 6,471 ± 1,142 / 1,961 ± 584 + 3.54 + 38.85 ± 3.77 / 32.18 ± 2.49 + 11.25 ± 1.90 / 28.36 ± 1.81 + -2.27 ± 1.37 / 37.91 ± 2.26 + 45.95 ± 1.11 / 56.54 ± 0.95 + 12.5.2 + 12.1.0 + 12.1.0 + 12.4.0 + + + stabilityai/stablelm-2-1_6b (few-shot) + 1645 + 100 + 4096 + True + 7,259 ± 2,120 / 1,240 ± 432 + 3.57 + 36.58 ± 3.88 / 33.82 ± 2.87 + 6.32 ± 1.30 / 24.04 ± 1.14 + 4.01 ± 2.01 / 36.03 ± 1.61 + 52.81 ± 0.81 / 63.87 ± 1.27 + 12.10.8 + 12.10.8 + 12.10.8 + 12.10.8 openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot) @@ -2363,23 +2363,6 @@ title: Dutch NLU 🇳🇱 14.0.4 14.0.4 - - dbmdz/bert-base-historic-multilingual-cased - 111 - 32 - 512 - True - 20,047 ± 4,407 / 3,844 ± 1,259 - 3.61 - 56.69 ± 1.80 / 68.42 ± 0.85 - 9.29 ± 3.04 / 30.73 ± 2.40 - 3.02 ± 1.45 / 50.08 ± 1.17 - 22.14 ± 1.13 / 31.59 ± 0.96 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - ibm-granite/granite-7b-base (few-shot) 6738 @@ -2387,7 +2370,7 @@ title: Dutch NLU 🇳🇱 2048 True 4,405 ± 1,098 / 1,032 ± 345 - 3.61 + 3.60 37.39 ± 3.37 / 32.77 ± 1.97 7.51 ± 1.57 / 19.22 ± 1.72 3.11 ± 0.88 / 50.54 ± 0.90 @@ -2397,6 +2380,40 @@ title: Dutch NLU 🇳🇱 12.10.5 12.10.5 + + meta-llama/Llama-3.2-1B-Instruct (few-shot) + 1236 + 128 + 131200 + False + 7,436 ± 1,846 / 1,508 ± 479 + 3.60 + 42.01 ± 2.06 / 37.16 ± 1.98 + 9.15 ± 1.70 / 32.55 ± 2.69 + 1.11 ± 2.15 / 36.71 ± 3.89 + 40.04 ± 1.61 / 53.75 ± 1.10 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + + + dbmdz/bert-base-historic-multilingual-cased + 111 + 32 + 512 + True + 20,047 ± 4,407 / 3,844 ± 1,259 + 3.63 + 56.69 ± 1.80 / 68.42 ± 0.85 + 9.29 ± 3.04 / 30.73 ± 2.40 + 3.02 ± 1.45 / 50.08 ± 1.17 + 22.14 ± 1.13 / 31.59 ± 0.96 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + RuterNorway/Llama-2-7b-chat-norwegian (few-shot) unknown @@ -2404,7 +2421,7 @@ title: Dutch NLU 🇳🇱 4096 False 10,890 ± 2,686 / 2,186 ± 750 - 3.63 + 3.64 35.49 ± 3.10 / 29.35 ± 2.75 11.36 ± 1.56 / 30.66 ± 3.68 2.52 ± 2.14 / 42.60 ± 4.80 @@ -2415,21 +2432,38 @@ title: Dutch NLU 🇳🇱 12.5.2 - stabilityai/stablelm-2-1_6b (few-shot) - 1645 - 100 + utter-project/EuroLLM-1.7B-Instruct (few-shot) + 1657 + 128 4096 True - 7,259 ± 2,120 / 1,240 ± 432 + 15,009 ± 4,072 / 2,702 ± 878 3.64 - 36.58 ± 3.88 / 33.82 ± 2.87 - 6.32 ± 1.30 / 24.04 ± 1.14 - 4.01 ± 2.01 / 
36.03 ± 1.61 - 52.81 ± 0.81 / 63.87 ± 1.27 - 12.10.8 - 12.10.8 - 12.10.8 - 12.10.8 + 32.45 ± 2.17 / 30.83 ± 2.31 + 7.03 ± 2.08 / 34.16 ± 1.83 + 5.58 ± 1.32 / 44.79 ± 3.28 + 51.18 ± 1.19 / 61.82 ± 1.19 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + + + ibm-granite/granite-3.0-1b-a400m-instruct (few-shot) + 1335 + 49 + 4096 + True + 7,964 ± 2,255 / 1,299 ± 433 + 3.67 + 30.60 ± 2.69 / 26.10 ± 1.90 + 13.26 ± 1.54 / 37.96 ± 2.44 + 1.04 ± 1.52 / 45.97 ± 3.20 + 39.69 ± 1.18 / 50.12 ± 0.89 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 sentence-transformers/distiluse-base-multilingual-cased @@ -2438,7 +2472,7 @@ title: Dutch NLU 🇳🇱 512 True 19,206 ± 4,451 / 3,658 ± 1,187 - 3.65 + 3.67 56.98 ± 1.37 / 66.91 ± 1.60 9.66 ± 4.65 / 31.17 ± 3.34 19.37 ± 4.34 / 56.74 ± 3.13 @@ -2448,23 +2482,6 @@ title: Dutch NLU 🇳🇱 0.0.0 0.0.0 - - utter-project/EuroLLM-1.7B-Instruct (few-shot) - 1657 - 128 - 4096 - True - 15,009 ± 4,072 / 2,702 ± 878 - 3.66 - 32.45 ± 2.17 / 30.83 ± 2.31 - 7.03 ± 2.08 / 34.16 ± 1.83 - 5.58 ± 1.32 / 44.79 ± 3.28 - 51.18 ± 1.19 / 61.82 ± 1.19 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - google/gemma-2-2b (few-shot) 2614 @@ -2472,7 +2489,7 @@ title: Dutch NLU 🇳🇱 8320 True 5,235 ± 1,226 / 1,154 ± 366 - 3.67 + 3.70 22.63 ± 4.98 / 22.71 ± 2.86 8.11 ± 1.55 / 28.07 ± 1.80 8.04 ± 1.79 / 48.95 ± 2.97 @@ -2483,19 +2500,19 @@ title: Dutch NLU 🇳🇱 13.0.0 - ibm-granite/granite-3.0-1b-a400m-instruct (few-shot) - 1335 - 49 + ibm-granite/granite-7b-instruct (few-shot) + 6738 + 32 4096 True - 7,964 ± 2,255 / 1,299 ± 433 - 3.67 - 30.60 ± 2.69 / 26.10 ± 1.90 - 13.26 ± 1.54 / 37.96 ± 2.44 - 1.04 ± 1.52 / 45.97 ± 3.20 - 39.69 ± 1.18 / 50.12 ± 0.89 + 3,136 ± 558 / 942 ± 290 + 3.74 + 33.73 ± 2.02 / 30.41 ± 1.57 + 7.45 ± 1.77 / 22.28 ± 1.35 + 3.78 ± 2.04 / 50.30 ± 1.51 + 43.60 ± 1.46 / 58.94 ± 1.08 13.2.0 - 13.2.0 + 13.0.0 13.2.0 13.2.0 @@ -2506,7 +2523,7 @@ title: Dutch NLU 🇳🇱 2176 True 5,403 ± 1,133 / 1,294 ± 423 - 3.74 + 3.76 37.37 ± 2.22 / 30.45 ± 2.45 9.55 ± 1.82 / 23.90 ± 1.53 0.05 ± 1.35 / 35.78 ± 2.30 @@ -2517,21 +2534,21 @@ title: Dutch NLU 🇳🇱 12.5.2 - ibm-granite/granite-7b-instruct (few-shot) - 6738 - 32 - 4096 + LumiOpen/Viking-13B (few-shot) + 14030 + 131 + 4224 True - 3,136 ± 558 / 942 ± 290 - 3.75 - 33.73 ± 2.02 / 30.41 ± 1.57 - 7.45 ± 1.77 / 22.28 ± 1.35 - 3.78 ± 2.04 / 50.30 ± 1.51 - 43.60 ± 1.46 / 58.94 ± 1.08 - 13.2.0 - 13.0.0 - 13.2.0 - 13.2.0 + 840 ± 79 / 400 ± 124 + 3.77 + 36.74 ± 3.36 / 32.36 ± 1.39 + 8.57 ± 2.44 / 34.17 ± 2.59 + 3.01 ± 1.94 / 46.03 ± 4.19 + 32.32 ± 1.55 / 40.73 ± 1.64 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 jpostma/DagoBERT @@ -2540,7 +2557,7 @@ title: Dutch NLU 🇳🇱 512 True 11,241 ± 2,115 / 2,565 ± 830 - 3.76 + 3.79 42.28 ± 1.41 / 47.68 ± 1.08 8.01 ± 2.88 / 31.60 ± 2.41 31.21 ± 1.62 / 64.82 ± 0.69 @@ -2551,21 +2568,21 @@ title: Dutch NLU 🇳🇱 0.0.0 - LumiOpen/Viking-13B (few-shot) - 14030 - 131 - 4224 + openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot) + 7453 + 251 + 4096 True - 840 ± 79 / 400 ± 124 - 3.81 - 36.74 ± 3.36 / 32.36 ± 1.39 - 8.57 ± 2.44 / 34.17 ± 2.59 - 3.01 ± 1.94 / 46.03 ± 4.19 - 32.32 ± 1.55 / 40.73 ± 1.64 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 + 1,438 ± 410 / 233 ± 79 + 3.83 + 42.35 ± 2.49 / 29.29 ± 1.66 + 0.78 ± 0.93 / 8.63 ± 0.29 + -0.02 ± 1.29 / 38.46 ± 1.55 + 47.61 ± 1.96 / 59.25 ± 1.42 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 google/gemma-2b (few-shot) @@ -2574,7 +2591,7 @@ title: Dutch NLU 🇳🇱 8192 True 6,087 ± 1,046 / 1,902 ± 563 - 3.82 + 3.84 16.90 ± 4.91 / 17.38 ± 4.30 9.95 ± 0.78 / 27.94 ± 1.43 0.41 ± 1.03 / 33.54 ± 0.32 @@ -2585,21 +2602,21 @@ title: Dutch NLU 🇳🇱 12.1.0 - 
openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot) - 7453 - 251 + NorwAI/NorwAI-Mistral-7B (few-shot) + 7537 + 68 4096 True - 1,438 ± 410 / 233 ± 79 - 3.83 - 42.35 ± 2.49 / 29.29 ± 1.66 - 0.78 ± 0.93 / 8.63 ± 0.29 - -0.02 ± 1.29 / 38.46 ± 1.55 - 47.61 ± 1.96 / 59.25 ± 1.42 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 + 3,035 ± 503 / 911 ± 300 + 3.87 + 24.15 ± 5.73 / 26.49 ± 4.13 + 8.31 ± 1.56 / 20.06 ± 1.06 + 1.60 ± 1.71 / 41.51 ± 3.60 + 37.08 ± 1.76 / 49.32 ± 0.87 + 12.10.4 + 12.10.4 + 12.10.4 + 12.10.4 Tweeties/tweety-7b-dutch-v24a (few-shot) @@ -2618,23 +2635,6 @@ title: Dutch NLU 🇳🇱 12.6.1 12.6.1 - - NorwAI/NorwAI-Mistral-7B (few-shot) - 7537 - 68 - 4096 - True - 3,035 ± 503 / 911 ± 300 - 3.91 - 24.15 ± 5.73 / 26.49 ± 4.13 - 8.31 ± 1.56 / 20.06 ± 1.06 - 1.60 ± 1.71 / 41.51 ± 3.60 - 37.08 ± 1.76 / 49.32 ± 0.87 - 12.10.4 - 12.10.4 - 12.10.4 - 12.10.4 - PleIAs/Pleias-3b-Preview (few-shot) 3212 @@ -2642,7 +2642,7 @@ title: Dutch NLU 🇳🇱 4096 True 6,513 ± 1,241 / 1,282 ± 644 - 3.91 + 3.89 31.13 ± 3.71 / 29.34 ± 2.26 7.24 ± 2.08 / 29.45 ± 4.63 1.23 ± 1.73 / 44.71 ± 3.28 @@ -2659,7 +2659,7 @@ title: Dutch NLU 🇳🇱 2048 True 10,756 ± 3,589 / 1,157 ± 670 - 3.93 + 3.94 38.22 ± 3.45 / 35.62 ± 3.90 4.99 ± 3.86 / 29.17 ± 2.70 1.85 ± 1.45 / 40.34 ± 3.41 @@ -2676,7 +2676,7 @@ title: Dutch NLU 🇳🇱 131200 True 7,577 ± 1,884 / 1,555 ± 492 - 3.95 + 3.96 22.03 ± 4.43 / 19.22 ± 3.92 4.25 ± 2.95 / 26.57 ± 3.31 1.46 ± 1.83 / 42.29 ± 4.01 @@ -2693,7 +2693,7 @@ title: Dutch NLU 🇳🇱 8192 True 16,249 ± 3,690 / 3,689 ± 1,226 - 3.97 + 3.98 22.84 ± 5.42 / 25.11 ± 3.52 4.60 ± 2.12 / 29.94 ± 1.50 2.55 ± 1.41 / 40.88 ± 3.15 @@ -2703,23 +2703,6 @@ title: Dutch NLU 🇳🇱 13.1.0 13.1.0 - - HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot) - 1711 - 49 - 8192 - True - 15,971 ± 3,654 / 3,609 ± 1,197 - 4.00 - 31.84 ± 3.39 / 28.66 ± 1.77 - 1.56 ± 3.25 / 28.78 ± 2.60 - 5.05 ± 1.34 / 43.99 ± 4.14 - 40.55 ± 0.77 / 48.56 ± 0.95 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - Qwen/Qwen1.5-1.8B-Chat (few-shot) 1837 @@ -2727,7 +2710,7 @@ title: Dutch NLU 🇳🇱 32768 False 8,304 ± 1,846 / 1,933 ± 617 - 4.00 + 3.98 23.44 ± 5.09 / 25.00 ± 2.33 6.82 ± 1.82 / 30.97 ± 2.65 4.11 ± 1.73 / 43.70 ± 3.47 @@ -2738,38 +2721,21 @@ title: Dutch NLU 🇳🇱 12.5.0 - ibm-granite/granite-3.0-1b-a400m-base (few-shot) - 1385 + HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot) + 1711 49 - 4096 - True - 7,808 ± 2,183 / 1,289 ± 428 - 4.02 - 12.76 ± 7.37 / 14.65 ± 5.86 - 9.35 ± 1.70 / 31.57 ± 6.23 - 0.69 ± 1.52 / 44.03 ± 3.52 - 37.71 ± 0.79 / 47.08 ± 1.12 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - - - PleIAs/Pleias-Nano (few-shot) - 1195 - 66 - 2048 + 8192 True - 2,519 ± 841 / 323 ± 104 - 4.04 - 23.58 ± 4.08 / 26.01 ± 4.43 - 7.90 ± 3.56 / 33.04 ± 3.49 - 1.79 ± 1.38 / 40.53 ± 3.24 - 26.11 ± 2.05 / 35.07 ± 1.64 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 + 15,971 ± 3,654 / 3,609 ± 1,197 + 4.00 + 31.84 ± 3.39 / 28.66 ± 1.77 + 1.56 ± 3.25 / 28.78 ± 2.60 + 5.05 ± 1.34 / 43.99 ± 4.14 + 40.55 ± 0.77 / 48.56 ± 0.95 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 3ebdola/Dialectal-Arabic-XLM-R-Base @@ -2778,7 +2744,7 @@ title: Dutch NLU 🇳🇱 512 True 12,783 ± 2,537 / 2,712 ± 885 - 4.08 + 4.03 44.46 ± 2.24 / 60.04 ± 1.09 8.39 ± 4.20 / 30.69 ± 2.83 2.07 ± 1.34 / 48.42 ± 1.31 @@ -2795,7 +2761,7 @@ title: Dutch NLU 🇳🇱 512 True 78,027 ± 15,466 / 17,064 ± 5,335 - 4.08 + 4.03 41.38 ± 2.82 / 56.29 ± 1.61 8.45 ± 2.80 / 29.85 ± 1.86 1.55 ± 1.97 / 49.24 ± 1.16 @@ -2805,6 +2771,23 @@ title: Dutch NLU 🇳🇱 0.0.0 0.0.0 + + ibm-granite/granite-3.0-1b-a400m-base (few-shot) + 1385 + 49 + 4096 + True + 7,808 ± 2,183 / 1,289 ± 428 + 4.04 + 12.76 ± 
7.37 / 14.65 ± 5.86 + 9.35 ± 1.70 / 31.57 ± 6.23 + 0.69 ± 1.52 / 44.03 ± 3.52 + 37.71 ± 0.79 / 47.08 ± 1.12 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + Qwen/Qwen1.5-0.5B-Chat (few-shot) 620 @@ -2812,7 +2795,7 @@ title: Dutch NLU 🇳🇱 32768 False 11,740 ± 3,000 / 2,209 ± 721 - 4.10 + 4.06 18.66 ± 4.43 / 17.56 ± 4.28 8.59 ± 3.20 / 29.65 ± 5.10 0.34 ± 2.02 / 43.92 ± 3.15 @@ -2822,6 +2805,23 @@ title: Dutch NLU 🇳🇱 12.1.0 12.5.0 + + PleIAs/Pleias-Nano (few-shot) + 1195 + 66 + 2048 + True + 2,519 ± 841 / 323 ± 104 + 4.07 + 23.58 ± 4.08 / 26.01 ± 4.43 + 7.90 ± 3.56 / 33.04 ± 3.49 + 1.79 ± 1.38 / 40.53 ± 3.24 + 26.11 ± 2.05 / 35.07 ± 1.64 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + allenai/OLMo-7B-Twin-2T (few-shot) 6888 @@ -2863,7 +2863,7 @@ title: Dutch NLU 🇳🇱 32768 True 11,371 ± 2,924 / 2,122 ± 692 - 4.17 + 4.18 28.30 ± 3.90 / 28.67 ± 3.15 4.54 ± 2.76 / 26.53 ± 3.74 -0.42 ± 2.41 / 37.60 ± 3.89 @@ -2880,7 +2880,7 @@ title: Dutch NLU 🇳🇱 4096 True 4,438 ± 1,128 / 1,028 ± 346 - 4.18 + 4.19 22.50 ± 2.27 / 24.09 ± 2.40 6.04 ± 1.51 / 18.08 ± 2.09 -0.61 ± 1.30 / 46.51 ± 2.55 @@ -2897,7 +2897,7 @@ title: Dutch NLU 🇳🇱 32768 True 5,666 ± 1,328 / 1,256 ± 408 - 4.19 + 4.20 11.66 ± 6.46 / 15.15 ± 4.38 5.20 ± 1.78 / 35.43 ± 2.14 2.89 ± 1.91 / 41.36 ± 4.63 @@ -2914,7 +2914,7 @@ title: Dutch NLU 🇳🇱 8192 True 22,023 ± 6,203 / 3,675 ± 1,231 - 4.24 + 4.22 20.95 ± 2.02 / 25.63 ± 1.96 6.84 ± 1.76 / 27.74 ± 5.49 -1.50 ± 1.30 / 34.07 ± 0.45 @@ -2941,6 +2941,23 @@ title: Dutch NLU 🇳🇱 12.6.1 12.6.1 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 4.26 + 28.25 ± 3.03 / 25.24 ± 2.38 + 3.73 ± 1.83 / 15.20 ± 2.26 + 0.76 ± 1.10 / 33.57 ± 0.34 + 19.08 ± 2.27 / 28.16 ± 2.64 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + HuggingFaceTB/SmolLM2-360M-Instruct (few-shot) 362 @@ -2948,7 +2965,7 @@ title: Dutch NLU 🇳🇱 8192 True 21,777 ± 6,115 / 3,617 ± 1,211 - 4.29 + 4.28 15.68 ± 5.54 / 22.21 ± 5.42 6.73 ± 2.20 / 27.67 ± 4.00 0.63 ± 1.05 / 43.48 ± 2.98 @@ -2982,7 +2999,7 @@ title: Dutch NLU 🇳🇱 2048 True 2,331 ± 787 / 301 ± 97 - 4.43 + 4.44 21.32 ± 2.14 / 22.20 ± 2.65 4.37 ± 2.16 / 18.06 ± 2.83 -0.19 ± 1.24 / 41.66 ± 3.35 @@ -2999,7 +3016,7 @@ title: Dutch NLU 🇳🇱 2176 True 8,536 ± 1,926 / 1,940 ± 619 - 4.45 + 4.46 22.58 ± 5.05 / 26.82 ± 3.69 4.92 ± 2.71 / 19.51 ± 4.22 -1.27 ± 1.85 / 41.38 ± 3.59 diff --git a/english-nlg.csv b/english-nlg.csv index a5285af5..bc078060 100644 --- a/english-nlg.csv +++ b/english-nlg.csv @@ -16,11 +16,11 @@ VAGOsolutions/SauerkrautLM-7b-LaserChat (few-shot),7242,32,4096,False,False,4413 "meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4349,True,False,1892,1.53,72.38,60.98,43.12,74.5,71.63,54.29,74.35 "meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8192,True,False,1673,1.53,81.3,66.18,38.1,53.31,70.35,72.39,83.86 "152334H/miqu-1-70b-sf (few-shot, val)",68977,32,33017,True,False,2126,1.56,71.83,62.99,39.97,64.42,71.27,64.27,77.6 -CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,1.57,78.35,67.62,46.5,63.2,70.22,60.4,60.49 VAGOsolutions/FC-SauerkrautLM-7b-beta (few-shot),-1,32,8192,False,False,2160,1.57,70.15,69.36,37.09,77.39,69.72,50.1,77.27 google/gemma-2-9b-it (few-shot),9242,256,8448,True,False,2062,1.57,58.07,68.4,51.58,62.03,69.72,63.99,75.87 -mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024256,True,False,7095,1.57,75.24,63.05,44.75,67.7,70.56,57.55,67.97 +CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,1.58,78.35,67.62,46.5,63.2,70.22,60.4,60.49 "gpt-4o-mini-2024-07-18 
(few-shot, val)",-1,200,8191,True,False,784,1.58,77.38,66.75,52.43,41.03,68.73,66.63,79.95 +mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024256,True,False,7095,1.58,75.24,63.05,44.75,67.7,70.56,57.55,67.97 "gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,1.61,75.8,61.65,47.74,56.98,68.6,63.18,77.31 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,1.65,72.77,70.12,44.68,57.17,69.32,47.17,77.25 mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,1.65,72.4,63.46,35.86,68.42,69.38,53.42,78.36 @@ -33,28 +33,28 @@ CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,1.75,67.33, ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,1.76,66.17,68.03,39.76,71.21,69.3,49.99,52.45 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,1.76,70.02,69.48,44.59,55.7,68.79,55.61,57.52 "VAGOsolutions/SauerkrautLM-7b-HerO (few-shot, val)",7242,32,32768,False,True,2477,1.77,69.26,68.63,29.87,60.92,70.2,49.51,71.47 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131328,True,False,1005,1.77,76.95,68.12,34.34,47.88,69.57,56.62,69.03 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131328,True,False,1473,1.77,76.95,68.12,34.34,47.88,69.57,56.62,69.03 nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,1.77,67.52,69.03,40.51,58.12,67.86,55.93,64.11 01-ai/Yi-1.5-6B (few-shot),6061,64,4352,True,False,2867,1.78,55.51,68.74,40.81,72.33,67.71,48.36,68.5 "meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4096,True,False,1979,1.81,72.8,63.76,28.37,64.7,71.04,47.0,61.56 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,1.81,75.02,67.64,32.29,54.84,69.28,53.77,57.64 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,1.82,75.02,67.64,32.29,54.84,69.28,53.77,57.64 NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,1.83,63.77,69.23,38.49,57.03,70.47,54.58,47.93 mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,1.83,65.31,68.87,43.07,63.97,68.68,56.64,33.42 robinsmits/Qwen1.5-7B-Dutch-Chat-Sft-Bf16 (few-shot),7719,152,32768,False,False,2413,1.85,67.52,63.1,37.75,64.88,68.45,47.66,63.35 meta-llama/Llama-3.1-8B (few-shot),8030,128,131328,True,False,2986,1.87,69.86,66.76,30.96,71.39,67.93,52.47,43.95 "mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,1.87,69.16,63.85,28.4,52.69,70.55,51.74,71.96 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,1.88,71.96,67.26,40.81,49.79,68.97,50.59,58.04 -"cstr/Spaetzle-v8-7b (few-shot, val)",7242,32,32768,False,True,5980,1.88,69.4,65.39,26.69,49.74,71.02,52.23,74.61 +"cstr/Spaetzle-v8-7b (few-shot, val)",7242,32,32768,False,True,5980,1.89,69.4,65.39,26.69,49.74,71.02,52.23,74.61 neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,1.89,59.28,68.49,21.95,78.16,69.54,46.35,60.61 senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,1.89,70.62,67.78,30.99,49.56,70.76,44.11,69.2 microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,1.92,64.09,46.77,31.62,71.25,69.54,57.66,72.26 -mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,1.94,63.4,68.17,30.92,73.45,69.11,47.74,34.96 -alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,1.95,61.02,67.1,29.82,73.5,69.02,47.13,35.88 -meta-llama/Meta-Llama-3-8B 
(few-shot),8030,128,8192,True,False,1335,1.95,66.31,64.3,28.18,70.38,67.9,52.54,41.19 -mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,1.95,61.02,67.29,30.1,73.59,69.04,47.63,35.63 -ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4352,True,False,10187,1.96,57.78,67.81,22.81,72.9,67.33,37.8,67.23 -"mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,1.96,69.19,63.77,28.43,44.39,69.77,46.53,71.36 -CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,1.97,56.16,68.27,23.82,74.23,72.14,33.04,55.1 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,1.95,66.31,64.3,28.18,70.38,67.9,52.54,41.19 +mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,1.95,63.4,68.17,30.92,73.45,69.11,47.74,34.96 +alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,1.96,61.02,67.1,29.82,73.5,69.02,47.13,35.88 +mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,1.96,61.02,67.29,30.1,73.59,69.04,47.63,35.63 +ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4352,True,False,10187,1.97,57.78,67.81,22.81,72.9,67.33,37.8,67.23 +"mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,1.97,69.19,63.77,28.43,44.39,69.77,46.53,71.36 +CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,1.98,56.16,68.27,23.82,74.23,72.14,33.04,55.1 ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,1.98,55.76,66.89,36.6,67.55,66.51,46.22,52.23 ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4352,True,False,10194,2.0,61.97,67.54,31.7,59.78,68.61,39.82,57.51 occiglot/occiglot-7b-de-en-instruct (few-shot),7242,32,32768,False,False,1584,2.0,60.9,66.54,23.6,75.14,68.96,42.53,41.79 @@ -63,8 +63,8 @@ meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131328,False,False,10424,2. 
google/gemma-7b (few-shot),8538,256,8320,True,False,1378,2.04,47.2,63.88,35.75,69.4,69.25,50.57,35.79 Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,2.06,55.45,60.55,28.6,70.49,68.67,39.82,51.82 "claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.06,82.11,67.01,51.09,52.41,67.73,26.14,13.56 -timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.09,59.1,68.41,25.43,71.89,67.99,44.09,32.29 mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,2.1,57.58,61.44,34.92,65.38,69.62,38.4,35.72 +timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.1,59.1,68.41,25.43,71.89,67.99,44.09,32.29 utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.1,44.81,62.54,28.1,71.71,69.27,45.95,43.97 RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.11,64.93,64.14,28.08,62.09,68.84,36.49,38.09 occiglot/occiglot-7b-de-en (few-shot),7242,32,32768,True,False,1992,2.11,56.07,65.29,25.78,73.13,68.76,41.47,32.75 @@ -72,75 +72,76 @@ Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,2.13,58.56,59.62 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4320,True,False,2849,2.13,68.75,62.37,25.07,61.56,69.4,38.0,42.14 meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,2.16,63.75,61.85,26.41,73.48,67.73,38.04,28.16 mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.17,62.11,59.91,30.66,58.27,69.75,34.93,44.91 -nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.17,72.63,65.74,43.43,0.0,66.76,60.0,69.36 google/gemma-7b-it (few-shot),8538,256,8445,False,False,1792,2.18,66.7,55.62,31.36,72.58,67.24,35.27,32.54 +nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.18,72.63,65.74,43.43,0.0,66.76,60.0,69.36 occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.18,56.9,62.1,20.17,75.29,69.63,38.48,27.67 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,2047,True,False,3194,2.19,68.69,66.77,42.14,63.71,45.05,56.92,55.78 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,2047,True,False,8681,2.19,68.69,66.77,42.14,63.71,45.05,56.92,55.78 google/gemma-2-2b-it (few-shot),2614,256,8448,True,False,5374,2.2,44.36,66.37,34.69,55.07,68.09,42.89,45.52 -danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.21,56.38,66.04,22.15,71.32,68.13,35.47,29.25 -ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,2.28,52.79,65.92,16.74,64.92,65.5,33.84,49.84 +danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.22,56.38,66.04,22.15,71.32,68.13,35.47,29.25 meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.28,62.53,62.23,22.71,64.45,69.95,30.47,30.18 -occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.29,55.37,63.32,18.92,72.38,68.61,37.04,23.54 +ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,2.29,52.79,65.92,16.74,64.92,65.5,33.84,49.84 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.3,72.76,62.35,21.57,69.8,67.73,25.63,16.44 ibm-granite/granite-8b-code-base (few-shot),8055,49,4351,True,False,1002,2.3,72.64,62.31,22.38,69.84,67.71,25.41,16.5 -ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,2.31,53.21,65.98,7.26,64.71,68.81,36.5,46.95 -ibm-granite/granite-3.0-3b-a800m-base 
(few-shot),3374,49,4096,True,False,10504,2.33,49.44,66.65,12.56,63.29,66.38,32.06,58.21 -meta-llama/Llama-3.2-3B (few-shot),3213,128,131328,True,False,3713,2.34,59.09,63.29,13.5,68.15,67.73,40.1,20.88 -stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,2.34,47.5,64.69,8.01,71.81,69.71,18.92,54.53 +occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.3,55.37,63.32,18.92,72.38,68.61,37.04,23.54 +ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,2.32,53.21,65.98,7.26,64.71,68.81,36.5,46.95 +ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,2.34,49.44,66.65,12.56,63.29,66.38,32.06,58.21 ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.35,72.59,61.61,18.37,66.68,68.41,24.14,14.42 +meta-llama/Llama-3.2-3B (few-shot),3213,128,131328,True,False,3713,2.35,59.09,63.29,13.5,68.15,67.73,40.1,20.88 +stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,2.35,47.5,64.69,8.01,71.81,69.71,18.92,54.53 meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,2.37,55.27,65.16,20.43,69.82,68.82,25.98,11.77 -microsoft/phi-2 (few-shot),2780,51,2048,True,False,3472,2.37,49.16,62.41,12.31,75.79,67.79,40.15,23.21 +microsoft/phi-2 (few-shot),2780,51,2048,True,False,3472,2.38,49.16,62.41,12.31,75.79,67.79,40.15,23.21 NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.4,63.12,66.47,38.82,29.16,57.65,54.57,30.13 allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,2.41,41.02,66.43,5.17,76.04,70.28,34.92,18.73 Rijgersberg/GEITje-7B (few-shot),7242,32,32768,True,False,5887,2.44,53.39,65.21,12.63,65.74,68.05,31.65,17.69 HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,2.49,47.58,66.78,20.53,58.07,62.45,32.9,25.32 -ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,2.53,47.76,66.41,5.76,70.34,65.88,29.83,18.44 +ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,2.54,47.76,66.41,5.76,70.34,65.88,29.83,18.44 MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,2.59,47.2,64.82,7.57,73.88,67.34,16.59,11.97 Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,2.59,37.22,64.34,15.3,64.41,68.15,27.24,22.84 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.6,74.35,31.19,21.76,45.7,67.96,28.8,42.52 -NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.61,76.84,67.91,30.61,0.1,58.37,48.18,34.99 +NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.62,76.84,67.91,30.61,0.1,58.37,48.18,34.99 NbAiLab/nb-llama-3.1-8B (few-shot),8030,128,131072,True,False,1297,2.62,62.9,62.26,24.37,25.93,63.31,40.9,16.59 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,2.62,58.3,59.01,10.33,65.04,67.46,14.1,10.67 -google/gemma-2-2b (few-shot),2614,256,8448,True,False,5235,2.66,30.82,66.28,10.09,64.96,67.13,35.14,11.26 HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,2.67,41.57,62.32,8.04,56.01,65.06,34.02,22.81 +google/gemma-2-2b (few-shot),2614,256,8448,True,False,5235,2.67,30.82,66.28,10.09,64.96,67.13,35.14,11.26 Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,2.7,40.89,55.33,11.23,60.69,67.23,26.84,23.89 -meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131328,False,False,7436,2.71,56.41,59.46,8.36,47.26,66.61,25.59,18.56 +meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131328,False,False,7436,2.72,56.41,59.46,8.36,47.26,66.61,25.59,18.56 
AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2048,True,False,1831,2.73,39.21,65.58,7.82,72.25,64.07,14.98,13.55 -ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,2.74,60.64,61.2,7.63,69.83,56.62,16.29,10.37 +ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,2.75,60.64,61.2,7.63,69.83,56.62,16.29,10.37 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,2.75,43.0,54.47,17.44,53.15,67.33,20.49,17.5 ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,2.79,46.26,63.47,13.17,59.32,66.43,9.68,4.99 TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,2.84,45.89,59.29,9.11,66.74,68.17,0.92,0.61 google/gemma-2b (few-shot),2506,256,8192,True,False,6087,2.89,19.65,62.14,8.3,66.3,66.51,20.38,7.41 -VAGOsolutions/SauerkrautLM-Gemma-2b (few-shot),2506,256,8192,False,False,3607,2.9,23.28,61.91,6.92,64.68,65.15,22.65,14.54 -google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,2.91,40.05,48.83,5.83,63.77,67.28,18.21,10.84 +VAGOsolutions/SauerkrautLM-Gemma-2b (few-shot),2506,256,8192,False,False,3607,2.91,23.28,61.91,6.92,64.68,65.15,22.65,14.54 +google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,2.92,40.05,48.83,5.83,63.77,67.28,18.21,10.84 utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,2.97,37.47,58.61,5.3,63.26,67.24,3.93,1.26 LumiOpen/Viking-13B (few-shot),14030,131,4352,True,False,840,3.01,42.78,59.9,5.68,58.52,64.83,1.63,0.54 -openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.04,50.73,27.52,2.96,63.42,68.74,16.65,10.69 -NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4224,True,False,4438,3.06,38.51,63.6,2.23,45.44,67.11,5.02,2.08 +openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.05,50.73,27.52,2.96,63.42,68.74,16.65,10.69 +NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4224,True,False,4438,3.07,38.51,63.6,2.23,45.44,67.11,5.02,2.08 Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.07,37.51,57.15,2.94,42.57,65.22,18.24,10.89 -NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,3.11,40.91,47.12,6.03,51.34,66.65,4.91,1.21 Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.11,33.86,55.41,1.15,53.34,65.81,11.66,5.22 -meta-llama/Llama-3.2-1B (few-shot),1236,128,131328,True,False,7577,3.11,35.08,54.4,2.97,58.3,62.19,7.91,1.6 +NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,3.12,40.91,47.12,6.03,51.34,66.65,4.91,1.21 ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.12,29.84,64.13,3.99,55.74,64.55,1.42,1.42 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.12,44.48,23.69,8.52,56.97,68.81,15.31,9.35 +meta-llama/Llama-3.2-1B (few-shot),1236,128,131328,True,False,7577,3.12,35.08,54.4,2.97,58.3,62.19,7.91,1.6 +openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.13,44.48,23.69,8.52,56.97,68.81,15.31,9.35 NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4224,True,False,3035,3.14,35.84,56.87,3.08,52.77,63.96,7.42,2.24 -AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,3.2,45.86,62.08,6.62,65.29,43.45,9.1,8.35 +AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,3.21,45.86,62.08,6.62,65.29,43.45,9.1,8.35 allenai/OLMo-7B (few-shot),6888,50,2304,True,False,5403,3.27,38.23,60.7,-0.19,61.93,51.32,5.0,1.1 state-spaces/mamba-2.8b-hf 
(few-shot),2768,50,33024,True,False,2722,3.31,28.63,66.55,1.47,35.0,62.05,-0.41,-0.04 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,3.36,30.73,59.51,1.55,49.03,57.73,0.11,-0.06 HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,3.36,31.14,43.97,3.49,47.91,62.2,0.12,0.13 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,3.37,30.73,59.51,1.55,49.03,57.73,0.11,-0.06 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2304,True,False,5484,3.49,25.36,56.91,7.1,58.6,46.16,0.71,0.82 -allenai/OLMo-1B (few-shot),1177,50,2304,True,False,8536,3.61,26.47,60.05,0.72,43.87,46.18,-0.87,0.2 +allenai/OLMo-1B (few-shot),1177,50,2304,True,False,8536,3.62,26.47,60.05,0.72,43.87,46.18,-0.87,0.2 PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.69,40.45,47.89,0.28,26.77,50.31,1.03,-0.3 -RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.78,18.69,21.95,0.01,36.51,60.11,3.71,0.62 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.8,21.6,45.04,-0.46,33.46,49.12,2.37,-0.33 -PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.84,27.37,36.35,-0.37,7.42,60.89,2.21,-0.9 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,3.99,29.96,18.64,1.85,26.9,52.96,1.34,0.1 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.04,31.26,26.69,1.78,13.88,52.05,1.51,-0.76 +RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.79,18.69,21.95,0.01,36.51,60.11,3.71,0.62 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.81,21.6,45.04,-0.46,33.46,49.12,2.37,-0.33 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.85,27.37,36.35,-0.37,7.42,60.89,2.21,-0.9 +HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.0,29.96,18.64,1.85,26.9,52.96,1.34,0.1 +HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.05,31.26,26.69,1.78,13.88,52.05,1.51,-0.76 NbAiLab/nb-llama-3.2-3B (few-shot),3213,128,131072,True,False,1880,4.11,0.02,60.98,0.0,9.94,41.1,12.17,0.62 -NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,4.18,3.98,39.54,3.41,26.96,43.27,2.45,-0.84 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.21,31.79,19.13,-0.03,12.35,46.13,-1.2,0.47 +NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,4.19,3.98,39.54,3.41,26.96,43.27,2.45,-0.84 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.22,31.79,19.13,-0.03,12.35,46.13,-1.2,0.47 PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.22,27.45,27.39,0.31,15.62,43.83,0.65,-0.44 -RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.52,0.0,0.0,0.41,0.0,61.26,0.0,0.36 -NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4224,True,False,3024,4.65,12.34,-1.48,-0.48,0.72,49.61,-0.12,-0.01 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.8,0.0,0.0,2.48,0.01,44.8,-0.69,0.37 -ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.9,1.55,3.71,-0.42,5.58,34.62,0.37,-0.17 +RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.53,0.0,0.0,0.41,0.0,61.26,0.0,0.36 +NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4224,True,False,3024,4.66,12.34,-1.48,-0.48,0.72,49.61,-0.12,-0.01 +ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.81,0.0,0.0,2.48,0.01,44.8,-0.69,0.37 +ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.91,1.55,3.71,-0.42,5.58,34.62,0.37,-0.17 diff --git a/english-nlg.md 
b/english-nlg.md index 9fafac9f..da5d6d88 100644 --- a/english-nlg.md +++ b/english-nlg.md @@ -3,7 +3,7 @@ layout: leaderboard title: English NLG 🇬🇧 --- -
Last updated: 10/01/2025 12:30:44 CET
+
Last updated: 11/01/2025 11:03:54 CET
@@ -431,29 +431,6 @@ title: English NLG 🇬🇧 12.7.0 12.7.0 - - CohereForAI/c4ai-command-r-08-2024 (few-shot) - 32296 - 256 - 131072 - False - 1,909 ± 646 / 248 ± 84 - 1.57 - 78.35 ± 1.42 / 68.56 ± 1.40 - 67.62 ± 0.95 / 70.27 ± 1.06 - 46.50 ± 1.03 / 71.67 ± 0.79 - 63.20 ± 1.15 / 80.12 ± 0.66 - 70.22 ± 0.23 / 26.49 ± 0.34 - 60.40 ± 0.83 / 70.22 ± 0.62 - 60.49 ± 2.10 / 69.11 ± 1.87 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - VAGOsolutions/FC-SauerkrautLM-7b-beta (few-shot) unknown @@ -501,27 +478,27 @@ title: English NLG 🇬🇧 13.0.0 - mistralai/Mistral-Nemo-Instruct-2407 (few-shot) - 12248 - 131 - 1024256 - True - 7,095 ± 2,193 / 1,063 ± 344 - 1.57 - 75.24 ± 0.84 / 67.78 ± 2.01 - 63.05 ± 1.41 / 70.34 ± 0.52 - 44.75 ± 2.73 / 71.43 ± 1.71 - 67.70 ± 1.57 / 83.88 ± 1.27 - 70.56 ± 0.58 / 25.57 ± 0.59 - 57.55 ± 1.00 / 68.02 ± 0.70 - 67.97 ± 1.89 / 75.51 ± 1.48 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + CohereForAI/c4ai-command-r-08-2024 (few-shot) + 32296 + 256 + 131072 + False + 1,909 ± 646 / 248 ± 84 + 1.58 + 78.35 ± 1.42 / 68.56 ± 1.40 + 67.62 ± 0.95 / 70.27 ± 1.06 + 46.50 ± 1.03 / 71.67 ± 0.79 + 63.20 ± 1.15 / 80.12 ± 0.66 + 70.22 ± 0.23 / 26.49 ± 0.34 + 60.40 ± 0.83 / 70.22 ± 0.62 + 60.49 ± 2.10 / 69.11 ± 1.87 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 gpt-4o-mini-2024-07-18 (few-shot, val) @@ -546,6 +523,29 @@ title: English NLG 🇬🇧 14.0.1 14.0.1 + + mistralai/Mistral-Nemo-Instruct-2407 (few-shot) + 12248 + 131 + 1024256 + True + 7,095 ± 2,193 / 1,063 ± 344 + 1.58 + 75.24 ± 0.84 / 67.78 ± 2.01 + 63.05 ± 1.41 / 70.34 ± 0.52 + 44.75 ± 2.73 / 71.43 ± 1.71 + 67.70 ± 1.57 / 83.88 ± 1.27 + 70.56 ± 0.58 / 25.57 ± 0.59 + 57.55 ± 1.00 / 68.02 ± 0.70 + 67.97 ± 1.89 / 75.51 ± 1.48 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + gpt-4o-mini-2024-07-18 (zero-shot, val) unknown @@ -828,7 +828,7 @@ title: English NLG 🇬🇧 128 131328 True - 1,005 ± 330 / 196 ± 74 + 1,473 ± 377 / 283 ± 96 1.77 76.95 ± 0.95 / 72.47 ± 0.82 68.12 ± 0.92 / 72.48 ± 0.53 @@ -920,8 +920,8 @@ title: English NLG 🇬🇧 128 8192 True - 1,007 ± 316 / 162 ± 45 - 1.81 + 1,483 ± 377 / 287 ± 97 + 1.82 75.02 ± 1.31 / 69.47 ± 1.18 67.64 ± 1.12 / 71.04 ± 1.17 32.29 ± 3.05 / 64.85 ± 2.07 @@ -1082,7 +1082,7 @@ title: English NLG 🇬🇧 32768 False 5,980 ± 1,031 / 1,714 ± 552 - 1.88 + 1.89 69.40 ± 1.47 / 54.63 ± 3.27 65.39 ± 2.32 / 73.11 ± 1.62 26.69 ± 3.88 / 62.82 ± 2.19 @@ -1167,6 +1167,29 @@ title: English NLG 🇬🇧 12.10.0 12.10.0 + + meta-llama/Meta-Llama-3-8B (few-shot) + 8030 + 128 + 8192 + True + 1,477 ± 376 / 285 ± 97 + 1.95 + 66.31 ± 2.09 / 58.68 ± 1.95 + 64.30 ± 0.65 / 69.26 ± 0.50 + 28.18 ± 3.96 / 58.97 ± 4.03 + 70.38 ± 3.51 / 82.95 ± 2.38 + 67.90 ± 0.49 / 21.54 ± 0.57 + 52.54 ± 0.88 / 64.26 ± 0.66 + 41.19 ± 4.40 / 54.78 ± 3.62 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + mistralai/Mistral-7B-v0.1 (few-shot) 7242 @@ -1174,7 +1197,7 @@ title: English NLG 🇬🇧 32768 True 1,446 ± 354 / 295 ± 100 - 1.94 + 1.95 63.40 ± 2.72 / 56.92 ± 2.17 68.17 ± 1.33 / 70.74 ± 0.93 30.92 ± 4.81 / 63.79 ± 4.42 @@ -1197,7 +1220,7 @@ title: English NLG 🇬🇧 32768 True 1,841 ± 297 / 651 ± 193 - 1.95 + 1.96 61.02 ± 2.70 / 55.57 ± 2.50 67.10 ± 0.81 / 70.66 ± 0.76 29.82 ± 5.18 / 62.86 ± 4.72 @@ -1213,29 +1236,6 @@ title: English NLG 🇬🇧 12.5.2 12.5.2 - - meta-llama/Meta-Llama-3-8B (few-shot) - 8030 - 128 - 8192 - True - 1,335 ± 338 / 260 ± 88 - 1.95 - 66.31 ± 2.09 / 58.68 ± 1.95 - 64.30 ± 0.65 / 69.26 ± 0.50 - 28.18 ± 3.96 / 58.97 ± 4.03 - 70.38 ± 3.51 / 82.95 ± 2.38 - 67.90 ± 0.49 / 
21.54 ± 0.57 - 52.54 ± 0.88 / 64.26 ± 0.66 - 41.19 ± 4.40 / 54.78 ± 3.62 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - mistralai/Mistral-7B-v0.3 (few-shot) 7248 @@ -1243,7 +1243,7 @@ title: English NLG 🇬🇧 32768 True 1,364 ± 343 / 266 ± 90 - 1.95 + 1.96 61.02 ± 2.74 / 55.65 ± 2.55 67.29 ± 0.80 / 70.81 ± 0.84 30.10 ± 5.12 / 62.99 ± 4.71 @@ -1266,7 +1266,7 @@ title: English NLG 🇬🇧 4352 True 10,187 ± 2,363 / 2,204 ± 737 - 1.96 + 1.97 57.78 ± 1.91 / 50.30 ± 2.29 67.81 ± 1.00 / 61.89 ± 1.51 22.81 ± 2.71 / 60.07 ± 2.49 @@ -1289,7 +1289,7 @@ title: English NLG 🇬🇧 8192 False 5,340 ± 1,262 / 1,157 ± 375 - 1.96 + 1.97 69.19 ± 2.03 / 55.64 ± 3.53 63.77 ± 2.55 / 71.13 ± 1.83 28.43 ± 3.97 / 62.28 ± 1.86 @@ -1312,7 +1312,7 @@ title: English NLG 🇬🇧 8192 False 2,707 ± 688 / 497 ± 166 - 1.97 + 1.98 56.16 ± 3.59 / 51.12 ± 2.58 68.27 ± 0.53 / 60.37 ± 0.57 23.82 ± 2.16 / 60.83 ± 1.63 @@ -1512,29 +1512,6 @@ title: English NLG 🇬🇧 14.0.3 14.0.3 - - timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot) - 7242 - 32 - 32768 - True - 5,054 ± 1,200 / 1,056 ± 339 - 2.09 - 59.10 ± 1.87 / 51.31 ± 1.87 - 68.41 ± 1.17 / 70.85 ± 0.74 - 25.43 ± 4.22 / 60.79 ± 3.45 - 71.89 ± 2.20 / 82.99 ± 1.78 - 67.99 ± 0.41 / 22.12 ± 0.52 - 44.09 ± 1.21 / 56.37 ± 0.96 - 32.29 ± 4.57 / 45.16 ± 4.28 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - mistralai/Mistral-7B-Instruct-v0.1 (few-shot) 7242 @@ -1558,6 +1535,29 @@ title: English NLG 🇬🇧 9.3.1 9.3.1 + + timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot) + 7242 + 32 + 32768 + True + 5,054 ± 1,200 / 1,056 ± 339 + 2.10 + 59.10 ± 1.87 / 51.31 ± 1.87 + 68.41 ± 1.17 / 70.85 ± 0.74 + 25.43 ± 4.22 / 60.79 ± 3.45 + 71.89 ± 2.20 / 82.99 ± 1.78 + 67.99 ± 0.41 / 22.12 ± 0.52 + 44.09 ± 1.21 / 56.37 ± 0.96 + 32.29 ± 4.57 / 45.16 ± 4.28 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + utter-project/EuroLLM-9B-Instruct (few-shot) 9152 @@ -1719,29 +1719,6 @@ title: English NLG 🇬🇧 9.3.2 9.3.2 - - nvidia/mistral-nemo-minitron-8b-instruct (few-shot) - 8414 - 131 - 8192 - True - 3,161 ± 676 / 1,247 ± 481 - 2.17 - 72.63 ± 1.76 / 63.75 ± 1.29 - 65.74 ± 1.14 / 69.65 ± 0.96 - 43.43 ± 1.62 / 69.19 ± 1.28 - 0.00 ± 0.00 / 20.81 ± 0.82 - 66.76 ± 0.07 / 17.17 ± 0.16 - 60.00 ± 0.90 / 69.95 ± 0.66 - 69.36 ± 1.38 / 76.63 ± 1.08 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - google/gemma-7b-it (few-shot) 8538 @@ -1765,6 +1742,29 @@ title: English NLG 🇬🇧 12.10.0 12.10.0 + + nvidia/mistral-nemo-minitron-8b-instruct (few-shot) + 8414 + 131 + 8192 + True + 3,161 ± 676 / 1,247 ± 481 + 2.18 + 72.63 ± 1.76 / 63.75 ± 1.29 + 65.74 ± 1.14 / 69.65 ± 0.96 + 43.43 ± 1.62 / 69.19 ± 1.28 + 0.00 ± 0.00 / 20.81 ± 0.82 + 66.76 ± 0.07 / 17.17 ± 0.16 + 60.00 ± 0.90 / 69.95 ± 0.66 + 69.36 ± 1.38 / 76.63 ± 1.08 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + occiglot/occiglot-7b-eu5-instruct (few-shot) 7242 @@ -1794,7 +1794,7 @@ title: English NLG 🇬🇧 32 2047 True - 3,194 ± 687 / 650 ± 216 + 8,681 ± 1,650 / 2,177 ± 717 2.19 68.69 ± 1.08 / 58.53 ± 1.82 66.77 ± 0.54 / 70.41 ± 0.88 @@ -1841,7 +1841,7 @@ title: English NLG 🇬🇧 8192 True 6,113 ± 1,044 / 1,790 ± 579 - 2.21 + 2.22 56.38 ± 2.95 / 50.80 ± 2.82 66.04 ± 1.68 / 65.21 ± 1.48 22.15 ± 3.57 / 57.71 ± 4.24 @@ -1858,35 +1858,12 @@ title: English NLG 🇬🇧 12.5.0 - ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) - 3374 - 49 + meta-llama/Llama-2-7b-chat-hf (few-shot) + 6738 + 32 4096 - True - 10,246 ± 3,021 / 1,629 ± 550 - 2.28 - 52.79 ± 4.09 / 43.45 ± 2.82 - 65.92 ± 1.02 / 70.47 ± 0.75 - 16.74 ± 2.67 / 55.45 ± 3.35 - 
64.92 ± 2.84 / 80.88 ± 1.27 - 65.50 ± 0.97 / 21.90 ± 0.40 - 33.84 ± 0.87 / 50.07 ± 0.67 - 49.84 ± 1.22 / 61.87 ± 1.03 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - - - meta-llama/Llama-2-7b-chat-hf (few-shot) - 6738 - 32 - 4096 - False - 2,643 ± 455 / 800 ± 247 + False + 2,643 ± 455 / 800 ± 247 2.28 62.53 ± 1.35 / 53.42 ± 2.04 62.23 ± 1.29 / 68.09 ± 1.34 @@ -1904,27 +1881,27 @@ title: English NLG 🇬🇧 9.3.1 - occiglot/occiglot-7b-eu5 (few-shot) - 7242 - 32 - 32768 + ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) + 3374 + 49 + 4096 True - 2,219 ± 427 / 717 ± 224 + 10,246 ± 3,021 / 1,629 ± 550 2.29 - 55.37 ± 2.94 / 51.08 ± 2.87 - 63.32 ± 1.29 / 68.50 ± 0.53 - 18.92 ± 2.39 / 57.96 ± 1.89 - 72.38 ± 2.57 / 83.46 ± 1.49 - 68.61 ± 0.55 / 23.48 ± 0.74 - 37.04 ± 1.33 / 52.33 ± 1.02 - 23.54 ± 2.08 / 40.78 ± 1.59 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - 12.2.0 - 12.2.0 + 52.79 ± 4.09 / 43.45 ± 2.82 + 65.92 ± 1.02 / 70.47 ± 0.75 + 16.74 ± 2.67 / 55.45 ± 3.35 + 64.92 ± 2.84 / 80.88 ± 1.27 + 65.50 ± 0.97 / 21.90 ± 0.40 + 33.84 ± 0.87 / 50.07 ± 0.67 + 49.84 ± 1.22 / 61.87 ± 1.03 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 ibm-granite/granite-8b-code-base-4k (few-shot) @@ -1972,6 +1949,29 @@ title: English NLG 🇬🇧 12.10.5 12.10.5 + + occiglot/occiglot-7b-eu5 (few-shot) + 7242 + 32 + 32768 + True + 2,219 ± 427 / 717 ± 224 + 2.30 + 55.37 ± 2.94 / 51.08 ± 2.87 + 63.32 ± 1.29 / 68.50 ± 0.53 + 18.92 ± 2.39 / 57.96 ± 1.89 + 72.38 ± 2.57 / 83.46 ± 1.49 + 68.61 ± 0.55 / 23.48 ± 0.74 + 37.04 ± 1.33 / 52.33 ± 1.02 + 23.54 ± 2.08 / 40.78 ± 1.59 + 12.5.2 + 12.1.0 + 12.1.0 + 12.1.0 + 12.1.0 + 12.2.0 + 12.2.0 + ibm-granite/granite-7b-instruct (few-shot) 6738 @@ -1979,7 +1979,7 @@ title: English NLG 🇬🇧 4096 True 3,136 ± 558 / 942 ± 290 - 2.31 + 2.32 53.21 ± 1.85 / 45.68 ± 2.60 65.98 ± 1.15 / 64.68 ± 2.08 7.26 ± 2.04 / 52.39 ± 1.92 @@ -2002,7 +2002,7 @@ title: English NLG 🇬🇧 4096 True 10,504 ± 3,028 / 1,678 ± 559 - 2.33 + 2.34 49.44 ± 3.68 / 39.69 ± 2.34 66.65 ± 1.04 / 65.72 ± 1.32 12.56 ± 2.15 / 54.20 ± 3.42 @@ -2018,6 +2018,29 @@ title: English NLG 🇬🇧 13.0.0 13.0.0 + + ibm-granite/granite-8b-code-instruct-4k (few-shot) + 8055 + 49 + 4096 + True + 5,617 ± 995 / 1,623 ± 540 + 2.35 + 72.59 ± 0.91 / 65.83 ± 1.30 + 61.61 ± 1.45 / 67.09 ± 1.38 + 18.37 ± 2.07 / 56.26 ± 2.62 + 66.68 ± 3.56 / 78.95 ± 2.38 + 68.41 ± 0.33 / 24.66 ± 0.47 + 24.14 ± 0.58 / 42.17 ± 0.33 + 14.42 ± 2.00 / 34.50 ± 1.81 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + meta-llama/Llama-3.2-3B (few-shot) 3213 @@ -2025,7 +2048,7 @@ title: English NLG 🇬🇧 131328 True 3,713 ± 877 / 836 ± 267 - 2.34 + 2.35 59.09 ± 1.44 / 52.03 ± 1.96 63.29 ± 1.29 / 67.82 ± 0.74 13.50 ± 4.14 / 50.33 ± 5.61 @@ -2048,7 +2071,7 @@ title: English NLG 🇬🇧 4096 True 7,259 ± 2,120 / 1,240 ± 432 - 2.34 + 2.35 47.50 ± 1.70 / 41.85 ± 1.68 64.69 ± 1.33 / 57.60 ± 0.63 8.01 ± 1.97 / 51.78 ± 1.65 @@ -2064,29 +2087,6 @@ title: English NLG 🇬🇧 12.10.8 12.10.8 - - ibm-granite/granite-8b-code-instruct-4k (few-shot) - 8055 - 49 - 4096 - True - 5,617 ± 995 / 1,623 ± 540 - 2.35 - 72.59 ± 0.91 / 65.83 ± 1.30 - 61.61 ± 1.45 / 67.09 ± 1.38 - 18.37 ± 2.07 / 56.26 ± 2.62 - 66.68 ± 3.56 / 78.95 ± 2.38 - 68.41 ± 0.33 / 24.66 ± 0.47 - 24.14 ± 0.58 / 42.17 ± 0.33 - 14.42 ± 2.00 / 34.50 ± 1.81 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - meta-llama/Llama-2-7b-hf (few-shot) 6738 @@ -2117,7 +2117,7 @@ title: English NLG 🇬🇧 2048 True 3,472 ± 885 / 728 ± 239 - 2.37 + 2.38 49.16 ± 3.09 / 43.10 ± 2.58 62.41 ± 1.51 / 67.24 ± 1.18 12.31 
± 2.96 / 48.73 ± 5.08 @@ -2232,7 +2232,7 @@ title: English NLG 🇬🇧 2048 True 4,405 ± 1,098 / 1,032 ± 345 - 2.53 + 2.54 47.76 ± 2.72 / 44.84 ± 2.71 66.41 ± 0.85 / 65.96 ± 2.20 5.76 ± 1.50 / 50.36 ± 2.34 @@ -2324,7 +2324,7 @@ title: English NLG 🇬🇧 131072 True 1,220 ± 411 / 158 ± 53 - 2.61 + 2.62 76.84 ± 1.89 / 72.90 ± 1.50 67.91 ± 1.19 / 68.81 ± 1.37 30.61 ± 4.10 / 62.57 ± 4.17 @@ -2386,29 +2386,6 @@ title: English NLG 🇬🇧 13.0.0 13.0.0 - - google/gemma-2-2b (few-shot) - 2614 - 256 - 8448 - True - 5,235 ± 1,226 / 1,154 ± 366 - 2.66 - 30.82 ± 4.46 / 27.49 ± 3.87 - 66.28 ± 0.84 / 65.60 ± 1.23 - 10.09 ± 2.52 / 48.92 ± 3.34 - 64.96 ± 3.77 / 78.21 ± 2.16 - 67.13 ± 0.54 / 21.94 ± 0.39 - 35.14 ± 1.07 / 51.22 ± 0.84 - 11.26 ± 1.35 / 31.40 ± 1.40 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - HuggingFaceTB/SmolLM2-1.7B (few-shot) 1711 @@ -2432,6 +2409,29 @@ title: English NLG 🇬🇧 13.1.0 13.1.0 + + google/gemma-2-2b (few-shot) + 2614 + 256 + 8448 + True + 5,235 ± 1,226 / 1,154 ± 366 + 2.67 + 30.82 ± 4.46 / 27.49 ± 3.87 + 66.28 ± 0.84 / 65.60 ± 1.23 + 10.09 ± 2.52 / 48.92 ± 3.34 + 64.96 ± 3.77 / 78.21 ± 2.16 + 67.13 ± 0.54 / 21.94 ± 0.39 + 35.14 ± 1.07 / 51.22 ± 0.84 + 11.26 ± 1.35 / 31.40 ± 1.40 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + Qwen/Qwen1.5-1.8B-Chat (few-shot) 1837 @@ -2462,7 +2462,7 @@ title: English NLG 🇬🇧 131328 False 7,436 ± 1,846 / 1,508 ± 479 - 2.71 + 2.72 56.41 ± 1.79 / 52.05 ± 1.57 59.46 ± 1.16 / 65.61 ± 1.08 8.36 ± 0.71 / 49.50 ± 2.90 @@ -2508,7 +2508,7 @@ title: English NLG 🇬🇧 2048 True 2,732 ± 868 / 662 ± 238 - 2.74 + 2.75 60.64 ± 2.11 / 55.14 ± 2.01 61.20 ± 1.16 / 61.92 ± 1.68 7.63 ± 2.79 / 46.39 ± 3.79 @@ -2524,6 +2524,29 @@ title: English NLG 🇬🇧 13.0.0 13.0.0 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 2.75 + 43.00 ± 1.94 / 39.96 ± 1.87 + 54.47 ± 1.37 / 64.39 ± 0.56 + 17.44 ± 1.67 / 50.79 ± 2.44 + 53.15 ± 1.38 / 70.06 ± 0.79 + 67.33 ± 0.15 / 22.37 ± 0.27 + 20.49 ± 1.00 / 38.88 ± 0.87 + 17.50 ± 0.84 / 36.85 ± 0.65 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + ibm-granite/granite-3.0-1b-a400m-instruct (few-shot) 1335 @@ -2600,7 +2623,7 @@ title: English NLG 🇬🇧 8192 False 3,607 ± 565 / 1,212 ± 349 - 2.90 + 2.91 23.28 ± 6.45 / 21.81 ± 5.61 61.91 ± 2.08 / 67.80 ± 1.12 6.92 ± 2.36 / 44.29 ± 3.87 @@ -2623,7 +2646,7 @@ title: English NLG 🇬🇧 8192 False 6,471 ± 1,142 / 1,961 ± 584 - 2.91 + 2.92 40.05 ± 2.56 / 33.77 ± 1.94 48.83 ± 1.00 / 60.88 ± 0.70 5.83 ± 1.52 / 50.74 ± 1.73 @@ -2692,7 +2715,7 @@ title: English NLG 🇬🇧 4096 False 1,254 ± 328 / 243 ± 83 - 3.04 + 3.05 50.73 ± 2.64 / 38.64 ± 1.60 27.52 ± 3.38 / 31.81 ± 3.98 2.96 ± 2.64 / 35.23 ± 1.82 @@ -2715,7 +2738,7 @@ title: English NLG 🇬🇧 4224 True 4,438 ± 1,128 / 1,028 ± 346 - 3.06 + 3.07 38.51 ± 3.33 / 38.08 ± 2.64 63.60 ± 2.87 / 58.50 ± 1.25 2.23 ± 1.32 / 34.15 ± 0.60 @@ -2754,29 +2777,6 @@ title: English NLG 🇬🇧 12.1.0 12.1.0 - - NbAiLab/nb-llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131072 - True - 1,296 ± 335 / 246 ± 84 - 3.11 - 40.91 ± 6.01 / 37.55 ± 4.86 - 47.12 ± 10.26 / 54.09 ± 8.04 - 6.03 ± 1.71 / 49.55 ± 3.61 - 51.34 ± 2.37 / 65.28 ± 2.29 - 66.65 ± 1.14 / 20.92 ± 1.64 - 4.91 ± 2.84 / 25.92 ± 1.81 - 1.21 ± 1.04 / 25.67 ± 0.89 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - Qwen/Qwen1.5-0.5B-Chat (few-shot) 620 @@ -2801,27 +2801,27 @@ title: English NLG 🇬🇧 12.1.0 - meta-llama/Llama-3.2-1B (few-shot) - 1236 + NbAiLab/nb-llama-3.1-8B-Instruct (few-shot) + 8030 128 - 131328 + 131072 True - 7,577 ± 
1,884 / 1,555 ± 492 - 3.11 - 35.08 ± 5.88 / 32.44 ± 4.89 - 54.40 ± 2.92 / 64.38 ± 2.10 - 2.97 ± 0.84 / 45.05 ± 4.18 - 58.30 ± 2.84 / 71.04 ± 1.83 - 62.19 ± 1.70 / 16.53 ± 1.13 - 7.91 ± 0.72 / 29.30 ± 0.46 - 1.60 ± 1.21 / 26.25 ± 0.85 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 1,296 ± 335 / 246 ± 84 + 3.12 + 40.91 ± 6.01 / 37.55 ± 4.86 + 47.12 ± 10.26 / 54.09 ± 8.04 + 6.03 ± 1.71 / 49.55 ± 3.61 + 51.34 ± 2.37 / 65.28 ± 2.29 + 66.65 ± 1.14 / 20.92 ± 1.64 + 4.91 ± 2.84 / 25.92 ± 1.81 + 1.21 ± 1.04 / 25.67 ± 0.89 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 ibm-granite/granite-3.0-1b-a400m-base (few-shot) @@ -2846,6 +2846,29 @@ title: English NLG 🇬🇧 13.2.0 13.2.0 + + meta-llama/Llama-3.2-1B (few-shot) + 1236 + 128 + 131328 + True + 7,577 ± 1,884 / 1,555 ± 492 + 3.12 + 35.08 ± 5.88 / 32.44 ± 4.89 + 54.40 ± 2.92 / 64.38 ± 2.10 + 2.97 ± 0.84 / 45.05 ± 4.18 + 58.30 ± 2.84 / 71.04 ± 1.83 + 62.19 ± 1.70 / 16.53 ± 1.13 + 7.91 ± 0.72 / 29.30 ± 0.46 + 1.60 ± 1.21 / 26.25 ± 0.85 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot) 7453 @@ -2853,7 +2876,7 @@ title: English NLG 🇬🇧 4096 True 1,438 ± 410 / 233 ± 79 - 3.12 + 3.13 44.48 ± 3.17 / 36.31 ± 2.23 23.69 ± 3.36 / 25.98 ± 3.59 8.52 ± 2.60 / 51.57 ± 2.62 @@ -2899,7 +2922,7 @@ title: English NLG 🇬🇧 2048 True 1,875 ± 673 / 261 ± 91 - 3.20 + 3.21 45.86 ± 3.18 / 40.23 ± 2.41 62.08 ± 3.29 / 55.11 ± 1.68 6.62 ± 2.43 / 48.79 ± 3.77 @@ -2962,20 +2985,20 @@ title: English NLG 🇬🇧 13.0.0 - HuggingFaceTB/SmolLM2-360M-Instruct (few-shot) + HuggingFaceTB/SmolLM2-360M (few-shot) 362 49 8192 True - 21,777 ± 6,115 / 3,617 ± 1,211 + 22,023 ± 6,203 / 3,675 ± 1,231 3.36 - 30.73 ± 4.30 / 29.47 ± 4.10 - 59.51 ± 3.73 / 54.82 ± 2.43 - 1.55 ± 1.90 / 43.18 ± 5.08 - 49.03 ± 1.47 / 60.00 ± 1.53 - 57.73 ± 4.93 / 15.68 ± 1.21 - 0.11 ± 1.23 / 23.20 ± 0.56 - -0.06 ± 0.39 / 24.60 ± 0.44 + 31.14 ± 1.79 / 28.54 ± 0.86 + 43.97 ± 5.28 / 55.08 ± 4.26 + 3.49 ± 2.49 / 46.52 ± 4.13 + 47.91 ± 4.97 / 60.41 ± 3.91 + 62.20 ± 1.04 / 17.61 ± 0.50 + 0.12 ± 1.55 / 23.00 ± 0.82 + 0.13 ± 1.34 / 24.53 ± 0.49 13.1.0 13.1.0 13.1.0 @@ -2985,20 +3008,20 @@ title: English NLG 🇬🇧 13.1.0 - HuggingFaceTB/SmolLM2-360M (few-shot) + HuggingFaceTB/SmolLM2-360M-Instruct (few-shot) 362 49 8192 True - 22,023 ± 6,203 / 3,675 ± 1,231 - 3.36 - 31.14 ± 1.79 / 28.54 ± 0.86 - 43.97 ± 5.28 / 55.08 ± 4.26 - 3.49 ± 2.49 / 46.52 ± 4.13 - 47.91 ± 4.97 / 60.41 ± 3.91 - 62.20 ± 1.04 / 17.61 ± 0.50 - 0.12 ± 1.55 / 23.00 ± 0.82 - 0.13 ± 1.34 / 24.53 ± 0.49 + 21,777 ± 6,115 / 3,617 ± 1,211 + 3.37 + 30.73 ± 4.30 / 29.47 ± 4.10 + 59.51 ± 3.73 / 54.82 ± 2.43 + 1.55 ± 1.90 / 43.18 ± 5.08 + 49.03 ± 1.47 / 60.00 ± 1.53 + 57.73 ± 4.93 / 15.68 ± 1.21 + 0.11 ± 1.23 / 23.20 ± 0.56 + -0.06 ± 0.39 / 24.60 ± 0.44 13.1.0 13.1.0 13.1.0 @@ -3037,7 +3060,7 @@ title: English NLG 🇬🇧 2304 True 8,536 ± 1,926 / 1,940 ± 619 - 3.61 + 3.62 26.47 ± 6.25 / 28.27 ± 5.35 60.05 ± 3.94 / 56.18 ± 1.90 0.72 ± 1.90 / 42.84 ± 3.50 @@ -3083,7 +3106,7 @@ title: English NLG 🇬🇧 4096 False 10,890 ± 2,686 / 2,186 ± 750 - 3.78 + 3.79 18.69 ± 7.23 / 18.50 ± 6.51 21.95 ± 6.30 / 33.38 ± 4.79 0.01 ± 1.91 / 39.40 ± 3.94 @@ -3106,7 +3129,7 @@ title: English NLG 🇬🇧 2048 True 2,519 ± 841 / 323 ± 104 - 3.80 + 3.81 21.60 ± 4.23 / 23.22 ± 3.33 45.04 ± 5.19 / 50.60 ± 4.14 -0.46 ± 1.47 / 44.56 ± 3.38 @@ -3129,7 +3152,7 @@ title: English NLG 🇬🇧 4096 True 6,513 ± 1,241 / 1,282 ± 644 - 3.84 + 3.85 27.37 ± 4.57 / 26.50 ± 4.52 36.35 ± 7.92 / 45.58 ± 8.18 -0.37 ± 1.89 / 44.67 ± 2.86 @@ 
-3152,7 +3175,7 @@ title: English NLG 🇬🇧 8192 True 25,602 ± 7,583 / 3,953 ± 1,325 - 3.99 + 4.00 29.96 ± 3.19 / 28.98 ± 3.29 18.64 ± 8.52 / 28.83 ± 5.86 1.85 ± 1.20 / 44.03 ± 3.98 @@ -3175,7 +3198,7 @@ title: English NLG 🇬🇧 8192 True 26,346 ± 7,812 / 4,082 ± 1,372 - 4.04 + 4.05 31.26 ± 3.84 / 30.44 ± 3.28 26.69 ± 10.82 / 34.46 ± 8.00 1.78 ± 1.67 / 43.50 ± 3.99 @@ -3221,7 +3244,7 @@ title: English NLG 🇬🇧 131072 True 3,424 ± 1,080 / 464 ± 158 - 4.18 + 4.19 3.98 ± 3.16 / 3.68 ± 2.99 39.54 ± 6.40 / 48.55 ± 7.75 3.41 ± 1.85 / 41.04 ± 4.56 @@ -3244,7 +3267,7 @@ title: English NLG 🇬🇧 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.21 + 4.22 31.79 ± 3.88 / 31.32 ± 2.81 19.13 ± 9.92 / 33.51 ± 6.97 -0.03 ± 1.07 / 36.37 ± 2.34 @@ -3290,7 +3313,7 @@ title: English NLG 🇬🇧 512 True 5,847 ± 1,029 / 1,640 ± 525 - 4.52 + 4.53 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 19.61 ± 0.22 0.41 ± 0.55 / 33.46 ± 0.37 @@ -3313,7 +3336,7 @@ title: English NLG 🇬🇧 4224 True 3,024 ± 496 / 909 ± 301 - 4.65 + 4.66 12.34 ± 2.70 / 12.41 ± 2.54 -1.48 ± 3.09 / 21.17 ± 2.22 -0.48 ± 1.52 / 42.45 ± 3.99 @@ -3336,7 +3359,7 @@ title: English NLG 🇬🇧 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 4.80 + 4.81 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 19.61 ± 0.22 2.48 ± 1.94 / 34.52 ± 0.85 @@ -3359,7 +3382,7 @@ title: English NLG 🇬🇧 1024 True 11,734 ± 3,124 / 2,174 ± 720 - 4.90 + 4.91 1.55 ± 1.98 / 1.45 ± 1.82 3.71 ± 3.16 / 22.09 ± 2.08 -0.42 ± 1.56 / 40.58 ± 3.74 diff --git a/english-nlu.csv b/english-nlu.csv index b6e5c853..1e67ea10 100644 --- a/english-nlu.csv +++ b/english-nlu.csv @@ -5,89 +5,89 @@ google/electra-base-discriminator,109,31,512,True,False,9977,1.41,89.83,63.55,67 Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,1.45,76.84,68.94,57.74,71.22 "gpt-4-0613 (few-shot, val)",-1,100,8191,True,False,597,1.48,78.06,69.06,55.76,67.35 intfloat/multilingual-e5-large-instruct,560,250,514,True,False,5947,1.49,91.43,66.42,53.05,61.34 -FacebookAI/roberta-large,354,50,512,True,False,4542,1.51,91.53,62.92,48.77,71.23 +FacebookAI/roberta-large,354,50,512,True,False,4542,1.52,91.53,62.92,48.77,71.23 meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,1.52,82.86,70.6,53.8,62.69 intfloat/multilingual-e5-large,560,250,512,True,False,6732,1.54,91.69,64.37,53.58,60.47 FacebookAI/roberta-base,124,50,512,True,False,13354,1.55,91.0,59.54,57.29,62.75 "gpt-4-1106-preview (few-shot, val)",-1,100,128126,True,False,576,1.57,81.79,67.55,51.21,66.6 google-bert/bert-large-cased,333,29,512,True,False,5051,1.59,89.84,58.19,63.62,55.17 meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.59,83.16,69.96,50.83,60.82 -microsoft/mdeberta-v3-base,278,251,512,True,False,20637,1.59,91.83,53.75,62.11,62.1 +microsoft/mdeberta-v3-base,278,251,512,True,False,20637,1.6,91.83,53.75,62.11,62.1 AI-Sweden-Models/roberta-large-1160k,354,50,512,True,False,14014,1.61,89.53,53.9,55.31,69.89 Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.62,75.84,68.66,56.46,58.39 -google/rembert,575,250,512,True,False,11736,1.62,90.17,51.74,55.55,69.02 "meta-llama/Meta-Llama-3-70B (few-shot, val)",70554,128,8192,True,False,312,1.62,79.06,65.53,46.28,75.2 +google/rembert,575,250,512,True,False,11736,1.63,90.17,51.74,55.55,69.02 "gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.65,83.48,62.74,46.56,65.41 ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.66,81.06,68.92,49.06,61.27 VAGOsolutions/SauerkrautLM-7b-LaserChat 
(few-shot),7242,32,4096,False,False,4413,1.66,72.31,69.64,46.08,72.23 upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,1.67,69.5,70.01,41.35,76.79 AI-Sweden-Models/roberta-large-1350k,354,50,512,True,False,5744,1.69,89.48,51.88,50.69,69.46 google-bert/bert-large-uncased,334,31,512,True,False,4711,1.7,88.8,57.94,59.27,52.38 -meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.71,82.35,71.07,51.27,50.23 -CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,1.72,78.35,67.62,46.5,63.2 +meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.72,82.35,71.07,51.27,50.23 +CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,1.73,78.35,67.62,46.5,63.2 VAGOsolutions/FC-SauerkrautLM-7b-beta (few-shot),-1,32,8192,False,False,2160,1.74,70.15,69.36,37.09,77.39 "claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,1.74,82.11,67.01,51.09,52.41 google/gemma-2-27b-it (few-shot),27227,256,8320,True,False,1516,1.74,62.4,68.68,54.17,66.96 -intfloat/multilingual-e5-base,278,250,512,True,False,14965,1.76,89.65,61.46,51.32,50.78 "meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4221,True,False,1892,1.76,72.38,60.98,43.12,74.5 mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024128,True,False,7095,1.76,75.24,63.05,44.75,67.7 +intfloat/multilingual-e5-base,278,250,512,True,False,14965,1.77,89.65,61.46,51.32,50.78 nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,1.77,73.66,68.56,51.33,56.87 distilbert/distilroberta-base,82,50,512,True,False,17448,1.79,90.04,56.08,54.9,49.36 "gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,1.82,71.48,66.41,41.43,67.9 "gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.82,81.23,63.46,46.45,57.64 -Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,1.83,72.77,70.12,44.68,57.17 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,1.83,66.17,68.03,39.76,71.21 -model-garden-lms/teams-base-finewebs-1m,135,64,512,True,False,31254,1.83,88.34,58.82,48.65,52.3 +Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,1.84,72.77,70.12,44.68,57.17 +model-garden-lms/teams-base-finewebs-1m,135,64,512,True,False,31254,1.84,88.34,58.82,48.65,52.3 skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,1.84,67.43,68.55,39.75,65.93 "gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.85,77.38,66.75,52.43,41.03 -mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,1.85,65.31,68.87,43.07,63.97 +mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,1.86,65.31,68.87,43.07,63.97 setu4993/LaBSE,471,501,512,True,False,25418,1.86,90.33,52.93,50.7,53.77 google-bert/bert-base-uncased,109,31,512,True,False,10296,1.88,87.62,54.01,56.97,42.37 -"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,1.88,75.8,61.65,47.74,56.98 -mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,1.89,72.4,63.46,35.86,68.42 +"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,1.89,75.8,61.65,47.74,56.98 google/gemma-2-9b-it (few-shot),9242,256,8320,True,False,2062,1.9,58.07,68.4,51.58,62.03 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,2047,True,False,3194,1.9,68.69,66.77,42.14,63.71 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,2047,True,False,8681,1.9,68.69,66.77,42.14,63.71 +mistralai/Ministral-8B-Instruct-2410 
(few-shot),8020,131,32768,True,False,1302,1.9,72.4,63.46,35.86,68.42 01-ai/Yi-1.5-6B (few-shot),6061,64,4224,True,False,2867,1.91,55.51,68.74,40.81,72.33 "152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32889,True,False,2126,1.91,71.83,62.99,39.97,64.42 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,1.91,70.02,69.48,44.59,55.7 -FacebookAI/xlm-roberta-large,559,250,512,True,False,17897,1.92,89.81,41.97,35.55,68.88 meta-llama/Llama-3.1-8B (few-shot),8030,128,131200,True,False,2986,1.92,69.86,66.76,30.96,71.39 nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,1.92,67.52,69.03,40.51,58.12 +FacebookAI/xlm-roberta-large,559,250,512,True,False,17897,1.93,89.81,41.97,35.55,68.88 "meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8317,True,False,1673,1.93,81.3,66.18,38.1,53.31 google/electra-large-discriminator,334,31,512,True,False,4700,1.94,67.87,48.08,55.46,70.66 CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,1.95,67.33,68.67,31.18,68.33 google/gemma-2-9b (few-shot),9242,256,8320,True,False,2038,1.95,50.9,68.91,43.79,69.17 mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,1.96,63.4,68.17,30.92,73.45 -alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,1.99,61.02,67.1,29.82,73.5 -mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,1.99,61.02,67.29,30.1,73.59 -robinsmits/Qwen1.5-7B-Dutch-Chat-Sft-Bf16 (few-shot),7719,152,32768,False,False,2413,1.99,67.52,63.1,37.75,64.88 NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.0,63.77,69.23,38.49,57.03 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.0,75.02,67.64,32.29,54.84 +alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.0,61.02,67.1,29.82,73.5 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.0,75.02,67.64,32.29,54.84 +mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.0,61.02,67.29,30.1,73.59 +robinsmits/Qwen1.5-7B-Dutch-Chat-Sft-Bf16 (few-shot),7719,152,32768,False,False,2413,2.0,67.52,63.1,37.75,64.88 "VAGOsolutions/SauerkrautLM-7b-HerO (few-shot, val)",7242,32,32768,False,True,2477,2.01,69.26,68.63,29.87,60.92 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.02,71.96,67.26,40.81,49.79 "meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4221,True,False,1979,2.02,72.8,63.76,28.37,64.7 model-garden-lms/bert-base-finewebs-951k,136,64,512,True,False,36844,2.02,88.31,57.5,40.02,47.03 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131200,True,False,1005,2.03,76.95,68.12,34.34,47.88 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.03,66.31,64.3,28.18,70.38 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.03,66.31,64.3,28.18,70.38 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131200,True,False,1473,2.04,76.95,68.12,34.34,47.88 occiglot/occiglot-7b-de-en-instruct (few-shot),7242,32,32768,False,False,1584,2.04,60.9,66.54,23.6,75.14 ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,2.05,55.76,66.89,36.6,67.55 -model-garden-lms/bert-base-token-dropping-finewebs-901k,136,64,512,True,False,36949,2.06,87.7,54.0,39.99,45.22 +model-garden-lms/bert-base-token-dropping-finewebs-901k,136,64,512,True,False,36949,2.07,87.7,54.0,39.99,45.22 neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,2.09,59.28,68.49,21.95,78.16 ibm-granite/granite-3.0-2b-base 
(few-shot),2534,49,4224,True,False,10187,2.1,57.78,67.81,22.81,72.9 timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.1,59.1,68.41,25.43,71.89 CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.11,56.16,68.27,23.82,74.23 -google/gemma-7b-it (few-shot),8538,256,8317,False,False,1792,2.11,66.7,55.62,31.36,72.58 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.11,72.76,62.35,21.57,69.8 ibm-granite/granite-8b-code-base (few-shot),8055,49,4223,True,False,1002,2.11,72.64,62.31,22.38,69.84 +google/gemma-7b-it (few-shot),8538,256,8317,False,False,1792,2.12,66.7,55.62,31.36,72.58 meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,2.12,63.75,61.85,26.41,73.48 RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.13,64.93,64.14,28.08,62.09 -ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4224,True,False,10194,2.14,61.97,67.54,31.7,59.78 google-bert/bert-base-multilingual-cased,177,120,512,True,False,14083,2.15,89.32,41.89,38.34,55.19 +ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4224,True,False,10194,2.15,61.97,67.54,31.7,59.78 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131200,False,False,10424,2.15,68.44,66.0,32.04,49.54 mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,2.15,57.58,61.44,34.92,65.38 occiglot/occiglot-7b-de-en (few-shot),7242,32,32768,True,False,1992,2.15,56.07,65.29,25.78,73.13 -senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.15,70.62,67.78,30.99,49.56 google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.16,47.2,63.88,35.75,69.4 +senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.16,70.62,67.78,30.99,49.56 Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,2.18,55.45,60.55,28.6,70.49 "gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,2.18,42.4,65.24,44.59,62.94 "mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.18,69.16,63.85,28.4,52.69 @@ -95,20 +95,20 @@ ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617, Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,2.2,58.56,59.62,28.55,70.04 danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.2,56.38,66.04,22.15,71.32 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.2,68.75,62.37,25.07,61.56 -occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.2,56.9,62.1,20.17,75.29 -ZurichNLP/unsup-simcse-xlm-roberta-base,277,250,512,True,False,34520,2.23,85.88,51.46,35.83,43.26 +occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.21,56.9,62.1,20.17,75.29 +ZurichNLP/unsup-simcse-xlm-roberta-base,277,250,512,True,False,34520,2.24,85.88,51.46,35.83,43.26 distilbert/distilbert-base-cased,65,29,512,True,False,19667,2.24,84.75,50.94,53.47,24.93 -Twitter/twhin-bert-large,562,250,512,True,False,9707,2.25,89.5,45.98,30.58,48.44 meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,2.25,55.27,65.16,20.43,69.82 +Twitter/twhin-bert-large,562,250,512,True,False,9707,2.26,89.5,45.98,30.58,48.44 "cstr/Spaetzle-v8-7b (few-shot, val)",7242,32,32768,False,True,5980,2.26,69.4,65.39,26.69,49.74 "mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.26,69.19,63.77,28.43,44.39 utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.26,44.81,62.54,28.1,71.71 -mistralai/Mistral-7B-Instruct-v0.2 
(few-shot),7242,32,32768,False,False,2370,2.27,62.11,59.91,30.66,58.27 -meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.29,62.53,62.23,22.71,64.45 -microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,2.29,64.09,46.77,31.62,71.25 +mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.28,62.11,59.91,30.66,58.27 occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.29,55.37,63.32,18.92,72.38 +meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.3,62.53,62.23,22.71,64.45 +microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,2.3,64.09,46.77,31.62,71.25 NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.34,63.12,66.47,38.82,29.16 -meta-llama/Llama-3.2-3B (few-shot),3213,128,131200,True,False,3713,2.35,59.09,63.29,13.5,68.15 +meta-llama/Llama-3.2-3B (few-shot),3213,128,131200,True,False,3713,2.36,59.09,63.29,13.5,68.15 google/gemma-2-2b-it (few-shot),2614,256,8320,True,False,5374,2.37,44.36,66.37,34.69,55.07 ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,2.37,52.79,65.92,16.74,64.92 ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,2.4,60.64,61.2,7.63,69.83 @@ -119,73 +119,74 @@ MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,2.44,47.2,64. microsoft/phi-2 (few-shot),2780,51,2048,True,False,3472,2.44,49.16,62.41,12.31,75.79 stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,2.46,47.5,64.69,8.01,71.81 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,2.47,58.3,59.01,10.33,65.04 -Twitter/twhin-bert-base,279,250,512,True,False,11514,2.48,87.77,41.09,32.26,35.15 -sentence-transformers/stsb-xlm-r-multilingual,278,250,512,True,False,15040,2.48,82.39,57.35,47.29,4.29 +Twitter/twhin-bert-base,279,250,512,True,False,11514,2.49,87.77,41.09,32.26,35.15 ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,2.49,49.44,66.65,12.56,63.29 +sentence-transformers/stsb-xlm-r-multilingual,278,250,512,True,False,15040,2.49,82.39,57.35,47.29,4.29 allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,2.5,41.02,66.43,5.17,76.04 ibm-granite/granite-7b-base (few-shot),6738,32,2175,True,False,4405,2.5,47.76,66.41,5.76,70.34 ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,2.51,53.21,65.98,7.26,64.71 cardiffnlp/twitter-xlm-roberta-base,277,250,512,True,False,34475,2.52,87.09,55.4,39.78,6.2 sentence-transformers/paraphrase-xlm-r-multilingual-v1,278,250,512,True,False,20154,2.53,84.05,54.92,45.85,4.13 -distilbert/distilbert-base-multilingual-cased,135,120,512,True,False,26355,2.55,87.7,36.48,40.79,29.0 AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2048,True,False,1831,2.56,39.21,65.58,7.82,72.25 +distilbert/distilbert-base-multilingual-cased,135,120,512,True,False,26355,2.56,87.7,36.48,40.79,29.0 nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.57,72.63,65.74,43.43,0.0 ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,2.6,46.26,63.47,13.17,59.32 -AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,2.62,45.86,62.08,6.62,65.29 TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,2.62,45.89,59.29,9.11,66.74 +AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,2.63,45.86,62.08,6.62,65.29 
Geotrend/distilbert-base-25lang-cased,109,85,512,True,False,26099,2.63,87.08,36.77,37.1,26.99 NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.63,76.84,67.91,30.61,0.1 Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,2.65,37.22,64.34,15.3,64.41 NbAiLab/nb-llama-3.1-8B (few-shot),8030,128,131072,True,False,1297,2.66,62.9,62.26,24.37,25.93 google/gemma-2-2b (few-shot),2614,256,8320,True,False,5235,2.72,30.82,66.28,10.09,64.96 -HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,2.74,41.57,62.32,8.04,56.01 -meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131200,False,False,7436,2.74,56.41,59.46,8.36,47.26 +HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,2.75,41.57,62.32,8.04,56.01 +meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131200,False,False,7436,2.75,56.41,59.46,8.36,47.26 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,2.78,43.0,54.47,17.44,53.15 LumiOpen/Viking-13B (few-shot),14030,131,4224,True,False,840,2.8,42.78,59.9,5.68,58.52 -Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,2.8,40.89,55.33,11.23,60.69 +Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,2.81,40.89,55.33,11.23,60.69 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.82,74.35,31.19,21.76,45.7 utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,2.84,37.47,58.61,5.3,63.26 allenai/OLMo-7B (few-shot),6888,50,2176,True,False,5403,2.86,38.23,60.7,-0.19,61.93 -google/gemma-2b (few-shot),2506,256,8192,True,False,6087,2.88,19.65,62.14,8.3,66.3 +google/gemma-2b (few-shot),2506,256,8192,True,False,6087,2.89,19.65,62.14,8.3,66.3 VAGOsolutions/SauerkrautLM-Gemma-2b (few-shot),2506,256,8192,False,False,3607,2.95,23.28,61.91,6.92,64.68 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2176,True,False,5484,2.95,25.36,56.91,7.1,58.6 -google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,2.95,40.05,48.83,5.83,63.77 NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,2.96,38.51,63.6,2.23,45.44 +google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,2.96,40.05,48.83,5.83,63.77 ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,2.97,29.84,64.13,3.99,55.74 microsoft/xlm-align-base,277,250,512,True,False,14744,2.98,88.62,11.09,8.46,49.64 meta-llama/Llama-3.2-1B (few-shot),1236,128,131200,True,False,7577,2.99,35.08,54.4,2.97,58.3 -NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,3.02,35.84,56.87,3.08,52.77 +NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,3.03,35.84,56.87,3.08,52.77 NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,3.05,40.91,47.12,6.03,51.34 -Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.08,33.86,55.41,1.15,53.34 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,3.14,30.73,59.51,1.55,49.03 +Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.09,33.86,55.41,1.15,53.34 Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.14,37.51,57.15,2.94,42.57 -allenai/OLMo-1B (few-shot),1177,50,2176,True,False,8536,3.16,26.47,60.05,0.72,43.87 -openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.17,50.73,27.52,2.96,63.42 -sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,3.17,81.71,50.69,2.16,4.16 
-sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,3.17,81.71,50.69,2.16,4.19 -state-spaces/mamba-2.8b-hf (few-shot),2768,50,32896,True,False,2722,3.17,28.63,66.55,1.47,35.0 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,3.15,30.73,59.51,1.55,49.03 +allenai/OLMo-1B (few-shot),1177,50,2176,True,False,8536,3.17,26.47,60.05,0.72,43.87 +openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.18,50.73,27.52,2.96,63.42 +sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,3.18,81.71,50.69,2.16,4.16 +sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,3.18,81.71,50.69,2.16,4.19 +state-spaces/mamba-2.8b-hf (few-shot),2768,50,32896,True,False,2722,3.18,28.63,66.55,1.47,35.0 EuropeanParliament/EUBERT,94,66,512,True,False,20070,3.19,71.43,17.53,23.93,31.01 dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,3.23,77.64,12.42,13.65,33.29 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,3.23,77.5,53.1,-0.35,3.13 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.27,44.48,23.69,8.52,56.97 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,3.28,31.14,43.97,3.49,47.91 +openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.28,44.48,23.69,8.52,56.97 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,3.29,31.14,43.97,3.49,47.91 PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.47,40.45,47.89,0.28,26.77 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.56,21.6,45.04,-0.46,33.46 sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.56,71.9,36.22,0.47,2.4 sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.56,71.9,36.22,0.47,2.44 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.57,21.6,45.04,-0.46,33.46 sentence-transformers/distiluse-base-multilingual-cased-v1,135,120,512,True,False,34042,3.59,71.33,36.75,0.24,1.3 dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,3.74,65.85,25.85,1.21,4.0 RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.89,18.69,21.95,0.01,36.51 NbAiLab/nb-llama-3.2-3B (few-shot),3213,128,131072,True,False,1880,3.93,0.02,60.98,0.0,9.94 -NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,3.95,3.98,39.54,3.41,26.96 +NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,3.96,3.98,39.54,3.41,26.96 HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,3.98,31.26,26.69,1.78,13.88 PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.99,27.37,36.35,-0.37,7.42 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.02,29.96,18.64,1.85,26.9 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.03,31.79,19.13,-0.03,12.35 +HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.03,29.96,18.64,1.85,26.9 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.04,31.79,19.13,-0.03,12.35 PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.04,27.45,27.39,0.31,15.62 -3ebdola/Dialectal-Arabic-XLM-R-Base,277,250,512,True,False,12783,4.08,68.25,1.92,1.08,2.79 +3ebdola/Dialectal-Arabic-XLM-R-Base,277,250,512,True,False,12783,4.09,68.25,1.92,1.08,2.79 
fresh-xlm-roberta-base,277,250,512,True,False,2214,4.51,34.64,4.0,1.33,0.43 fresh-electra-small,13,31,512,True,False,7840,4.57,30.77,0.58,-0.17,0.46 -ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.8,1.55,3.71,-0.42,5.58 -NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.84,12.34,-1.48,-0.48,0.72 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.94,0.0,0.0,2.48,0.01 -RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.99,0.0,0.0,0.41,0.0 +ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.81,1.55,3.71,-0.42,5.58 +NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.85,12.34,-1.48,-0.48,0.72 +ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.95,0.0,0.0,2.48,0.01 +RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,5.0,0.0,0.0,0.41,0.0 diff --git a/english-nlu.md b/english-nlu.md index 608f6f7e..ff46a9f8 100644 --- a/english-nlu.md +++ b/english-nlu.md @@ -3,7 +3,7 @@ layout: leaderboard title: English NLU 🇬🇧 --- -
Last updated: 10/01/2025 12:30:41 CET
+
Last updated: 11/01/2025 11:03:51 CET
@@ -143,7 +143,7 @@ title: English NLU 🇬🇧 512 True 4,542 ± 1,120 / 845 ± 267 - 1.51 + 1.52 91.53 ± 0.85 / 91.21 ± 0.76 62.92 ± 1.84 / 62.60 ± 3.18 48.77 ± 15.71 / 71.46 ± 10.95 @@ -262,7 +262,7 @@ title: English NLU 🇬🇧 512 True 20,637 ± 3,925 / 4,497 ± 1,502 - 1.59 + 1.60 91.83 ± 0.50 / 91.40 ± 0.43 53.75 ± 1.47 / 56.40 ± 2.82 62.11 ± 1.53 / 80.67 ± 0.77 @@ -306,23 +306,6 @@ title: English NLU 🇬🇧 14.0.4 14.0.4 - - google/rembert - 575 - 250 - 512 - True - 11,736 ± 2,822 / 2,102 ± 677 - 1.62 - 90.17 ± 0.50 / 89.85 ± 0.49 - 51.74 ± 10.59 / 58.97 ± 6.34 - 55.55 ± 1.66 / 77.55 ± 0.89 - 69.02 ± 1.23 / 81.57 ± 1.05 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - meta-llama/Meta-Llama-3-70B (few-shot, val) 70554 @@ -340,6 +323,23 @@ title: English NLU 🇬🇧 12.7.0 12.7.0 + + google/rembert + 575 + 250 + 512 + True + 11,736 ± 2,822 / 2,102 ± 677 + 1.63 + 90.17 ± 0.50 / 89.85 ± 0.49 + 51.74 ± 10.59 / 58.97 ± 6.34 + 55.55 ± 1.66 / 77.55 ± 0.89 + 69.02 ± 1.23 / 81.57 ± 1.05 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + gpt-4o-2024-05-13 (few-shot, val) unknown @@ -449,7 +449,7 @@ title: English NLU 🇬🇧 131072 True 1,353 ± 443 / 180 ± 61 - 1.71 + 1.72 82.35 ± 0.76 / 76.42 ± 0.70 71.07 ± 0.91 / 72.62 ± 0.74 51.27 ± 1.18 / 74.25 ± 0.77 @@ -466,7 +466,7 @@ title: English NLU 🇬🇧 131072 False 1,909 ± 646 / 248 ± 84 - 1.72 + 1.73 78.35 ± 1.42 / 68.56 ± 1.40 67.62 ± 0.95 / 70.27 ± 1.06 46.50 ± 1.03 / 71.67 ± 0.79 @@ -527,23 +527,6 @@ title: English NLU 🇬🇧 13.0.0 13.0.0 - - intfloat/multilingual-e5-base - 278 - 250 - 512 - True - 14,965 ± 2,890 / 3,322 ± 1,074 - 1.76 - 89.65 ± 0.52 / 89.71 ± 0.48 - 61.46 ± 0.89 / 60.48 ± 2.52 - 51.32 ± 1.98 / 74.21 ± 1.18 - 50.78 ± 1.14 / 63.05 ± 0.89 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - meta-llama/Llama-2-70b-hf (few-shot, val) 68977 @@ -578,6 +561,23 @@ title: English NLU 🇬🇧 13.0.0 13.0.0 + + intfloat/multilingual-e5-base + 278 + 250 + 512 + True + 14,965 ± 2,890 / 3,322 ± 1,074 + 1.77 + 89.65 ± 0.52 / 89.71 ± 0.48 + 61.46 ± 0.89 / 60.48 ± 2.52 + 51.32 ± 1.98 / 74.21 ± 1.18 + 50.78 ± 1.14 / 63.05 ± 0.89 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot) 70554 @@ -646,23 +646,6 @@ title: English NLU 🇬🇧 14.0.3 14.0.3 - - Nexusflow/Starling-LM-7B-beta (few-shot) - 7242 - 32 - 4096 - False - 4,136 ± 1,282 / 668 ± 326 - 1.83 - 72.77 ± 1.02 / 57.29 ± 1.58 - 70.12 ± 0.78 / 74.54 ± 0.50 - 44.68 ± 0.97 / 71.05 ± 0.52 - 57.17 ± 2.60 / 80.36 ± 1.40 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - ibm-granite/granite-3.0-8b-instruct (few-shot) 8171 @@ -680,6 +663,23 @@ title: English NLU 🇬🇧 14.0.4 14.0.4 + + Nexusflow/Starling-LM-7B-beta (few-shot) + 7242 + 32 + 4096 + False + 4,136 ± 1,282 / 668 ± 326 + 1.84 + 72.77 ± 1.02 / 57.29 ± 1.58 + 70.12 ± 0.78 / 74.54 ± 0.50 + 44.68 ± 0.97 / 71.05 ± 0.52 + 57.17 ± 2.60 / 80.36 ± 1.40 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + model-garden-lms/teams-base-finewebs-1m 135 @@ -687,7 +687,7 @@ title: English NLU 🇬🇧 512 True 31,254 ± 9,103 / 5,078 ± 1,655 - 1.83 + 1.84 88.34 ± 1.00 / 87.76 ± 0.99 58.82 ± 1.65 / 58.36 ± 1.31 48.65 ± 7.63 / 73.04 ± 4.18 @@ -738,7 +738,7 @@ title: English NLU 🇬🇧 32768 True 2,363 ± 794 / 311 ± 105 - 1.85 + 1.86 65.31 ± 3.21 / 60.19 ± 2.58 68.87 ± 0.60 / 66.34 ± 0.90 43.07 ± 1.73 / 70.80 ± 1.06 @@ -789,7 +789,7 @@ title: English NLU 🇬🇧 8191 True 908 ± 303 / 96 ± 36 - 1.88 + 1.89 75.80 ± 0.83 / 52.95 ± 1.04 61.65 ± 2.17 / 71.46 ± 1.72 47.74 ± 4.29 / 73.28 ± 2.23 @@ -799,23 +799,6 @@ title: English NLU 🇬🇧 14.0.1 14.0.1 - - mistralai/Ministral-8B-Instruct-2410 (few-shot) - 8020 - 131 - 32768 - True - 1,302 ± 323 / 253 ± 
86 - 1.89 - 72.40 ± 0.80 / 65.83 ± 1.64 - 63.46 ± 2.10 / 69.49 ± 1.15 - 35.86 ± 7.94 / 65.20 ± 6.98 - 68.42 ± 1.21 / 83.97 ± 0.74 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - google/gemma-2-9b-it (few-shot) 9242 @@ -839,7 +822,7 @@ title: English NLU 🇬🇧 32 2047 True - 3,194 ± 687 / 650 ± 216 + 8,681 ± 1,650 / 2,177 ± 717 1.90 68.69 ± 1.08 / 58.53 ± 1.82 66.77 ± 0.54 / 70.41 ± 0.88 @@ -850,6 +833,23 @@ title: English NLU 🇬🇧 14.0.4 14.0.4 + + mistralai/Ministral-8B-Instruct-2410 (few-shot) + 8020 + 131 + 32768 + True + 1,302 ± 323 / 253 ± 86 + 1.90 + 72.40 ± 0.80 / 65.83 ± 1.64 + 63.46 ± 2.10 / 69.49 ± 1.15 + 35.86 ± 7.94 / 65.20 ± 6.98 + 68.42 ± 1.21 / 83.97 ± 0.74 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 01-ai/Yi-1.5-6B (few-shot) 6061 @@ -901,23 +901,6 @@ title: English NLU 🇬🇧 14.0.4 14.0.4 - - FacebookAI/xlm-roberta-large - 559 - 250 - 512 - True - 17,897 ± 3,921 / 3,463 ± 1,141 - 1.92 - 89.81 ± 0.60 / 89.25 ± 0.72 - 41.97 ± 17.48 / 50.33 ± 9.16 - 35.55 ± 18.61 / 63.79 ± 12.17 - 68.88 ± 1.40 / 79.18 ± 1.17 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - meta-llama/Llama-3.1-8B (few-shot) 8030 @@ -952,6 +935,23 @@ title: English NLU 🇬🇧 14.1.1 14.1.1 + + FacebookAI/xlm-roberta-large + 559 + 250 + 512 + True + 17,897 ± 3,921 / 3,463 ± 1,141 + 1.93 + 89.81 ± 0.60 / 89.25 ± 0.72 + 41.97 ± 17.48 / 50.33 ± 9.16 + 35.55 ± 18.61 / 63.79 ± 12.17 + 68.88 ± 1.40 / 79.18 ± 1.17 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val) 70554 @@ -1037,6 +1037,23 @@ title: English NLU 🇬🇧 9.1.2 12.5.1 + + NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) + 46998 + 68 + 32768 + True + 9,015 ± 2,966 / 1,121 ± 510 + 2.00 + 63.77 ± 2.22 / 57.86 ± 1.49 + 69.23 ± 0.98 / 66.61 ± 1.42 + 38.49 ± 4.39 / 65.15 ± 4.44 + 57.03 ± 4.40 / 76.18 ± 2.59 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + alpindale/Mistral-7B-v0.2-hf (few-shot) 7242 @@ -1044,7 +1061,7 @@ title: English NLU 🇬🇧 32768 True 1,841 ± 297 / 651 ± 193 - 1.99 + 2.00 61.02 ± 2.70 / 55.57 ± 2.50 67.10 ± 0.81 / 70.66 ± 0.76 29.82 ± 5.18 / 62.86 ± 4.72 @@ -1054,6 +1071,23 @@ title: English NLU 🇬🇧 12.5.2 12.5.2 + + meta-llama/Meta-Llama-3-8B-Instruct (few-shot) + 8030 + 128 + 8192 + True + 1,483 ± 377 / 287 ± 97 + 2.00 + 75.02 ± 1.31 / 69.47 ± 1.18 + 67.64 ± 1.12 / 71.04 ± 1.17 + 32.29 ± 3.05 / 64.85 ± 2.07 + 54.84 ± 2.22 / 79.10 ± 1.10 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + mistralai/Mistral-7B-v0.3 (few-shot) 7248 @@ -1061,7 +1095,7 @@ title: English NLU 🇬🇧 32768 True 1,364 ± 343 / 266 ± 90 - 1.99 + 2.00 61.02 ± 2.74 / 55.65 ± 2.55 67.29 ± 0.80 / 70.81 ± 0.84 30.10 ± 5.12 / 62.99 ± 4.71 @@ -1078,7 +1112,7 @@ title: English NLU 🇬🇧 32768 False 2,413 ± 463 / 700 ± 220 - 1.99 + 2.00 67.52 ± 1.19 / 59.09 ± 2.64 63.10 ± 1.92 / 70.11 ± 0.80 37.75 ± 2.52 / 67.53 ± 1.75 @@ -1088,40 +1122,6 @@ title: English NLU 🇬🇧 12.6.1 12.6.1 - - NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) - 46998 - 68 - 32768 - True - 9,015 ± 2,966 / 1,121 ± 510 - 2.00 - 63.77 ± 2.22 / 57.86 ± 1.49 - 69.23 ± 0.98 / 66.61 ± 1.42 - 38.49 ± 4.39 / 65.15 ± 4.44 - 57.03 ± 4.40 / 76.18 ± 2.59 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - - - meta-llama/Meta-Llama-3-8B-Instruct (few-shot) - 8030 - 128 - 8192 - True - 1,007 ± 316 / 162 ± 45 - 2.00 - 75.02 ± 1.31 / 69.47 ± 1.18 - 67.64 ± 1.12 / 71.04 ± 1.17 - 32.29 ± 3.05 / 64.85 ± 2.07 - 54.84 ± 2.22 / 79.10 ± 1.10 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - VAGOsolutions/SauerkrautLM-7b-HerO (few-shot, val) 7242 @@ -1190,30 +1190,13 @@ title: English NLU 🇬🇧 14.0.4 14.0.4 - - meta-llama/Llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131200 - True - 1,005 ± 330 / 196 ± 74 
- 2.03 - 76.95 ± 0.95 / 72.47 ± 0.82 - 68.12 ± 0.92 / 72.48 ± 0.53 - 34.34 ± 3.37 / 65.84 ± 1.59 - 47.88 ± 3.37 / 76.21 ± 1.69 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - meta-llama/Meta-Llama-3-8B (few-shot) 8030 128 8192 True - 1,335 ± 338 / 260 ± 88 + 1,477 ± 376 / 285 ± 97 2.03 66.31 ± 2.09 / 58.68 ± 1.95 64.30 ± 0.65 / 69.26 ± 0.50 @@ -1224,6 +1207,23 @@ title: English NLU 🇬🇧 12.6.1 12.6.1 + + meta-llama/Llama-3.1-8B-Instruct (few-shot) + 8030 + 128 + 131200 + True + 1,473 ± 377 / 283 ± 96 + 2.04 + 76.95 ± 0.95 / 72.47 ± 0.82 + 68.12 ± 0.92 / 72.48 ± 0.53 + 34.34 ± 3.37 / 65.84 ± 1.59 + 47.88 ± 3.37 / 76.21 ± 1.69 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + occiglot/occiglot-7b-de-en-instruct (few-shot) 7242 @@ -1265,7 +1265,7 @@ title: English NLU 🇬🇧 512 True 36,949 ± 10,732 / 6,211 ± 2,022 - 2.06 + 2.07 87.70 ± 0.70 / 87.39 ± 0.55 54.00 ± 1.92 / 58.25 ± 2.11 39.99 ± 7.03 / 68.38 ± 4.21 @@ -1343,23 +1343,6 @@ title: English NLU 🇬🇧 14.1.2 14.0.4 - - google/gemma-7b-it (few-shot) - 8538 - 256 - 8317 - False - 1,792 ± 249 / 668 ± 203 - 2.11 - 66.70 ± 0.99 / 61.08 ± 1.16 - 55.62 ± 2.54 / 64.98 ± 2.03 - 31.36 ± 2.63 / 65.21 ± 1.16 - 72.58 ± 0.68 / 84.67 ± 0.91 - 12.10.0 - 12.10.0 - 12.10.0 - 12.10.0 - ibm-granite/granite-8b-code-base-4k (few-shot) 8055 @@ -1394,6 +1377,23 @@ title: English NLU 🇬🇧 12.10.5 12.10.5 + + google/gemma-7b-it (few-shot) + 8538 + 256 + 8317 + False + 1,792 ± 249 / 668 ± 203 + 2.12 + 66.70 ± 0.99 / 61.08 ± 1.16 + 55.62 ± 2.54 / 64.98 ± 2.03 + 31.36 ± 2.63 / 65.21 ± 1.16 + 72.58 ± 0.68 / 84.67 ± 0.91 + 12.10.0 + 12.10.0 + 12.10.0 + 12.10.0 + meta-llama/Llama-2-13b-hf (few-shot) 13016 @@ -1428,23 +1428,6 @@ title: English NLU 🇬🇧 9.3.1 9.3.1 - - ibm-granite/granite-3.0-2b-instruct (few-shot) - 2634 - 49 - 4224 - True - 10,194 ± 2,403 / 2,193 ± 731 - 2.14 - 61.97 ± 1.74 / 54.58 ± 1.53 - 67.54 ± 1.33 / 66.16 ± 1.54 - 31.70 ± 2.00 / 65.43 ± 1.09 - 59.78 ± 1.66 / 78.74 ± 0.76 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - google-bert/bert-base-multilingual-cased 177 @@ -1462,6 +1445,23 @@ title: English NLU 🇬🇧 0.0.0 0.0.0 + + ibm-granite/granite-3.0-2b-instruct (few-shot) + 2634 + 49 + 4224 + True + 10,194 ± 2,403 / 2,193 ± 731 + 2.15 + 61.97 ± 1.74 / 54.58 ± 1.53 + 67.54 ± 1.33 / 66.16 ± 1.54 + 31.70 ± 2.00 / 65.43 ± 1.09 + 59.78 ± 1.66 / 78.74 ± 0.76 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + meta-llama/Llama-3.2-3B-Instruct (few-shot) 3213 @@ -1513,23 +1513,6 @@ title: English NLU 🇬🇧 12.3.1 12.3.1 - - senseable/WestLake-7B-v2 (few-shot) - 7242 - 32 - 32768 - False - 5,993 ± 1,028 / 1,742 ± 561 - 2.15 - 70.62 ± 0.90 / 58.92 ± 2.15 - 67.78 ± 1.03 / 72.29 ± 0.47 - 30.99 ± 2.94 / 62.20 ± 2.56 - 49.56 ± 2.85 / 76.72 ± 1.15 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - google/gemma-7b (few-shot) 8538 @@ -1547,6 +1530,23 @@ title: English NLU 🇬🇧 12.9.1 12.9.1 + + senseable/WestLake-7B-v2 (few-shot) + 7242 + 32 + 32768 + False + 5,993 ± 1,028 / 1,742 ± 561 + 2.16 + 70.62 ± 0.90 / 58.92 ± 2.15 + 67.78 ± 1.03 / 72.29 ± 0.47 + 30.99 ± 2.94 / 62.20 ± 2.56 + 49.56 ± 2.85 / 76.72 ± 1.15 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + Qwen/Qwen1.5-4B (few-shot) 3950 @@ -1673,7 +1673,7 @@ title: English NLU 🇬🇧 32768 False 2,088 ± 352 / 706 ± 214 - 2.20 + 2.21 56.90 ± 3.08 / 51.16 ± 2.56 62.10 ± 1.65 / 68.81 ± 0.76 20.17 ± 3.68 / 54.76 ± 4.24 @@ -1690,7 +1690,7 @@ title: English NLU 🇬🇧 512 True 34,520 ± 7,443 / 6,730 ± 2,224 - 2.23 + 2.24 85.88 ± 0.99 / 86.21 ± 0.87 51.46 ± 1.15 / 51.20 ± 0.50 35.83 ± 11.08 / 65.86 ± 4.95 @@ -1717,23 +1717,6 @@ title: English NLU 🇬🇧 0.0.0 0.0.0 - - Twitter/twhin-bert-large - 562 - 250 - 512 - 
True - 9,707 ± 1,664 / 2,549 ± 831 - 2.25 - 89.50 ± 0.47 / 89.39 ± 0.41 - 45.98 ± 2.97 / 49.81 ± 2.03 - 30.58 ± 13.07 / 61.93 ± 7.87 - 48.44 ± 1.37 / 59.47 ± 1.12 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - meta-llama/Llama-2-7b-hf (few-shot) 6738 @@ -1751,6 +1734,23 @@ title: English NLU 🇬🇧 9.2.0 12.5.1 + + Twitter/twhin-bert-large + 562 + 250 + 512 + True + 9,707 ± 1,664 / 2,549 ± 831 + 2.26 + 89.50 ± 0.47 / 89.39 ± 0.41 + 45.98 ± 2.97 / 49.81 ± 2.03 + 30.58 ± 13.07 / 61.93 ± 7.87 + 48.44 ± 1.37 / 59.47 ± 1.12 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + cstr/Spaetzle-v8-7b (few-shot, val) 7242 @@ -1809,7 +1809,7 @@ title: English NLU 🇬🇧 32768 False 2,370 ± 416 / 711 ± 242 - 2.27 + 2.28 62.11 ± 1.61 / 52.36 ± 2.00 59.91 ± 2.10 / 68.92 ± 1.21 30.66 ± 3.60 / 64.32 ± 2.03 @@ -1819,6 +1819,23 @@ title: English NLU 🇬🇧 9.3.1 12.4.0 + + occiglot/occiglot-7b-eu5 (few-shot) + 7242 + 32 + 32768 + True + 2,219 ± 427 / 717 ± 224 + 2.29 + 55.37 ± 2.94 / 51.08 ± 2.87 + 63.32 ± 1.29 / 68.50 ± 0.53 + 18.92 ± 2.39 / 57.96 ± 1.89 + 72.38 ± 2.57 / 83.46 ± 1.49 + 12.5.2 + 12.1.0 + 12.1.0 + 12.1.0 + meta-llama/Llama-2-7b-chat-hf (few-shot) 6738 @@ -1826,7 +1843,7 @@ title: English NLU 🇬🇧 4096 False 2,643 ± 455 / 800 ± 247 - 2.29 + 2.30 62.53 ± 1.35 / 53.42 ± 2.04 62.23 ± 1.29 / 68.09 ± 1.34 22.71 ± 1.81 / 60.79 ± 1.08 @@ -1843,7 +1860,7 @@ title: English NLU 🇬🇧 131072 True 7,312 ± 1,668 / 1,609 ± 525 - 2.29 + 2.30 64.09 ± 0.96 / 49.92 ± 2.47 46.77 ± 4.36 / 60.99 ± 2.15 31.62 ± 2.25 / 63.73 ± 1.79 @@ -1853,23 +1870,6 @@ title: English NLU 🇬🇧 12.9.1 12.9.1 - - occiglot/occiglot-7b-eu5 (few-shot) - 7242 - 32 - 32768 - True - 2,219 ± 427 / 717 ± 224 - 2.29 - 55.37 ± 2.94 / 51.08 ± 2.87 - 63.32 ± 1.29 / 68.50 ± 0.53 - 18.92 ± 2.39 / 57.96 ± 1.89 - 72.38 ± 2.57 / 83.46 ± 1.49 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 - NorwAI/NorwAI-Mixtral-8x7B (few-shot) 46998 @@ -1894,7 +1894,7 @@ title: English NLU 🇬🇧 131200 True 3,713 ± 877 / 836 ± 267 - 2.35 + 2.36 59.09 ± 1.44 / 52.03 ± 1.96 63.29 ± 1.29 / 67.82 ± 0.74 13.50 ± 4.14 / 50.33 ± 5.61 @@ -2081,7 +2081,7 @@ title: English NLU 🇬🇧 512 True 11,514 ± 2,041 / 2,862 ± 918 - 2.48 + 2.49 87.77 ± 0.51 / 87.83 ± 0.47 41.09 ± 4.38 / 48.17 ± 3.16 32.26 ± 10.79 / 61.20 ± 5.53 @@ -2091,23 +2091,6 @@ title: English NLU 🇬🇧 12.6.1 12.6.1 - - sentence-transformers/stsb-xlm-r-multilingual - 278 - 250 - 512 - True - 15,040 ± 2,953 / 3,417 ± 1,100 - 2.48 - 82.39 ± 0.62 / 83.07 ± 0.48 - 57.35 ± 1.00 / 54.82 ± 0.65 - 47.29 ± 2.10 / 71.85 ± 1.30 - 4.29 ± 0.33 / 10.53 ± 0.29 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - ibm-granite/granite-3.0-3b-a800m-base (few-shot) 3374 @@ -2125,6 +2108,23 @@ title: English NLU 🇬🇧 13.0.0 13.0.0 + + sentence-transformers/stsb-xlm-r-multilingual + 278 + 250 + 512 + True + 15,040 ± 2,953 / 3,417 ± 1,100 + 2.49 + 82.39 ± 0.62 / 83.07 ± 0.48 + 57.35 ± 1.00 / 54.82 ± 0.65 + 47.29 ± 2.10 / 71.85 ± 1.30 + 4.29 ± 0.33 / 10.53 ± 0.29 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + allenai/OLMo-1.7-7B-hf (few-shot) 6888 @@ -2210,23 +2210,6 @@ title: English NLU 🇬🇧 12.6.1 12.6.1 - - distilbert/distilbert-base-multilingual-cased - 135 - 120 - 512 - True - 26,355 ± 5,946 / 5,266 ± 1,714 - 2.55 - 87.70 ± 0.68 / 87.67 ± 0.59 - 36.48 ± 3.00 / 45.07 ± 1.32 - 40.79 ± 2.42 / 68.71 ± 2.04 - 29.00 ± 0.89 / 40.37 ± 0.77 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot) 20918 @@ -2244,6 +2227,23 @@ title: English NLU 🇬🇧 9.3.1 9.3.1 + + distilbert/distilbert-base-multilingual-cased + 135 + 120 + 512 + True + 26,355 ± 5,946 / 5,266 ± 1,714 + 2.56 + 87.70 ± 0.68 / 87.67 ± 0.59 + 
36.48 ± 3.00 / 45.07 ± 1.32 + 40.79 ± 2.42 / 68.71 ± 2.04 + 29.00 ± 0.89 / 40.37 ± 0.77 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + nvidia/mistral-nemo-minitron-8b-instruct (few-shot) 8414 @@ -2278,23 +2278,6 @@ title: English NLU 🇬🇧 13.2.0 13.2.0 - - AI-Sweden-Models/gpt-sw3-20b (few-shot) - 20918 - 64 - 2048 - True - 1,875 ± 673 / 261 ± 91 - 2.62 - 45.86 ± 3.18 / 40.23 ± 2.41 - 62.08 ± 3.29 / 55.11 ± 1.68 - 6.62 ± 2.43 / 48.79 ± 3.77 - 65.29 ± 1.81 / 77.71 ± 0.98 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) 7800 @@ -2312,6 +2295,23 @@ title: English NLU 🇬🇧 13.0.0 13.0.0 + + AI-Sweden-Models/gpt-sw3-20b (few-shot) + 20918 + 64 + 2048 + True + 1,875 ± 673 / 261 ± 91 + 2.63 + 45.86 ± 3.18 / 40.23 ± 2.41 + 62.08 ± 3.29 / 55.11 ± 1.68 + 6.62 ± 2.43 / 48.79 ± 3.77 + 65.29 ± 1.81 / 77.71 ± 0.98 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + Geotrend/distilbert-base-25lang-cased 109 @@ -2404,7 +2404,7 @@ title: English NLU 🇬🇧 8192 True 16,249 ± 3,690 / 3,689 ± 1,226 - 2.74 + 2.75 41.57 ± 4.29 / 37.51 ± 3.05 62.32 ± 1.12 / 67.09 ± 0.96 8.04 ± 3.17 / 48.16 ± 5.38 @@ -2421,7 +2421,7 @@ title: English NLU 🇬🇧 131200 False 7,436 ± 1,846 / 1,508 ± 479 - 2.74 + 2.75 56.41 ± 1.79 / 52.05 ± 1.57 59.46 ± 1.16 / 65.61 ± 1.08 8.36 ± 0.71 / 49.50 ± 2.90 @@ -2431,6 +2431,23 @@ title: English NLU 🇬🇧 13.0.0 13.0.0 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 2.78 + 43.00 ± 1.94 / 39.96 ± 1.87 + 54.47 ± 1.37 / 64.39 ± 0.56 + 17.44 ± 1.67 / 50.79 ± 2.44 + 53.15 ± 1.38 / 70.06 ± 0.79 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + LumiOpen/Viking-13B (few-shot) 14030 @@ -2455,7 +2472,7 @@ title: English NLU 🇬🇧 32768 False 8,304 ± 1,846 / 1,933 ± 617 - 2.80 + 2.81 40.89 ± 2.63 / 37.44 ± 2.39 55.33 ± 1.77 / 64.53 ± 0.70 11.23 ± 1.81 / 52.85 ± 2.65 @@ -2523,7 +2540,7 @@ title: English NLU 🇬🇧 8192 True 6,087 ± 1,046 / 1,902 ± 563 - 2.88 + 2.89 19.65 ± 5.96 / 18.64 ± 5.49 62.14 ± 1.16 / 67.81 ± 0.65 8.30 ± 1.63 / 45.01 ± 3.82 @@ -2567,23 +2584,6 @@ title: English NLU 🇬🇧 12.5.2 12.5.2 - - google/gemma-2b-it (few-shot) - 2506 - 256 - 8192 - False - 6,471 ± 1,142 / 1,961 ± 584 - 2.95 - 40.05 ± 2.56 / 33.77 ± 1.94 - 48.83 ± 1.00 / 60.88 ± 0.70 - 5.83 ± 1.52 / 50.74 ± 1.73 - 63.77 ± 1.40 / 76.59 ± 0.77 - 12.5.2 - 12.1.0 - 12.1.0 - 12.4.0 - NorwAI/NorwAI-Llama2-7B (few-shot) 7033 @@ -2601,6 +2601,23 @@ title: English NLU 🇬🇧 12.10.4 12.10.4 + + google/gemma-2b-it (few-shot) + 2506 + 256 + 8192 + False + 6,471 ± 1,142 / 1,961 ± 584 + 2.96 + 40.05 ± 2.56 / 33.77 ± 1.94 + 48.83 ± 1.00 / 60.88 ± 0.70 + 5.83 ± 1.52 / 50.74 ± 1.73 + 63.77 ± 1.40 / 76.59 ± 0.77 + 12.5.2 + 12.1.0 + 12.1.0 + 12.4.0 + ibm-granite/granite-3.0-1b-a400m-base (few-shot) 1385 @@ -2659,7 +2676,7 @@ title: English NLU 🇬🇧 4096 True 3,035 ± 503 / 911 ± 300 - 3.02 + 3.03 35.84 ± 5.12 / 35.76 ± 4.29 56.87 ± 8.16 / 59.62 ± 5.61 3.08 ± 2.98 / 40.66 ± 4.63 @@ -2693,7 +2710,7 @@ title: English NLU 🇬🇧 32768 False 11,740 ± 3,000 / 2,209 ± 721 - 3.08 + 3.09 33.86 ± 2.16 / 32.80 ± 2.21 55.41 ± 2.17 / 54.48 ± 1.65 1.15 ± 1.81 / 34.47 ± 1.12 @@ -2703,23 +2720,6 @@ title: English NLU 🇬🇧 12.1.0 12.5.0 - - HuggingFaceTB/SmolLM2-360M-Instruct (few-shot) - 362 - 49 - 8192 - True - 21,777 ± 6,115 / 3,617 ± 1,211 - 3.14 - 30.73 ± 4.30 / 29.47 ± 4.10 - 59.51 ± 3.73 / 54.82 ± 2.43 - 1.55 ± 1.90 / 43.18 ± 5.08 - 49.03 ± 1.47 / 60.00 ± 1.53 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - Qwen/Qwen1.5-0.5B (few-shot) 620 @@ -2737,6 +2737,23 @@ title: English NLU 🇬🇧 12.1.0 12.1.0 + + HuggingFaceTB/SmolLM2-360M-Instruct (few-shot) + 
362 + 49 + 8192 + True + 21,777 ± 6,115 / 3,617 ± 1,211 + 3.15 + 30.73 ± 4.30 / 29.47 ± 4.10 + 59.51 ± 3.73 / 54.82 ± 2.43 + 1.55 ± 1.90 / 43.18 ± 5.08 + 49.03 ± 1.47 / 60.00 ± 1.53 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + allenai/OLMo-1B (few-shot) 1177 @@ -2744,7 +2761,7 @@ title: English NLU 🇬🇧 2176 True 8,536 ± 1,926 / 1,940 ± 619 - 3.16 + 3.17 26.47 ± 6.25 / 28.27 ± 5.35 60.05 ± 3.94 / 56.18 ± 1.90 0.72 ± 1.90 / 42.84 ± 3.50 @@ -2761,7 +2778,7 @@ title: English NLU 🇬🇧 4096 False 1,254 ± 328 / 243 ± 83 - 3.17 + 3.18 50.73 ± 2.64 / 38.64 ± 1.60 27.52 ± 3.38 / 31.81 ± 3.98 2.96 ± 2.64 / 35.23 ± 1.82 @@ -2778,7 +2795,7 @@ title: English NLU 🇬🇧 512 True 33,753 ± 8,349 / 5,937 ± 1,946 - 3.17 + 3.18 81.71 ± 0.66 / 82.33 ± 0.53 50.69 ± 1.19 / 50.90 ± 0.50 2.16 ± 1.58 / 49.99 ± 1.47 @@ -2795,7 +2812,7 @@ title: English NLU 🇬🇧 512 True 26,458 ± 5,992 / 5,274 ± 1,731 - 3.17 + 3.18 81.71 ± 0.66 / 82.33 ± 0.53 50.69 ± 1.19 / 50.90 ± 0.50 2.16 ± 1.58 / 49.99 ± 1.47 @@ -2812,7 +2829,7 @@ title: English NLU 🇬🇧 32896 True 2,722 ± 495 / 766 ± 250 - 3.17 + 3.18 28.63 ± 4.74 / 27.07 ± 4.13 66.55 ± 0.72 / 58.18 ± 0.62 1.47 ± 1.57 / 45.89 ± 2.92 @@ -2880,7 +2897,7 @@ title: English NLU 🇬🇧 4096 True 1,438 ± 410 / 233 ± 79 - 3.27 + 3.28 44.48 ± 3.17 / 36.31 ± 2.23 23.69 ± 3.36 / 25.98 ± 3.59 8.52 ± 2.60 / 51.57 ± 2.62 @@ -2897,7 +2914,7 @@ title: English NLU 🇬🇧 8192 True 22,023 ± 6,203 / 3,675 ± 1,231 - 3.28 + 3.29 31.14 ± 1.79 / 28.54 ± 0.86 43.97 ± 5.28 / 55.08 ± 4.26 3.49 ± 2.49 / 46.52 ± 4.13 @@ -2924,23 +2941,6 @@ title: English NLU 🇬🇧 14.1.2 14.0.4 - - PleIAs/Pleias-Nano (few-shot) - 1195 - 66 - 2048 - True - 2,519 ± 841 / 323 ± 104 - 3.56 - 21.60 ± 4.23 / 23.22 ± 3.33 - 45.04 ± 5.19 / 50.60 ± 4.14 - -0.46 ± 1.47 / 44.56 ± 3.38 - 33.46 ± 2.84 / 44.25 ± 3.38 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - sentence-transformers/distiluse-base-multilingual-cased-v2 135 @@ -2975,6 +2975,23 @@ title: English NLU 🇬🇧 12.6.1 12.6.1 + + PleIAs/Pleias-Nano (few-shot) + 1195 + 66 + 2048 + True + 2,519 ± 841 / 323 ± 104 + 3.57 + 21.60 ± 4.23 / 23.22 ± 3.33 + 45.04 ± 5.19 / 50.60 ± 4.14 + -0.46 ± 1.47 / 44.56 ± 3.38 + 33.46 ± 2.84 / 44.25 ± 3.38 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + sentence-transformers/distiluse-base-multilingual-cased-v1 135 @@ -3050,7 +3067,7 @@ title: English NLU 🇬🇧 131072 True 3,424 ± 1,080 / 464 ± 158 - 3.95 + 3.96 3.98 ± 3.16 / 3.68 ± 2.99 39.54 ± 6.40 / 48.55 ± 7.75 3.41 ± 1.85 / 41.04 ± 4.56 @@ -3101,7 +3118,7 @@ title: English NLU 🇬🇧 8192 True 25,602 ± 7,583 / 3,953 ± 1,325 - 4.02 + 4.03 29.96 ± 3.19 / 28.98 ± 3.29 18.64 ± 8.52 / 28.83 ± 5.86 1.85 ± 1.20 / 44.03 ± 3.98 @@ -3118,7 +3135,7 @@ title: English NLU 🇬🇧 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.03 + 4.04 31.79 ± 3.88 / 31.32 ± 2.81 19.13 ± 9.92 / 33.51 ± 6.97 -0.03 ± 1.07 / 36.37 ± 2.34 @@ -3152,7 +3169,7 @@ title: English NLU 🇬🇧 512 True 12,783 ± 2,537 / 2,712 ± 885 - 4.08 + 4.09 68.25 ± 2.50 / 70.44 ± 2.50 1.92 ± 1.68 / 28.84 ± 2.38 1.08 ± 1.34 / 47.42 ± 1.71 @@ -3203,7 +3220,7 @@ title: English NLU 🇬🇧 1024 True 11,734 ± 3,124 / 2,174 ± 720 - 4.80 + 4.81 1.55 ± 1.98 / 1.45 ± 1.82 3.71 ± 3.16 / 22.09 ± 2.08 -0.42 ± 1.56 / 40.58 ± 3.74 @@ -3220,7 +3237,7 @@ title: English NLU 🇬🇧 4096 True 3,024 ± 496 / 909 ± 301 - 4.84 + 4.85 12.34 ± 2.70 / 12.41 ± 2.54 -1.48 ± 3.09 / 21.17 ± 2.22 -0.48 ± 1.52 / 42.45 ± 3.99 @@ -3237,7 +3254,7 @@ title: English NLU 🇬🇧 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 4.94 + 4.95 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 19.61 ± 0.22 2.48 ± 1.94 / 34.52 ± 0.85 @@ -3254,7 +3271,7 @@ title: English NLU 🇬🇧 512 True 5,847 ± 
1,029 / 1,640 ± 525 - 4.99 + 5.00 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 19.61 ± 0.22 0.41 ± 0.55 / 33.46 ± 0.37 diff --git a/faroese-nlu.csv b/faroese-nlu.csv index a304928e..9a47a235 100644 --- a/faroese-nlu.csv +++ b/faroese-nlu.csv @@ -2,65 +2,65 @@ model_id,num_model_parameters,vocabulary_size,max_sequence_length,commercially_l "gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,2.1,86.51,38.22,35.09,58.65 vesteinn/FoBERT,124,50,512,True,False,15623,2.19,91.31,10.69,64.39,27.09 meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,2.38,81.95,53.25,14.29,60.41 -Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,2.42,76.56,46.61,23.45,55.28 +Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,2.41,76.56,46.61,23.45,55.28 meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,2.42,79.04,52.33,15.72,59.08 ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,2.52,81.3,60.99,12.49,48.47 meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,2.54,78.46,51.6,17.25,50.98 NbAiLab/nb-roberta-base-scandi-1e4,277,250,512,True,False,15074,2.65,90.52,11.53,44.99,25.14 -"gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,2.69,68.0,27.3,28.09,58.59 +"gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,2.68,68.0,27.3,28.09,58.59 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.72,73.26,47.71,10.41,57.08 Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,2.72,77.19,45.55,16.92,47.51 -nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.73,64.88,52.56,14.28,50.11 -sentence-transformers/use-cmlm-multilingual,470,501,512,True,False,30231,2.78,88.81,15.45,30.92,25.48 -microsoft/mdeberta-v3-base,278,251,512,True,False,20637,2.81,88.6,6.7,46.81,20.96 -"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.86,53.92,35.05,23.12,54.99 +nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.72,64.88,52.56,14.28,50.11 +sentence-transformers/use-cmlm-multilingual,470,501,512,True,False,30231,2.79,88.81,15.45,30.92,25.48 +microsoft/mdeberta-v3-base,278,251,512,True,False,20637,2.82,88.6,6.7,46.81,20.96 +"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.85,53.92,35.05,23.12,54.99 "claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.91,72.52,8.17,32.38,45.34 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,3.02,70.61,45.78,4.58,50.67 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,3.02,70.61,45.78,4.58,50.67 setu4993/LaBSE,470,501,512,True,False,25418,3.02,89.16,21.57,22.76,30.55 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,3.06,73.8,39.45,34.78,0.87 -"gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,3.08,57.52,13.18,28.03,59.06 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,3.09,67.67,48.54,3.89,47.07 +"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,3.05,73.8,39.45,34.78,0.87 +"gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,3.07,57.52,13.18,28.03,59.06 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,3.09,67.67,48.54,3.89,47.07 mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,3.11,67.97,45.19,5.21,43.26 "gpt-4o-2024-05-13 (few-shot, 
val)",-1,200,128000,True,False,916,3.2,81.86,27.3,-0.97,56.45 +nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,3.2,66.18,39.48,5.78,44.81 pere/roberta-base-exp-32,277,250,512,True,False,15081,3.2,90.6,11.28,22.86,24.9 -nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,3.21,66.18,39.48,5.78,44.81 -mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,3.25,55.31,51.62,15.07,18.22 -vesteinn/ScandiBERT-no-faroese,124,50,512,True,False,15436,3.27,88.14,6.88,27.71,20.47 +mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,3.24,55.31,51.62,15.07,18.22 +vesteinn/ScandiBERT-no-faroese,124,50,512,True,False,15436,3.28,88.14,6.88,27.71,20.47 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,3.29,63.43,43.99,9.38,25.35 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,3.38,64.53,44.01,2.39,34.84 -NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,3.4,58.62,40.74,5.58,28.04 -mideind/IceBERT-xlmr-ic3,277,250,512,True,False,11004,3.42,87.79,7.8,22.51,11.16 +NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,3.39,58.62,40.74,5.58,28.04 +mideind/IceBERT-xlmr-ic3,277,250,512,True,False,11004,3.43,87.79,7.8,22.51,11.16 google/rembert,575,250,256,True,False,11736,3.45,87.35,0.04,14.65,36.1 -intfloat/multilingual-e5-large-instruct,560,250,514,True,False,5947,3.46,88.64,23.63,2.05,24.09 +intfloat/multilingual-e5-large-instruct,560,250,514,True,False,5947,3.47,88.64,23.63,2.05,24.09 mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,3.48,65.55,29.49,2.05,47.72 -NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,3.52,62.6,31.36,5.21,34.26 -intfloat/multilingual-e5-large,559,250,512,True,False,6732,3.52,88.39,18.28,2.85,31.03 +NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,3.51,62.6,31.36,5.21,34.26 jonfd/electra-small-nordic,22,96,128,True,False,5989,3.52,85.8,-0.16,30.88,0.0 +intfloat/multilingual-e5-large,559,250,512,True,False,6732,3.53,88.39,18.28,2.85,31.03 +CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,3.54,64.72,28.57,5.12,38.83 meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,3.54,65.65,24.3,0.61,45.01 -CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,3.55,64.72,28.57,5.12,38.83 mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,3.55,62.63,25.57,2.84,44.06 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131200,False,False,10424,3.56,58.24,32.79,1.77,45.13 -mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,3.57,61.32,26.73,1.3,44.98 +mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,3.56,61.32,26.73,1.3,44.98 Mabeck/Heidrun-Mistral-7B-chat (few-shot),7242,32,32768,False,False,1419,3.59,63.4,26.9,2.16,40.73 -mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,3.6,61.28,32.07,1.68,39.0 -utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,3.6,54.79,35.84,0.0,41.94 +mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,3.59,61.28,32.07,1.68,39.0 +utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,3.59,54.79,35.84,0.0,41.94 ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4224,True,False,2515,3.61,61.47,24.35,1.44,41.54 -Geotrend/bert-base-25lang-cased,151,85,512,True,False,13908,3.62,86.09,5.17,15.24,14.82 
-NbAiLab/nb-sbert-base,177,120,512,True,False,17757,3.62,86.2,19.12,11.8,7.47 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,3.62,61.11,19.4,2.02,50.34 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,3.62,61.11,19.4,2.02,50.34 +Geotrend/bert-base-25lang-cased,151,85,512,True,False,13908,3.63,86.09,5.17,15.24,14.82 +NbAiLab/nb-sbert-base,177,120,512,True,False,17757,3.63,86.2,19.12,11.8,7.47 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,3.63,59.96,28.33,2.24,39.52 AI-Sweden-Models/roberta-large-1160k,354,50,512,True,False,14014,3.68,88.24,6.42,1.73,35.08 ltg/norbert3-base,124,50,512,True,False,11405,3.68,86.94,1.08,12.35,24.57 mhenrichsen/hestenettetLM (few-shot),7242,32,4096,True,False,1151,3.68,61.81,24.67,1.32,35.62 -nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.69,67.72,42.98,6.4,0.07 +nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.68,67.72,42.98,6.4,0.07 intfloat/multilingual-e5-base,278,250,512,True,False,14965,3.73,87.44,10.97,7.38,14.8 mideind/IceBERT-large,406,50,512,True,False,5677,3.73,86.84,2.92,9.82,21.19 NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,3.75,65.06,48.44,3.3,0.0 -mideind/IceBERT-ic3,124,50,512,True,False,12119,3.77,87.22,15.94,6.23,2.98 -mideind/IceBERT,124,50,512,True,False,16697,3.77,86.5,8.64,10.13,5.35 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,3.78,66.82,21.19,-0.36,36.47 -FacebookAI/xlm-roberta-large,559,250,512,True,False,17897,3.79,87.85,5.14,1.17,27.72 +mideind/IceBERT-ic3,124,50,512,True,False,12119,3.78,87.22,15.94,6.23,2.98 +mideind/IceBERT,124,50,512,True,False,16697,3.78,86.5,8.64,10.13,5.35 vesteinn/XLMR-ENIS,125,50,512,True,False,10711,3.79,87.09,15.11,3.09,5.75 +FacebookAI/xlm-roberta-large,559,250,512,True,False,17897,3.8,87.85,5.14,1.17,27.72 ZurichNLP/unsup-simcse-xlm-roberta-base,277,250,512,True,False,34520,3.8,84.14,21.2,1.33,14.2 CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,3.81,62.22,17.34,0.01,38.7 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,3.83,62.2,26.68,7.07,11.97 @@ -69,44 +69,44 @@ occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,3.9, NbAiLab/nb-roberta-base-scandinavian,125,50,512,True,False,14051,3.91,86.1,4.8,6.28,9.89 ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,3.91,60.46,21.59,0.51,33.54 clips/mfaq,277,250,512,True,False,5591,3.92,85.86,16.49,0.8,2.17 -KBLab/megatron-bert-base-swedish-cased-125k,135,64,512,True,False,15763,3.93,80.61,2.87,9.6,11.57 -mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,3.93,55.42,15.85,1.11,33.54 +mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,3.92,55.42,15.85,1.11,33.54 +KBLab/megatron-bert-base-swedish-cased-125k,135,64,512,True,False,15763,3.94,80.61,2.87,9.6,11.57 +occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,3.94,58.67,10.39,0.0,40.95 AI-Nordics/bert-large-swedish-cased,334,31,512,True,False,7199,3.95,83.22,2.93,6.78,15.94 KBLab/megatron-bert-large-swedish-cased-165k,369,64,512,True,False,7138,3.95,82.76,1.22,7.58,16.13 bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,3.95,53.38,21.11,0.46,35.99 -occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,3.95,58.67,10.39,0.0,40.95 -vesteinn/IceBERT,163,50,512,True,False,12360,3.95,87.13,8.64,3.66,5.07 
-mideind/IceBERT-mC4-is,163,50,512,True,False,12308,3.98,88.44,1.73,11.83,0.0 +vesteinn/IceBERT,163,50,512,True,False,12360,3.96,87.13,8.64,3.66,5.07 timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,3.98,58.96,8.97,0.0,39.2 -Geotrend/bert-base-da-cased,103,23,512,True,False,15432,3.99,86.62,2.99,3.64,13.84 mideind/IceBERT-igc,124,50,512,True,False,12551,3.99,83.82,12.12,4.93,5.17 +mideind/IceBERT-mC4-is,163,50,512,True,False,12308,3.99,88.44,1.73,11.83,0.0 +Geotrend/bert-base-da-cased,103,23,512,True,False,15432,4.0,86.62,2.99,3.64,13.84 sentence-transformers/stsb-xlm-r-multilingual,277,250,512,True,False,15040,4.0,82.97,18.07,2.93,0.0 -KBLab/megatron-bert-base-swedish-cased-600k,135,64,512,True,False,15726,4.01,81.02,7.39,4.0,12.98 +KBLab/megatron-bert-base-swedish-cased-600k,135,64,512,True,False,15726,4.02,81.02,7.39,4.0,12.98 +meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,4.04,59.77,13.24,-0.54,31.87 KBLab/bert-base-swedish-cased-new,135,64,512,True,False,15933,4.05,84.02,0.92,5.65,7.14 -meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,4.05,59.77,13.24,-0.54,31.87 KBLab/megatron-bert-large-swedish-cased-110k,369,64,512,True,False,7075,4.06,82.36,6.99,5.2,2.65 +MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,4.06,40.18,14.19,0.31,41.6 senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,4.07,67.42,20.01,7.02,0.65 -MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,4.08,40.18,14.19,0.31,41.6 flax-community/nordic-roberta-wiki,124,50,512,True,False,16227,4.1,82.64,5.78,8.03,0.0 sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,4.13,82.91,9.77,1.67,0.0 sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,4.13,82.91,9.77,1.67,0.0 google-bert/bert-base-multilingual-uncased,167,106,512,True,False,13993,4.14,73.06,2.33,5.48,11.29 -patrickvonplaten/norwegian-roberta-base,124,50,512,True,False,15698,4.16,82.57,1.68,5.74,0.0 sentence-transformers/distiluse-base-multilingual-cased-v1,135,120,512,True,False,34042,4.16,75.63,9.82,3.65,0.0 sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,4.16,77.0,9.59,4.09,0.0 sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,4.16,77.16,10.41,4.09,0.0 sentence-transformers/paraphrase-xlm-r-multilingual-v1,278,250,512,True,False,20154,4.16,80.92,12.97,1.19,0.0 Geotrend/distilbert-base-en-no-cased,69,33,512,True,False,26597,4.17,82.05,4.4,3.98,0.97 -Geotrend/distilbert-base-25lang-cased,109,85,512,True,False,26099,4.18,83.21,3.9,2.37,1.35 -"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,4.19,51.06,-3.58,4.1,45.29 +patrickvonplaten/norwegian-roberta-base,124,50,512,True,False,15698,4.17,82.57,1.68,5.74,0.0 +"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,4.18,51.06,-3.58,4.1,45.29 +Geotrend/distilbert-base-25lang-cased,109,85,512,True,False,26099,4.19,83.21,3.9,2.37,1.35 flax-community/swe-roberta-wiki-oscar,124,50,512,True,False,15437,4.19,80.52,2.89,6.51,0.0 -vesteinn/DanskBERT,124,50,512,True,False,15749,4.19,85.04,2.02,4.48,0.15 sentence-transformers/paraphrase-multilingual-mpnet-base-v2,278,250,512,True,False,15100,4.2,81.7,12.82,0.25,0.0 +vesteinn/DanskBERT,124,50,512,True,False,15749,4.2,85.04,2.02,4.48,0.15 DDSC/roberta-base-danish,125,50,512,True,False,15004,4.21,80.21,2.88,1.1,7.95 
cardiffnlp/twitter-xlm-roberta-base,277,250,512,True,False,34475,4.21,83.96,5.16,1.05,0.0 -KB/bert-base-swedish-cased,124,50,512,True,False,16181,4.23,82.76,3.46,3.98,0.0 -Twitter/twhin-bert-large,560,250,512,True,False,9707,4.23,84.73,-0.64,1.37,4.15 DeepPavlov/rubert-base-cased,177,120,512,True,False,15785,4.24,83.15,0.56,3.21,1.53 +KB/bert-base-swedish-cased,124,50,512,True,False,16181,4.24,82.76,3.46,3.98,0.0 +Twitter/twhin-bert-large,560,250,512,True,False,9707,4.24,84.73,-0.64,1.37,4.15 openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,4.24,46.64,16.72,-1.54,18.69 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,4.24,82.24,6.35,2.84,0.0 Twitter/twhin-bert-base,279,250,512,True,False,11514,4.26,84.0,3.05,1.94,0.02 @@ -115,35 +115,36 @@ FacebookAI/roberta-base,124,50,512,True,False,13354,4.28,81.78,4.85,-1.18,0.69 microsoft/xlm-align-base,277,250,512,True,False,14744,4.29,85.97,2.54,0.02,0.72 KBLab/bert-base-swedish-cased,125,50,512,True,False,16164,4.3,79.99,3.46,1.32,0.1 dbmdz/bert-base-historic-multilingual-cased,110,32,512,True,False,20047,4.3,80.45,0.9,2.52,0.58 -dbmdz/bert-medium-historic-multilingual-cased,42,32,512,True,False,24291,4.31,80.58,3.06,1.58,0.0 -microsoft/infoxlm-base,277,250,512,True,False,34735,4.34,85.58,0.37,0.35,0.0 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,4.35,39.78,16.03,-0.48,20.04 +dbmdz/bert-medium-historic-multilingual-cased,42,32,512,True,False,24291,4.32,80.58,3.06,1.58,0.0 +openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,4.33,39.78,16.03,-0.48,20.04 +microsoft/infoxlm-base,277,250,512,True,False,34735,4.35,85.58,0.37,0.35,0.0 Maltehb/aelaectra-danish-electra-small-uncased,14,32,128,True,False,5995,4.36,62.07,8.7,5.11,0.0 +ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,4.36,45.56,7.44,0.92,20.82 DDSC/roberta-base-scandinavian,124,50,512,True,False,14491,4.37,63.86,2.04,0.73,10.57 -ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,4.37,45.56,7.44,0.92,20.82 ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,4.39,60.88,0.16,-0.35,18.54 pdelobelle/robbert-v2-dutch-base,116,40,512,True,False,15481,4.4,78.59,3.32,0.65,0.0 3ebdola/Dialectal-Arabic-XLM-R-Base,277,250,512,True,False,12783,4.41,73.34,1.49,0.97,0.0 KBLab/albert-base-swedish-cased-alpha,14,50,512,True,False,15925,4.41,73.8,1.09,0.81,0.0 -ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,4.43,56.88,3.8,-0.21,13.72 -ltg/norbert2,125,50,512,True,False,15523,4.44,60.57,2.58,4.16,7.84 -EuropeanParliament/EUBERT,94,66,512,True,False,20070,4.47,59.5,0.94,3.25,5.56 +ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,4.42,56.88,3.8,-0.21,13.72 +ltg/norbert2,125,50,512,True,False,15523,4.43,60.57,2.58,4.16,7.84 +EuropeanParliament/EUBERT,94,66,512,True,False,20070,4.46,59.5,0.94,3.25,5.56 dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,4.47,72.08,-1.81,2.65,0.0 Maltehb/aelaectra-danish-electra-small-cased,14,32,512,True,False,4593,4.53,58.52,3.76,1.09,0.0 sarnikowski/convbert-small-da-cased,13,29,512,True,False,14273,4.54,58.5,-0.24,5.96,0.0 -ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,4.57,41.27,5.4,-0.2,19.69 +ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,4.56,41.27,5.4,-0.2,19.69 
sarnikowski/convbert-medium-small-da-cased,24,29,512,True,False,13821,4.58,59.66,-1.39,4.58,0.0 -HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,4.7,40.28,3.94,-0.26,10.68 +HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,4.69,40.28,3.94,-0.26,10.68 fresh-xlm-roberta-base,277,250,512,True,False,2214,4.75,48.7,1.07,2.37,0.0 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,4.81,33.53,4.25,-2.32,15.41 -HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,4.87,27.91,0.77,-0.48,16.56 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.88,38.91,-1.72,0.66,4.82 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,5.05,26.85,3.07,-0.12,1.39 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,5.05,31.99,0.0,0.48,0.29 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,5.07,20.76,-1.78,0.87,3.58 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,5.08,23.22,3.78,0.41,0.54 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,5.09,28.14,-0.56,-0.06,2.43 -PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,5.1,22.75,-0.03,-0.78,7.75 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,5.12,25.51,-0.24,0.46,0.15 -PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,5.13,22.55,0.67,0.87,0.27 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,5.52,0.0,0.74,0.0,0.0 +utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,4.8,33.53,4.25,-2.32,15.41 +HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,4.86,27.91,0.77,-0.48,16.56 +PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.87,38.91,-1.72,0.66,4.82 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,5.03,26.85,3.07,-0.12,1.39 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,5.03,31.99,0.0,0.48,0.29 +HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,5.06,23.22,3.78,0.41,0.54 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,5.06,20.76,-1.78,0.87,3.58 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,5.08,28.14,-0.56,-0.06,2.43 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,5.08,22.75,-0.03,-0.78,7.75 +HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,5.1,25.51,-0.24,0.46,0.15 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,5.1,14.18,2.36,2.29,6.35 +PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,5.11,22.55,0.67,0.87,0.27 +ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,5.49,0.0,0.74,0.0,0.0 diff --git a/faroese-nlu.md b/faroese-nlu.md index 71bece7b..9c549d1f 100644 --- a/faroese-nlu.md +++ b/faroese-nlu.md @@ -3,7 +3,7 @@ layout: leaderboard title: Faroese NLU 🇫🇴 --- -
Last updated: 10/01/2025 12:30:26 CET
+
Last updated: 11/01/2025 11:03:36 CET
@@ -92,7 +92,7 @@ title: Faroese NLU 🇫🇴 32768 True 1,219 ± 412 / 158 ± 53 - 2.42 + 2.41 76.56 ± 1.09 / 58.59 ± 3.63 46.61 ± 1.58 / 47.25 ± 1.29 23.45 ± 2.41 / 59.84 ± 1.99 @@ -177,7 +177,7 @@ title: Faroese NLU 🇫🇴 8191 True 637 ± 306 / 92 ± 31 - 2.69 + 2.68 68.00 ± 1.08 / 37.75 ± 1.09 27.30 ± 4.69 / 30.12 ± 3.11 28.09 ± 3.31 / 54.91 ± 2.24 @@ -228,7 +228,7 @@ title: Faroese NLU 🇫🇴 131072 True 1,208 ± 412 / 156 ± 53 - 2.73 + 2.72 64.88 ± 1.71 / 37.91 ± 1.50 52.56 ± 2.14 / 49.94 ± 1.21 14.28 ± 1.04 / 45.93 ± 2.64 @@ -245,7 +245,7 @@ title: Faroese NLU 🇫🇴 512 True 30,231 ± 8,171 / 4,863 ± 1,598 - 2.78 + 2.79 88.81 ± 0.65 / 89.12 ± 0.56 15.45 ± 3.96 / 40.77 ± 4.75 30.92 ± 8.65 / 63.05 ± 4.66 @@ -262,7 +262,7 @@ title: Faroese NLU 🇫🇴 512 True 20,637 ± 3,925 / 4,497 ± 1,502 - 2.81 + 2.82 88.60 ± 0.60 / 89.37 ± 0.54 6.70 ± 4.85 / 25.36 ± 7.25 46.81 ± 2.12 / 72.76 ± 1.40 @@ -279,7 +279,7 @@ title: Faroese NLU 🇫🇴 8191 True 908 ± 303 / 96 ± 36 - 2.86 + 2.85 53.92 ± 1.55 / 28.59 ± 1.28 35.05 ± 3.84 / 34.41 ± 2.93 23.12 ± 4.18 / 60.20 ± 2.01 @@ -312,7 +312,7 @@ title: Faroese NLU 🇫🇴 128 8192 True - 1,007 ± 316 / 162 ± 45 + 1,483 ± 377 / 287 ± 97 3.02 70.61 ± 1.12 / 68.27 ± 1.76 45.78 ± 2.05 / 47.72 ± 1.15 @@ -347,7 +347,7 @@ title: Faroese NLU 🇫🇴 8191 True 784 ± 310 / 95 ± 28 - 3.06 + 3.05 73.80 ± 2.44 / 63.06 ± 3.84 39.45 ± 8.53 / 62.38 ± 6.69 34.78 ± 4.81 / 63.14 ± 2.83 @@ -364,7 +364,7 @@ title: Faroese NLU 🇫🇴 8191 True 436 ± 152 / 57 ± 21 - 3.08 + 3.07 57.52 ± 2.17 / 36.36 ± 1.53 13.18 ± 5.13 / 19.41 ± 3.41 28.03 ± 3.90 / 62.26 ± 2.03 @@ -380,7 +380,7 @@ title: Faroese NLU 🇫🇴 128 131072 True - 1,005 ± 330 / 196 ± 74 + 1,473 ± 377 / 283 ± 96 3.09 67.67 ± 1.31 / 59.86 ± 2.12 48.54 ± 2.43 / 49.17 ± 1.48 @@ -425,6 +425,23 @@ title: Faroese NLU 🇫🇴 12.10.0 13.0.0 + + nvidia/mistral-nemo-minitron-8b-base (few-shot) + 8414 + 131 + 8192 + True + 2,470 ± 836 / 326 ± 111 + 3.20 + 66.18 ± 1.87 / 62.76 ± 2.46 + 39.48 ± 3.86 / 56.83 ± 3.61 + 5.78 ± 1.69 / 47.83 ± 3.37 + 44.81 ± 2.15 / 63.16 ± 2.38 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + pere/roberta-base-exp-32 277 @@ -442,23 +459,6 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 - - nvidia/mistral-nemo-minitron-8b-base (few-shot) - 8414 - 131 - 8192 - True - 2,470 ± 836 / 326 ± 111 - 3.21 - 66.18 ± 1.87 / 62.76 ± 2.46 - 39.48 ± 3.86 / 56.83 ± 3.61 - 5.78 ± 1.69 / 47.83 ± 3.37 - 44.81 ± 2.15 / 63.16 ± 2.38 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot) 341029 @@ -466,7 +466,7 @@ title: Faroese NLU 🇫🇴 4096 True 1,904 ± 475 / 361 ± 121 - 3.25 + 3.24 55.31 ± 9.55 / 44.19 ± 6.59 51.62 ± 10.21 / 62.71 ± 10.54 15.07 ± 1.77 / 54.46 ± 2.11 @@ -483,7 +483,7 @@ title: Faroese NLU 🇫🇴 512 True 15,436 ± 2,820 / 3,704 ± 1,187 - 3.27 + 3.28 88.14 ± 0.58 / 88.89 ± 0.52 6.88 ± 5.56 / 24.40 ± 8.37 27.71 ± 9.67 / 61.60 ± 5.69 @@ -534,7 +534,7 @@ title: Faroese NLU 🇫🇴 32768 True 9,015 ± 2,966 / 1,121 ± 510 - 3.40 + 3.39 58.62 ± 3.34 / 53.80 ± 3.15 40.74 ± 5.37 / 57.33 ± 4.26 5.58 ± 2.68 / 45.68 ± 4.79 @@ -551,7 +551,7 @@ title: Faroese NLU 🇫🇴 512 True 11,004 ± 2,244 / 2,324 ± 761 - 3.42 + 3.43 87.79 ± 0.40 / 88.46 ± 0.31 7.80 ± 5.19 / 24.52 ± 6.96 22.51 ± 10.65 / 55.58 ± 8.05 @@ -585,7 +585,7 @@ title: Faroese NLU 🇫🇴 514 True 5,947 ± 1,301 / 1,129 ± 374 - 3.46 + 3.47 88.64 ± 0.34 / 89.11 ± 0.29 23.63 ± 6.26 / 44.37 ± 7.53 2.05 ± 2.30 / 47.88 ± 2.17 @@ -619,7 +619,7 @@ title: Faroese NLU 🇫🇴 32768 True 2,368 ± 793 / 317 ± 108 - 3.52 + 3.51 62.60 ± 3.09 / 54.91 ± 2.40 31.36 ± 5.40 / 50.13 ± 5.67 5.21 ± 3.15 / 46.32 ± 4.42 @@ -629,23 +629,6 @@ title: 
Faroese NLU 🇫🇴 14.0.4 14.0.4 - - intfloat/multilingual-e5-large - 559 - 250 - 512 - True - 6,732 ± 1,273 / 1,633 ± 523 - 3.52 - 88.39 ± 0.86 / 88.75 ± 0.75 - 18.28 ± 3.38 / 39.08 ± 2.62 - 2.85 ± 1.32 / 48.43 ± 2.29 - 31.03 ± 1.94 / 43.72 ± 2.89 - 0.0.0 - 13.3.0 - 0.0.0 - 13.0.0 - jonfd/electra-small-nordic 22 @@ -664,21 +647,21 @@ title: Faroese NLU 🇫🇴 13.0.0 - meta-llama/Llama-3.1-8B (few-shot) - 8030 - 128 - 131072 + intfloat/multilingual-e5-large + 559 + 250 + 512 True - 2,986 ± 823 / 276 ± 94 - 3.54 - 65.65 ± 3.31 / 63.81 ± 2.72 - 24.30 ± 7.03 / 35.85 ± 7.35 - 0.61 ± 1.75 / 35.51 ± 2.23 - 45.01 ± 1.79 / 65.74 ± 1.54 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 + 6,732 ± 1,273 / 1,633 ± 523 + 3.53 + 88.39 ± 0.86 / 88.75 ± 0.75 + 18.28 ± 3.38 / 39.08 ± 2.62 + 2.85 ± 1.32 / 48.43 ± 2.29 + 31.03 ± 1.94 / 43.72 ± 2.89 + 0.0.0 + 13.3.0 + 0.0.0 + 13.0.0 CohereForAI/aya-expanse-8b (few-shot) @@ -687,7 +670,7 @@ title: Faroese NLU 🇫🇴 8192 False 2,686 ± 685 / 491 ± 164 - 3.55 + 3.54 64.72 ± 1.73 / 47.25 ± 4.90 28.57 ± 4.08 / 50.22 ± 2.92 5.12 ± 1.16 / 51.09 ± 0.89 @@ -697,6 +680,23 @@ title: Faroese NLU 🇫🇴 14.0.4 14.0.4 + + meta-llama/Llama-3.1-8B (few-shot) + 8030 + 128 + 131072 + True + 2,986 ± 823 / 276 ± 94 + 3.54 + 65.65 ± 3.31 / 63.81 ± 2.72 + 24.30 ± 7.03 / 35.85 ± 7.35 + 0.61 ± 1.75 / 35.51 ± 2.23 + 45.01 ± 1.79 / 65.74 ± 1.54 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + mistralai/Mistral-7B-v0.1 (few-shot) 7242 @@ -738,7 +738,7 @@ title: Faroese NLU 🇫🇴 32768 True 1,364 ± 343 / 266 ± 90 - 3.57 + 3.56 61.32 ± 4.26 / 59.28 ± 4.43 26.73 ± 4.70 / 45.03 ± 5.25 1.30 ± 1.64 / 45.10 ± 3.27 @@ -772,7 +772,7 @@ title: Faroese NLU 🇫🇴 32768 False 2,370 ± 416 / 711 ± 242 - 3.60 + 3.59 61.28 ± 2.98 / 54.02 ± 3.55 32.07 ± 3.55 / 51.69 ± 3.46 1.68 ± 1.41 / 50.06 ± 1.22 @@ -789,7 +789,7 @@ title: Faroese NLU 🇫🇴 4096 False 1,483 ± 321 / 379 ± 158 - 3.60 + 3.59 54.79 ± 2.33 / 46.30 ± 4.57 35.84 ± 8.64 / 48.22 ± 7.58 0.00 ± 0.00 / 33.26 ± 0.54 @@ -816,6 +816,23 @@ title: Faroese NLU 🇫🇴 13.0.0 13.0.0 + + meta-llama/Meta-Llama-3-8B (few-shot) + 8030 + 128 + 8192 + True + 1,477 ± 376 / 285 ± 97 + 3.62 + 61.11 ± 4.21 / 58.55 ± 4.19 + 19.40 ± 8.13 / 32.14 ± 7.75 + 2.02 ± 1.68 / 39.88 ± 3.56 + 50.34 ± 1.74 / 71.74 ± 1.27 + 12.6.1 + 14.1.2 + 12.6.1 + 13.0.0 + Geotrend/bert-base-25lang-cased 151 @@ -823,7 +840,7 @@ title: Faroese NLU 🇫🇴 512 True 13,908 ± 3,201 / 2,700 ± 872 - 3.62 + 3.63 86.09 ± 1.03 / 86.85 ± 1.02 5.17 ± 4.32 / 26.72 ± 6.11 15.24 ± 6.84 / 50.54 ± 4.85 @@ -840,7 +857,7 @@ title: Faroese NLU 🇫🇴 512 True 17,757 ± 3,883 / 3,864 ± 1,237 - 3.62 + 3.63 86.20 ± 0.50 / 86.91 ± 0.46 19.12 ± 2.94 / 41.88 ± 2.47 11.80 ± 7.52 / 53.06 ± 5.04 @@ -850,23 +867,6 @@ title: Faroese NLU 🇫🇴 12.10.5 13.0.0 - - meta-llama/Meta-Llama-3-8B (few-shot) - 8030 - 128 - 8192 - True - 1,335 ± 338 / 260 ± 88 - 3.62 - 61.11 ± 4.21 / 58.55 ± 4.19 - 19.40 ± 8.13 / 32.14 ± 7.75 - 2.02 ± 1.68 / 39.88 ± 3.56 - 50.34 ± 1.74 / 71.74 ± 1.27 - 12.6.1 - 14.1.2 - 12.6.1 - 13.0.0 - ibm-granite/granite-3.0-8b-instruct (few-shot) 8171 @@ -942,7 +942,7 @@ title: Faroese NLU 🇫🇴 8192 True 3,161 ± 676 / 1,247 ± 481 - 3.69 + 3.68 67.72 ± 1.04 / 51.81 ± 3.32 42.98 ± 3.22 / 60.44 ± 2.81 6.40 ± 1.61 / 51.02 ± 1.86 @@ -1003,6 +1003,23 @@ title: Faroese NLU 🇫🇴 14.0.4 14.0.4 + + ibm-granite/granite-8b-code-base-4k (few-shot) + 8055 + 49 + 4096 + True + 2,313 ± 423 / 682 ± 210 + 3.78 + 66.82 ± 2.36 / 65.84 ± 2.35 + 21.19 ± 5.19 / 32.96 ± 5.05 + -0.36 ± 1.67 / 37.57 ± 3.35 + 36.47 ± 0.96 / 51.89 ± 1.05 + 13.0.0 + 14.1.2 + 13.0.0 + 13.0.0 + mideind/IceBERT-ic3 124 @@ 
-1010,7 +1027,7 @@ title: Faroese NLU 🇫🇴 512 True 12,119 ± 1,576 / 3,812 ± 1,242 - 3.77 + 3.78 87.22 ± 0.68 / 87.92 ± 0.59 15.94 ± 5.79 / 37.27 ± 7.66 6.23 ± 6.96 / 48.55 ± 4.87 @@ -1027,7 +1044,7 @@ title: Faroese NLU 🇫🇴 512 True 16,697 ± 2,113 / 5,432 ± 1,749 - 3.77 + 3.78 86.50 ± 0.96 / 87.11 ± 0.90 8.64 ± 5.25 / 29.80 ± 8.39 10.13 ± 8.55 / 47.31 ± 7.04 @@ -1038,20 +1055,20 @@ title: Faroese NLU 🇫🇴 13.0.0 - ibm-granite/granite-8b-code-base-4k (few-shot) - 8055 - 49 - 4096 + vesteinn/XLMR-ENIS + 125 + 50 + 512 True - 2,313 ± 423 / 682 ± 210 - 3.78 - 66.82 ± 2.36 / 65.84 ± 2.35 - 21.19 ± 5.19 / 32.96 ± 5.05 - -0.36 ± 1.67 / 37.57 ± 3.35 - 36.47 ± 0.96 / 51.89 ± 1.05 - 13.0.0 - 14.1.2 - 13.0.0 + 10,711 ± 2,333 / 2,141 ± 689 + 3.79 + 87.09 ± 0.76 / 87.71 ± 0.73 + 15.11 ± 7.40 / 38.19 ± 7.66 + 3.09 ± 1.98 / 39.41 ± 4.45 + 5.75 ± 2.66 / 8.73 ± 4.07 + 0.0.0 + 13.3.0 + 0.0.0 13.0.0 @@ -1061,7 +1078,7 @@ title: Faroese NLU 🇫🇴 512 True 17,897 ± 3,921 / 3,463 ± 1,141 - 3.79 + 3.80 87.85 ± 0.95 / 88.21 ± 0.87 5.14 ± 5.42 / 22.03 ± 5.69 1.17 ± 2.11 / 40.94 ± 5.07 @@ -1071,23 +1088,6 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 - - vesteinn/XLMR-ENIS - 125 - 50 - 512 - True - 10,711 ± 2,333 / 2,141 ± 689 - 3.79 - 87.09 ± 0.76 / 87.71 ± 0.73 - 15.11 ± 7.40 / 38.19 ± 7.66 - 3.09 ± 1.98 / 39.41 ± 4.45 - 5.75 ± 2.66 / 8.73 ± 4.07 - 0.0.0 - 13.3.0 - 0.0.0 - 13.0.0 - ZurichNLP/unsup-simcse-xlm-roberta-base 277 @@ -1224,6 +1224,23 @@ title: Faroese NLU 🇫🇴 12.7.0 13.0.0 + + mistralai/Mistral-7B-Instruct-v0.1 (few-shot) + 7242 + 32 + 32768 + False + 634 ± 179 / 110 ± 35 + 3.92 + 55.42 ± 2.12 / 46.41 ± 2.50 + 15.85 ± 6.84 / 36.28 ± 7.13 + 1.11 ± 2.41 / 36.79 ± 4.00 + 33.54 ± 1.29 / 50.80 ± 1.31 + 9.3.1 + 13.3.0 + 9.3.1 + 13.0.0 + KBLab/megatron-bert-base-swedish-cased-125k 135 @@ -1231,7 +1248,7 @@ title: Faroese NLU 🇫🇴 512 True 15,763 ± 2,523 / 4,238 ± 1,370 - 3.93 + 3.94 80.61 ± 0.91 / 81.31 ± 0.89 2.87 ± 3.48 / 19.80 ± 4.75 9.60 ± 3.82 / 52.30 ± 1.73 @@ -1242,20 +1259,20 @@ title: Faroese NLU 🇫🇴 13.0.0 - mistralai/Mistral-7B-Instruct-v0.1 (few-shot) + occiglot/occiglot-7b-eu5 (few-shot) 7242 32 32768 - False - 634 ± 179 / 110 ± 35 - 3.93 - 55.42 ± 2.12 / 46.41 ± 2.50 - 15.85 ± 6.84 / 36.28 ± 7.13 - 1.11 ± 2.41 / 36.79 ± 4.00 - 33.54 ± 1.29 / 50.80 ± 1.31 - 9.3.1 - 13.3.0 - 9.3.1 + True + 2,219 ± 427 / 717 ± 224 + 3.94 + 58.67 ± 3.95 / 58.47 ± 3.96 + 10.39 ± 7.55 / 30.73 ± 6.92 + 0.00 ± 0.00 / 33.26 ± 0.34 + 40.95 ± 1.80 / 55.82 ± 1.95 + 12.5.2 + 14.1.2 + 12.1.0 13.0.0 @@ -1310,30 +1327,13 @@ title: Faroese NLU 🇫🇴 13.0.0 - occiglot/occiglot-7b-eu5 (few-shot) - 7242 - 32 - 32768 - True - 2,219 ± 427 / 717 ± 224 - 3.95 - 58.67 ± 3.95 / 58.47 ± 3.96 - 10.39 ± 7.55 / 30.73 ± 6.92 - 0.00 ± 0.00 / 33.26 ± 0.34 - 40.95 ± 1.80 / 55.82 ± 1.95 - 12.5.2 - 14.1.2 - 12.1.0 - 13.0.0 - - - vesteinn/IceBERT - 163 - 50 - 512 + vesteinn/IceBERT + 163 + 50 + 512 True 12,360 ± 1,611 / 3,858 ± 1,246 - 3.95 + 3.96 87.13 ± 0.58 / 87.70 ± 0.45 8.64 ± 5.25 / 29.80 ± 8.39 3.66 ± 3.33 / 40.81 ± 4.34 @@ -1343,23 +1343,6 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 - - mideind/IceBERT-mC4-is - 163 - 50 - 512 - True - 12,308 ± 1,614 / 3,851 ± 1,254 - 3.98 - 88.44 ± 0.35 / 89.11 ± 0.38 - 1.73 ± 3.39 / 17.03 ± 2.53 - 11.83 ± 4.90 / 48.95 ± 6.81 - 0.00 ± 0.00 / 0.00 ± 0.00 - 0.0.0 - 13.3.0 - 0.0.0 - 13.0.0 - timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot) 7242 @@ -1378,34 +1361,51 @@ title: Faroese NLU 🇫🇴 13.0.0 - Geotrend/bert-base-da-cased - 103 - 23 + mideind/IceBERT-igc + 124 + 50 512 True - 15,432 ± 2,838 / 3,642 ± 1,189 + 12,551 ± 1,656 / 3,918 ± 
1,274 3.99 - 86.62 ± 0.43 / 87.31 ± 0.44 - 2.99 ± 2.41 / 22.15 ± 2.57 - 3.64 ± 3.82 / 49.77 ± 2.26 - 13.84 ± 1.99 / 21.44 ± 2.62 + 83.82 ± 0.98 / 84.34 ± 0.88 + 12.12 ± 5.60 / 32.60 ± 6.48 + 4.93 ± 3.16 / 45.85 ± 3.66 + 5.17 ± 2.95 / 7.88 ± 4.65 0.0.0 13.3.0 0.0.0 13.0.0 - mideind/IceBERT-igc - 124 + mideind/IceBERT-mC4-is + 163 50 512 True - 12,551 ± 1,656 / 3,918 ± 1,274 + 12,308 ± 1,614 / 3,851 ± 1,254 3.99 - 83.82 ± 0.98 / 84.34 ± 0.88 - 12.12 ± 5.60 / 32.60 ± 6.48 - 4.93 ± 3.16 / 45.85 ± 3.66 - 5.17 ± 2.95 / 7.88 ± 4.65 + 88.44 ± 0.35 / 89.11 ± 0.38 + 1.73 ± 3.39 / 17.03 ± 2.53 + 11.83 ± 4.90 / 48.95 ± 6.81 + 0.00 ± 0.00 / 0.00 ± 0.00 + 0.0.0 + 13.3.0 + 0.0.0 + 13.0.0 + + + Geotrend/bert-base-da-cased + 103 + 23 + 512 + True + 15,432 ± 2,838 / 3,642 ± 1,189 + 4.00 + 86.62 ± 0.43 / 87.31 ± 0.44 + 2.99 ± 2.41 / 22.15 ± 2.57 + 3.64 ± 3.82 / 49.77 ± 2.26 + 13.84 ± 1.99 / 21.44 ± 2.62 0.0.0 13.3.0 0.0.0 @@ -1435,7 +1435,7 @@ title: Faroese NLU 🇫🇴 512 True 15,726 ± 2,508 / 4,234 ± 1,365 - 4.01 + 4.02 81.02 ± 0.75 / 81.74 ± 0.75 7.39 ± 3.22 / 27.62 ± 2.64 4.00 ± 2.04 / 50.51 ± 1.11 @@ -1445,6 +1445,23 @@ title: Faroese NLU 🇫🇴 12.7.0 13.0.0 + + meta-llama/Llama-2-7b-chat-hf (few-shot) + 6738 + 32 + 4096 + False + 2,643 ± 455 / 800 ± 247 + 4.04 + 59.77 ± 3.38 / 56.97 ± 4.30 + 13.24 ± 5.37 / 38.49 ± 4.97 + -0.54 ± 1.61 / 36.94 ± 2.79 + 31.87 ± 2.20 / 46.21 ± 1.15 + 9.3.1 + 13.3.0 + 9.3.1 + 13.0.0 + KBLab/bert-base-swedish-cased-new 135 @@ -1462,23 +1479,6 @@ title: Faroese NLU 🇫🇴 12.7.0 13.0.0 - - meta-llama/Llama-2-7b-chat-hf (few-shot) - 6738 - 32 - 4096 - False - 2,643 ± 455 / 800 ± 247 - 4.05 - 59.77 ± 3.38 / 56.97 ± 4.30 - 13.24 ± 5.37 / 38.49 ± 4.97 - -0.54 ± 1.61 / 36.94 ± 2.79 - 31.87 ± 2.20 / 46.21 ± 1.15 - 9.3.1 - 13.3.0 - 9.3.1 - 13.0.0 - KBLab/megatron-bert-large-swedish-cased-110k 369 @@ -1496,6 +1496,23 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 + + MaLA-LM/emma-500-llama2-7b (few-shot) + 6738 + 32 + 4096 + True + 6,275 ± 1,193 / 1,755 ± 578 + 4.06 + 40.18 ± 3.60 / 39.95 ± 3.38 + 14.19 ± 8.12 / 28.15 ± 8.15 + 0.31 ± 1.71 / 41.35 ± 4.05 + 41.60 ± 2.49 / 60.78 ± 2.12 + 13.0.0 + 13.3.0 + 13.0.0 + 13.0.0 + senseable/WestLake-7B-v2 (few-shot) 7242 @@ -1513,23 +1530,6 @@ title: Faroese NLU 🇫🇴 12.6.1 13.0.0 - - MaLA-LM/emma-500-llama2-7b (few-shot) - 6738 - 32 - 4096 - True - 6,275 ± 1,193 / 1,755 ± 578 - 4.08 - 40.18 ± 3.60 / 39.95 ± 3.38 - 14.19 ± 8.12 / 28.15 ± 8.15 - 0.31 ± 1.71 / 41.35 ± 4.05 - 41.60 ± 2.49 / 60.78 ± 2.12 - 13.0.0 - 13.3.0 - 13.0.0 - 13.0.0 - flax-community/nordic-roberta-wiki 124 @@ -1598,23 +1598,6 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 - - patrickvonplaten/norwegian-roberta-base - 124 - 50 - 512 - True - 15,698 ± 2,699 / 3,891 ± 1,278 - 4.16 - 82.57 ± 0.93 / 83.35 ± 0.91 - 1.68 ± 4.03 / 18.08 ± 3.73 - 5.74 ± 3.53 / 48.75 ± 3.22 - 0.00 ± 0.00 / 0.00 ± 0.00 - 0.0.0 - 13.3.0 - 0.0.0 - 13.0.0 - sentence-transformers/distiluse-base-multilingual-cased-v1 135 @@ -1701,20 +1684,20 @@ title: Faroese NLU 🇫🇴 13.0.0 - Geotrend/distilbert-base-25lang-cased - 109 - 85 + patrickvonplaten/norwegian-roberta-base + 124 + 50 512 True - 26,099 ± 5,881 / 5,178 ± 1,665 - 4.18 - 83.21 ± 0.53 / 83.81 ± 0.45 - 3.90 ± 4.05 / 25.25 ± 5.25 - 2.37 ± 4.63 / 48.46 ± 3.19 - 1.35 ± 1.03 / 2.54 ± 1.88 - 12.6.1 + 15,698 ± 2,699 / 3,891 ± 1,278 + 4.17 + 82.57 ± 0.93 / 83.35 ± 0.91 + 1.68 ± 4.03 / 18.08 ± 3.73 + 5.74 ± 3.53 / 48.75 ± 3.22 + 0.00 ± 0.00 / 0.00 ± 0.00 + 0.0.0 13.3.0 - 12.6.1 + 0.0.0 13.0.0 @@ -1724,7 +1707,7 @@ title: Faroese NLU 🇫🇴 200000 True 277 ± 77 / 70 ± 25 - 4.19 + 4.18 51.06 ± 1.45 
/ 30.27 ± 1.00 -3.58 ± 4.03 / 14.94 ± 2.38 4.10 ± 4.03 / 37.28 ± 1.78 @@ -1735,34 +1718,34 @@ title: Faroese NLU 🇫🇴 14.0.3 - flax-community/swe-roberta-wiki-oscar - 124 - 50 + Geotrend/distilbert-base-25lang-cased + 109 + 85 512 True - 15,437 ± 2,628 / 3,834 ± 1,252 + 26,099 ± 5,881 / 5,178 ± 1,665 4.19 - 80.52 ± 0.76 / 81.35 ± 0.69 - 2.89 ± 3.05 / 27.56 ± 2.96 - 6.51 ± 3.60 / 51.81 ± 1.95 - 0.00 ± 0.00 / 0.00 ± 0.00 - 0.0.0 + 83.21 ± 0.53 / 83.81 ± 0.45 + 3.90 ± 4.05 / 25.25 ± 5.25 + 2.37 ± 4.63 / 48.46 ± 3.19 + 1.35 ± 1.03 / 2.54 ± 1.88 + 12.6.1 13.3.0 - 0.0.0 + 12.6.1 13.0.0 - vesteinn/DanskBERT + flax-community/swe-roberta-wiki-oscar 124 50 512 True - 15,749 ± 2,665 / 4,014 ± 1,281 + 15,437 ± 2,628 / 3,834 ± 1,252 4.19 - 85.04 ± 0.57 / 85.72 ± 0.50 - 2.02 ± 4.21 / 21.25 ± 5.15 - 4.48 ± 1.63 / 44.45 ± 4.77 - 0.15 ± 0.29 / 0.21 ± 0.39 + 80.52 ± 0.76 / 81.35 ± 0.69 + 2.89 ± 3.05 / 27.56 ± 2.96 + 6.51 ± 3.60 / 51.81 ± 1.95 + 0.00 ± 0.00 / 0.00 ± 0.00 0.0.0 13.3.0 0.0.0 @@ -1785,6 +1768,23 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 + + vesteinn/DanskBERT + 124 + 50 + 512 + True + 15,749 ± 2,665 / 4,014 ± 1,281 + 4.20 + 85.04 ± 0.57 / 85.72 ± 0.50 + 2.02 ± 4.21 / 21.25 ± 5.15 + 4.48 ± 1.63 / 44.45 ± 4.77 + 0.15 ± 0.29 / 0.21 ± 0.39 + 0.0.0 + 13.3.0 + 0.0.0 + 13.0.0 + DDSC/roberta-base-danish 125 @@ -1819,6 +1819,23 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 + + DeepPavlov/rubert-base-cased + 177 + 120 + 512 + True + 15,785 ± 2,658 / 3,983 ± 1,289 + 4.24 + 83.15 ± 0.73 / 83.90 ± 0.74 + 0.56 ± 4.44 / 29.23 ± 3.77 + 3.21 ± 2.89 / 47.97 ± 3.56 + 1.53 ± 1.23 / 2.95 ± 2.23 + 0.0.0 + 13.3.0 + 0.0.0 + 13.0.0 + KB/bert-base-swedish-cased 124 @@ -1826,7 +1843,7 @@ title: Faroese NLU 🇫🇴 512 True 16,181 ± 2,451 / 4,620 ± 1,507 - 4.23 + 4.24 82.76 ± 1.26 / 83.50 ± 1.20 3.46 ± 2.02 / 20.63 ± 3.68 3.98 ± 2.70 / 47.46 ± 2.35 @@ -1843,7 +1860,7 @@ title: Faroese NLU 🇫🇴 512 True 9,707 ± 1,664 / 2,549 ± 831 - 4.23 + 4.24 84.73 ± 1.49 / 85.19 ± 1.59 -0.64 ± 3.87 / 22.47 ± 3.96 1.37 ± 2.46 / 39.78 ± 3.24 @@ -1853,23 +1870,6 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 - - DeepPavlov/rubert-base-cased - 177 - 120 - 512 - True - 15,785 ± 2,658 / 3,983 ± 1,289 - 4.24 - 83.15 ± 0.73 / 83.90 ± 0.74 - 0.56 ± 4.44 / 29.23 ± 3.77 - 3.21 ± 2.89 / 47.97 ± 3.56 - 1.53 ± 1.23 / 2.95 ± 2.23 - 0.0.0 - 13.3.0 - 0.0.0 - 13.0.0 - openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot) 7453 @@ -2013,7 +2013,7 @@ title: Faroese NLU 🇫🇴 512 True 24,291 ± 4,887 / 5,096 ± 1,655 - 4.31 + 4.32 80.58 ± 0.45 / 81.29 ± 0.46 3.06 ± 2.93 / 25.95 ± 1.54 1.58 ± 2.34 / 49.16 ± 2.33 @@ -2023,23 +2023,6 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 - - microsoft/infoxlm-base - 277 - 250 - 512 - True - 34,735 ± 7,558 / 6,846 ± 2,312 - 4.34 - 85.58 ± 1.04 / 86.23 ± 1.03 - 0.37 ± 2.84 / 16.66 ± 1.24 - 0.35 ± 2.36 / 43.55 ± 4.58 - 0.00 ± 0.00 / 0.00 ± 0.00 - 0.0.0 - 13.3.0 - 0.0.0 - 13.0.0 - openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot) 7453 @@ -2047,7 +2030,7 @@ title: Faroese NLU 🇫🇴 4096 True 1,438 ± 410 / 233 ± 79 - 4.35 + 4.33 39.78 ± 1.79 / 31.09 ± 2.58 16.03 ± 6.81 / 39.68 ± 7.16 -0.48 ± 1.85 / 33.39 ± 0.49 @@ -2057,6 +2040,23 @@ title: Faroese NLU 🇫🇴 14.0.4 14.0.4 + + microsoft/infoxlm-base + 277 + 250 + 512 + True + 34,735 ± 7,558 / 6,846 ± 2,312 + 4.35 + 85.58 ± 1.04 / 86.23 ± 1.03 + 0.37 ± 2.84 / 16.66 ± 1.24 + 0.35 ± 2.36 / 43.55 ± 4.58 + 0.00 ± 0.00 / 0.00 ± 0.00 + 0.0.0 + 13.3.0 + 0.0.0 + 13.0.0 + Maltehb/aelaectra-danish-electra-small-uncased 14 @@ -2074,6 +2074,23 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 + + ibm-granite/granite-3.0-3b-a800m-instruct 
(few-shot) + 3374 + 49 + 4096 + True + 10,246 ± 3,021 / 1,629 ± 550 + 4.36 + 45.56 ± 1.92 / 42.78 ± 1.78 + 7.44 ± 3.71 / 28.90 ± 5.97 + 0.92 ± 2.19 / 40.29 ± 3.87 + 20.82 ± 1.22 / 31.49 ± 1.49 + 13.0.0 + 13.3.0 + 13.0.0 + 13.0.0 + DDSC/roberta-base-scandinavian 124 @@ -2091,23 +2108,6 @@ title: Faroese NLU 🇫🇴 0.0.0 13.0.0 - - ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) - 3374 - 49 - 4096 - True - 10,246 ± 3,021 / 1,629 ± 550 - 4.37 - 45.56 ± 1.92 / 42.78 ± 1.78 - 7.44 ± 3.71 / 28.90 ± 5.97 - 0.92 ± 2.19 / 40.29 ± 3.87 - 20.82 ± 1.22 / 31.49 ± 1.49 - 13.0.0 - 13.3.0 - 13.0.0 - 13.0.0 - ibm-granite/granite-3b-code-base-2k (few-shot) 3483 @@ -2183,7 +2183,7 @@ title: Faroese NLU 🇫🇴 2048 True 9,059 ± 1,947 / 2,201 ± 728 - 4.43 + 4.42 56.88 ± 2.51 / 57.68 ± 2.14 3.80 ± 5.87 / 28.41 ± 4.70 -0.21 ± 2.20 / 34.20 ± 1.19 @@ -2200,7 +2200,7 @@ title: Faroese NLU 🇫🇴 512 True 15,523 ± 2,863 / 3,690 ± 1,195 - 4.44 + 4.43 60.57 ± 0.86 / 60.42 ± 0.90 2.58 ± 3.12 / 27.61 ± 2.89 4.16 ± 2.63 / 47.13 ± 3.43 @@ -2217,7 +2217,7 @@ title: Faroese NLU 🇫🇴 512 True 20,070 ± 3,977 / 4,400 ± 1,435 - 4.47 + 4.46 59.50 ± 1.56 / 58.98 ± 1.55 0.94 ± 4.40 / 23.64 ± 4.57 3.25 ± 2.04 / 49.77 ± 1.40 @@ -2285,7 +2285,7 @@ title: Faroese NLU 🇫🇴 4096 True 10,504 ± 3,028 / 1,678 ± 559 - 4.57 + 4.56 41.27 ± 4.31 / 40.85 ± 4.31 5.40 ± 3.00 / 17.90 ± 2.95 -0.20 ± 2.17 / 43.01 ± 3.39 @@ -2319,7 +2319,7 @@ title: Faroese NLU 🇫🇴 8192 True 15,971 ± 3,654 / 3,609 ± 1,197 - 4.70 + 4.69 40.28 ± 3.29 / 42.15 ± 2.85 3.94 ± 3.10 / 30.51 ± 3.43 -0.26 ± 1.92 / 38.29 ± 4.03 @@ -2353,7 +2353,7 @@ title: Faroese NLU 🇫🇴 4096 True 15,009 ± 4,072 / 2,702 ± 878 - 4.81 + 4.80 33.53 ± 3.87 / 35.41 ± 3.77 4.25 ± 5.04 / 26.21 ± 4.24 -2.32 ± 2.11 / 45.19 ± 2.78 @@ -2370,7 +2370,7 @@ title: Faroese NLU 🇫🇴 8192 True 16,249 ± 3,690 / 3,689 ± 1,226 - 4.87 + 4.86 27.91 ± 4.97 / 30.98 ± 4.28 0.77 ± 4.14 / 23.56 ± 3.58 -0.48 ± 0.89 / 33.97 ± 1.00 @@ -2387,7 +2387,7 @@ title: Faroese NLU 🇫🇴 2048 True 10,756 ± 3,589 / 1,157 ± 670 - 4.88 + 4.87 38.91 ± 4.57 / 39.53 ± 3.74 -1.72 ± 2.43 / 16.58 ± 3.29 0.66 ± 0.95 / 33.60 ± 0.66 @@ -2404,7 +2404,7 @@ title: Faroese NLU 🇫🇴 8192 True 21,777 ± 6,115 / 3,617 ± 1,211 - 5.05 + 5.03 26.85 ± 3.99 / 27.35 ± 3.60 3.07 ± 2.88 / 28.68 ± 2.63 -0.12 ± 1.62 / 45.84 ± 3.59 @@ -2421,7 +2421,7 @@ title: Faroese NLU 🇫🇴 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 5.05 + 5.03 31.99 ± 3.53 / 32.31 ± 3.75 0.00 ± 0.00 / 13.57 ± 0.43 0.48 ± 1.60 / 37.69 ± 4.37 @@ -2431,23 +2431,6 @@ title: Faroese NLU 🇫🇴 14.1.2 14.0.4 - - PleIAs/Pleias-Nano (few-shot) - 1195 - 66 - 2048 - True - 2,519 ± 841 / 323 ± 104 - 5.07 - 20.76 ± 6.32 / 22.79 ± 5.85 - -1.78 ± 2.38 / 16.36 ± 2.94 - 0.87 ± 1.94 / 34.86 ± 1.71 - 3.58 ± 2.75 / 6.25 ± 4.69 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - HuggingFaceTB/SmolLM2-135M-Instruct (few-shot) 135 @@ -2455,7 +2438,7 @@ title: Faroese NLU 🇫🇴 8192 True 25,602 ± 7,583 / 3,953 ± 1,325 - 5.08 + 5.06 23.22 ± 3.22 / 24.30 ± 2.81 3.78 ± 2.95 / 27.36 ± 2.62 0.41 ± 2.39 / 39.64 ± 3.47 @@ -2465,6 +2448,23 @@ title: Faroese NLU 🇫🇴 13.1.0 13.1.0 + + PleIAs/Pleias-Nano (few-shot) + 1195 + 66 + 2048 + True + 2,519 ± 841 / 323 ± 104 + 5.06 + 20.76 ± 6.32 / 22.79 ± 5.85 + -1.78 ± 2.38 / 16.36 ± 2.94 + 0.87 ± 1.94 / 34.86 ± 1.71 + 3.58 ± 2.75 / 6.25 ± 4.69 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + HuggingFaceTB/SmolLM2-360M (few-shot) 362 @@ -2472,7 +2472,7 @@ title: Faroese NLU 🇫🇴 8192 True 22,023 ± 6,203 / 3,675 ± 1,231 - 5.09 + 5.08 28.14 ± 2.42 / 28.12 ± 2.55 -0.56 ± 5.13 / 23.83 ± 3.35 -0.06 ± 0.67 / 33.85 ± 0.68 @@ -2489,7 +2489,7 @@ 
title: Faroese NLU 🇫🇴 4096 True 6,513 ± 1,241 / 1,282 ± 644 - 5.10 + 5.08 22.75 ± 5.63 / 23.52 ± 4.86 -0.03 ± 6.54 / 23.66 ± 3.61 -0.78 ± 1.59 / 33.93 ± 0.87 @@ -2506,7 +2506,7 @@ title: Faroese NLU 🇫🇴 8192 True 26,346 ± 7,812 / 4,082 ± 1,372 - 5.12 + 5.10 25.51 ± 2.40 / 26.43 ± 1.77 -0.24 ± 2.92 / 26.18 ± 4.52 0.46 ± 1.43 / 36.19 ± 3.30 @@ -2516,6 +2516,23 @@ title: Faroese NLU 🇫🇴 13.1.0 13.1.0 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 5.10 + 14.18 ± 3.48 / 13.89 ± 3.46 + 2.36 ± 2.76 / 17.70 ± 4.11 + 2.29 ± 2.66 / 48.34 ± 3.37 + 6.35 ± 1.25 / 12.91 ± 1.66 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + PleIAs/Pleias-Pico (few-shot) 353 @@ -2523,7 +2540,7 @@ title: Faroese NLU 🇫🇴 2048 True 2,331 ± 787 / 301 ± 97 - 5.13 + 5.11 22.55 ± 5.84 / 22.62 ± 6.04 0.67 ± 1.52 / 13.76 ± 0.51 0.87 ± 1.83 / 38.38 ± 4.75 @@ -2540,7 +2557,7 @@ title: Faroese NLU 🇫🇴 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 5.52 + 5.49 0.00 ± 0.00 / 0.00 ± 0.00 0.74 ± 1.78 / 13.97 ± 0.80 0.00 ± 0.00 / 33.40 ± 0.34 diff --git a/german-nlg.csv b/german-nlg.csv index 609c1324..a637deed 100644 --- a/german-nlg.csv +++ b/german-nlg.csv @@ -8,43 +8,42 @@ Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.44,68.94, "gpt-4o-2024-05-13 (few-shot, val)",-1,200,127903,True,False,916,1.44,69.99,54.82,43.66,30.06,63.8,74.13,88.18 meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.44,72.11,59.87,46.82,30.64,66.2,68.8,73.49 meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.55,70.47,60.55,50.13,20.42,65.2,69.32,78.33 -google/gemma-2-27b-it (few-shot),27227,256,8224,True,False,1516,1.56,64.13,60.28,46.69,28.54,67.87,59.43,71.59 upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,1.56,68.11,59.79,35.45,37.27,69.31,41.2,72.65 +google/gemma-2-27b-it (few-shot),27227,256,8224,True,False,1516,1.57,64.13,60.28,46.69,28.54,67.87,59.43,71.59 "gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,1.6,57.47,58.67,54.55,27.02,62.91,69.23,71.17 "meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8221,True,False,1673,1.61,75.2,54.38,36.59,26.9,68.63,62.69,69.18 "gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.65,67.18,50.12,44.98,27.01,63.1,74.22,69.31 ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.66,73.31,58.02,45.12,24.67,64.4,58.7,68.9 google/gemma-2-9b (few-shot),9242,256,8224,True,False,2038,1.67,47.39,62.89,37.22,39.11,70.44,53.28,54.51 google/gemma-2-9b-it (few-shot),9242,256,8224,True,False,2062,1.7,50.35,58.6,45.78,30.46,67.99,53.61,68.16 -"meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4125,True,False,1892,1.7,63.71,58.17,36.33,36.06,69.82,46.44,48.89 -"152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32793,True,False,2126,1.72,65.19,59.8,41.86,25.51,66.8,55.18,61.68 +"152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32793,True,False,2126,1.71,65.19,59.8,41.86,25.51,66.8,55.18,61.68 +"meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4125,True,False,1892,1.71,63.71,58.17,36.33,36.06,69.82,46.44,48.89 mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024032,True,False,7095,1.74,66.27,57.7,35.54,34.45,68.65,47.05,52.68 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.78,65.51,55.16,44.6,21.87,63.5,60.83,67.98 +"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.77,65.51,55.16,44.6,21.87,63.5,60.83,67.98 skole-gpt-mixtral 
(few-shot),-1,32,32768,False,False,3583,1.78,57.82,59.45,36.75,33.55,69.37,50.96,39.28 "gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,1.82,59.19,52.66,46.66,26.02,63.3,58.21,65.19 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,1.85,68.62,56.2,43.36,28.5,64.52,50.16,51.13 -"gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,1.96,61.5,55.5,38.96,30.2,64.9,35.39,56.88 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131104,True,False,1005,1.98,67.61,58.07,28.25,28.79,66.87,40.0,45.93 +"gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,1.95,61.5,55.5,38.96,30.2,64.9,35.39,56.88 "VAGOsolutions/SauerkrautLM-7b-HerO (few-shot, val)",7242,32,32768,False,True,2477,2.0,59.7,60.22,35.99,29.68,66.73,30.53,45.02 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.03,58.07,52.18,41.45,26.87,64.1,47.93,51.72 nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.04,61.98,57.07,45.61,24.89,43.28,66.68,75.48 -VAGOsolutions/SauerkrautLM-7b-LaserChat (few-shot),7242,32,4096,False,False,4413,2.06,62.37,49.59,35.57,31.74,64.55,34.68,49.51 -mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.06,60.5,50.39,30.86,30.53,64.03,39.61,60.04 -mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.06,59.1,55.3,37.84,31.71,64.23,47.89,28.53 -google/gemma-7b (few-shot),8538,256,8096,True,False,1378,2.07,39.88,56.23,32.71,36.58,69.41,41.56,30.77 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.07,68.18,58.33,29.12,28.68,65.23,38.44,37.69 -"mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.07,64.81,59.6,27.06,25.22,67.31,35.84,49.13 -utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.07,40.85,56.53,24.74,38.2,69.44,39.43,37.67 -senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.08,64.38,54.44,26.03,25.68,68.16,33.84,50.99 -"cstr/Spaetzle-v8-7b (few-shot, val)",7242,32,32768,False,True,5980,2.09,58.9,61.34,31.58,24.91,67.25,34.62,48.7 +mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.05,60.5,50.39,30.86,30.53,64.03,39.61,60.04 +"mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.05,64.81,59.6,27.06,25.22,67.31,35.84,49.13 +VAGOsolutions/SauerkrautLM-7b-LaserChat (few-shot),7242,32,4096,False,False,4413,2.07,62.37,49.59,35.57,31.74,64.55,34.68,49.51 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.07,68.18,58.33,29.12,28.68,65.23,38.44,37.69 +mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.07,59.1,55.3,37.84,31.71,64.23,47.89,28.53 +google/gemma-7b (few-shot),8538,256,8096,True,False,1378,2.08,39.88,56.23,32.71,36.58,69.41,41.56,30.77 +utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.08,40.85,56.53,24.74,38.2,69.44,39.43,37.67 +senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.09,64.38,54.44,26.03,25.68,68.16,33.84,50.99 VAGOsolutions/FC-SauerkrautLM-7b-beta (few-shot),-1,32,8192,False,False,2160,2.1,56.7,53.39,35.64,34.22,64.79,32.85,39.0 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.1,56.0,56.4,22.01,35.39,68.92,38.12,31.37 +"cstr/Spaetzle-v8-7b (few-shot, val)",7242,32,32768,False,True,5980,2.1,58.9,61.34,31.58,24.91,67.25,34.62,48.7 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.1,56.0,56.4,22.01,35.39,68.92,38.12,31.37 
mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.1,50.17,43.41,40.59,19.75,60.34,52.07,74.99 microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,130976,True,False,7312,2.1,55.36,53.05,23.08,31.55,67.33,39.57,51.26 CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.12,59.95,55.39,30.59,26.94,66.11,38.63,48.53 "meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4125,True,False,1979,2.12,62.39,53.16,31.81,28.99,66.98,35.72,35.26 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.12,62.03,58.15,30.18,26.48,64.68,39.36,47.21 occiglot/occiglot-7b-de-en-instruct (few-shot),7242,32,32768,False,False,1584,2.15,55.76,55.91,22.47,35.95,68.08,33.77,33.15 -meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.18,59.45,53.39,23.87,27.14,69.58,38.77,34.29 "mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.18,63.36,59.8,22.98,20.96,67.58,36.08,47.99 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.21,54.68,55.48,26.89,31.27,66.33,33.98,34.59 nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.22,59.82,50.22,30.43,30.22,62.56,43.09,33.4 @@ -58,27 +57,28 @@ occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.27 occiglot/occiglot-7b-de-en (few-shot),7242,32,32768,True,False,1992,2.28,48.11,54.96,21.57,31.49,68.88,32.39,29.84 meta-llama/Llama-2-13b-hf (few-shot),13016,32,4000,True,False,2898,2.32,52.08,46.38,22.39,33.43,69.5,28.79,21.5 seedboxai/KafkaLM-7B-German-V0.1-DPO (few-shot),7242,32,4096,False,False,6070,2.32,48.92,52.57,20.74,32.87,68.88,31.38,29.83 -neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,2.34,40.63,50.44,10.79,34.67,69.06,31.77,41.12 +meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.34,59.45,53.39,23.87,27.14,64.01,36.56,30.05 occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.34,51.39,47.3,21.83,31.55,69.31,32.49,22.25 seedboxai/KafkaLM-7B-German-V0.1 (few-shot),7242,32,32768,True,False,6065,2.34,48.35,52.51,20.36,32.88,68.82,31.36,29.98 -"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.35,61.83,61.59,46.4,23.77,62.69,17.15,9.7 -NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.36,56.14,53.33,29.49,18.49,64.84,44.31,30.72 +neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,2.35,40.63,50.44,10.79,34.67,69.06,31.77,41.12 +"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.36,61.83,61.59,46.4,23.77,62.69,17.15,9.7 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.36,57.57,49.4,23.32,30.24,68.01,27.06,26.96 -ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,2.37,50.43,57.84,22.58,27.96,65.08,35.21,26.48 -NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.45,57.26,54.57,26.52,19.96,61.1,43.3,23.82 +NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.37,56.14,53.33,29.49,18.49,64.84,44.31,30.72 +ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,2.38,50.43,57.84,22.58,27.96,65.08,35.21,26.48 seedboxai/KafkaLM-13B-German-V0.1-DPO (few-shot),13016,32,8221,False,False,789,2.45,52.45,49.73,20.72,30.18,68.97,27.71,21.44 +NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.46,57.26,54.57,26.52,19.96,61.1,43.3,23.82 mistralai/Mistral-7B-Instruct-v0.2 
(few-shot),7242,32,32768,False,False,2370,2.46,55.1,47.69,24.14,23.93,67.51,26.06,31.09 RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4125,False,False,3254,2.47,57.02,49.75,19.8,27.86,66.83,25.99,24.9 -nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.48,60.69,53.77,38.53,0.0,62.73,45.91,47.37 +nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.49,60.69,53.77,38.53,0.0,62.73,45.91,47.37 timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.52,50.66,54.79,20.17,27.86,65.53,27.04,17.47 google/gemma-2-2b-it (few-shot),2614,256,8224,True,False,5374,2.54,37.31,46.23,23.26,28.01,66.35,30.11,35.25 -"mayflowergmbh/Wiedervereinigung-7b-dpo (few-shot, val)",7242,32,32768,False,True,2374,2.54,52.17,51.92,29.06,14.59,63.78,35.38,34.16 +"mayflowergmbh/Wiedervereinigung-7b-dpo (few-shot, val)",7242,32,32768,False,True,2374,2.55,52.17,51.92,29.06,14.59,63.78,35.38,34.16 mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32797,False,False,634,2.55,47.19,47.26,22.32,24.36,67.75,26.79,20.33 CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.56,51.2,47.79,18.04,29.46,62.58,24.21,30.55 -meta-llama/Llama-3.2-3B (few-shot),3213,128,131104,True,False,3713,2.56,49.85,54.65,3.17,29.37,67.96,29.56,16.15 01-ai/Yi-1.5-6B (few-shot),6061,64,4128,True,False,2867,2.57,48.2,47.12,12.39,30.5,65.48,30.83,24.9 DiscoResearch/DiscoLM_German_7b_v1 (few-shot),7242,32,32768,False,False,1972,2.58,42.39,48.67,8.72,36.12,68.47,23.99,20.76 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131104,False,False,10424,2.58,55.52,50.52,9.87,20.2,66.4,33.58,28.97 +meta-llama/Llama-3.2-3B (few-shot),3213,128,131104,True,False,3713,2.58,49.85,54.65,3.17,29.37,67.96,29.56,16.15 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.6,59.07,49.75,14.71,29.45,67.78,16.39,10.68 ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4128,True,False,10187,2.61,45.81,34.61,16.19,28.25,67.9,28.28,28.25 ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4128,True,False,10194,2.62,53.3,44.95,18.67,28.1,65.95,23.84,16.86 @@ -86,63 +86,64 @@ meta-llama/Llama-2-7b-hf (few-shot),6738,32,4125,True,False,930,2.68,43.02,50.21 DiscoResearch/Llama3-German-8B (few-shot),8030,128,8192,True,False,2383,2.69,54.34,58.32,25.7,0.5,62.61,38.63,33.51 NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.7,67.63,55.79,24.45,1.17,55.51,44.2,31.71 meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4125,False,False,2643,2.71,50.09,46.52,15.23,25.54,67.62,20.12,13.98 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,2047,True,False,8681,2.71,59.14,51.72,33.78,24.82,41.45,38.78,22.32 Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,2.72,42.08,41.52,12.78,29.35,65.56,23.76,20.92 ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,2.72,47.31,48.28,14.08,28.37,61.97,22.99,20.06 ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.73,54.45,43.62,15.24,26.0,66.68,15.81,9.6 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,2047,True,False,3194,2.74,59.33,51.5,33.3,24.83,41.45,38.82,22.6 -ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,2.82,39.38,49.83,4.03,30.2,64.38,20.76,20.66 +ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,2.83,39.38,49.83,4.03,30.2,64.38,20.76,20.66 allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,2.85,39.41,49.42,6.02,27.69,66.75,20.77,10.47 
"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.87,55.59,43.73,23.74,21.36,62.97,16.53,-1.78 -TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,2.89,39.21,58.36,7.03,27.02,68.59,3.52,0.06 google/gemma-2-2b (few-shot),2614,256,8224,True,False,5235,2.89,19.69,50.36,9.07,27.06,68.52,26.5,9.36 google/gemma-7b-it (few-shot),8538,256,8221,False,False,1792,2.89,54.2,15.43,17.49,28.68,64.87,22.1,18.58 -ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,2.91,40.61,31.86,5.36,25.99,66.77,22.17,22.61 -Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,2.93,31.52,39.91,3.27,27.55,65.88,21.32,21.35 -01-ai/Yi-6B (few-shot),6061,64,4125,True,False,6435,2.95,0.0,52.66,7.33,30.05,66.09,30.33,22.89 +TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,2.92,39.21,58.36,7.03,27.02,68.59,3.52,0.06 +ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,2.92,40.61,31.86,5.36,25.99,66.77,22.17,22.61 +Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,2.95,31.52,39.91,3.27,27.55,65.88,21.32,21.35 +01-ai/Yi-6B (few-shot),6061,64,4125,True,False,6435,2.96,0.0,52.66,7.33,30.05,66.09,30.33,22.89 ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.02,37.68,46.0,0.83,26.65,64.14,18.12,9.92 -stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.08,34.81,51.01,0.0,25.4,63.53,11.23,7.25 +stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.09,34.81,51.01,0.0,25.4,63.53,11.23,7.25 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.16,49.16,35.17,9.79,22.48,60.81,6.89,4.79 openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.18,39.39,23.6,7.68,25.3,63.84,14.99,14.99 Rijgersberg/GEITje-7B (few-shot),7242,32,32797,True,False,5887,3.19,0.0,47.67,9.67,26.23,65.42,19.03,9.87 meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131104,False,False,7436,3.23,42.51,38.26,5.48,19.43,60.81,16.06,6.64 VAGOsolutions/SauerkrautLM-Gemma-2b (few-shot),2506,256,8192,False,False,3607,3.29,12.21,44.84,2.02,24.59,62.87,16.35,8.93 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.29,28.49,43.18,2.92,23.26,64.64,2.6,1.46 -MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.31,32.33,26.39,1.44,28.15,58.62,14.94,8.7 +utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.3,28.49,43.18,2.92,23.26,64.64,2.6,1.46 google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.32,16.95,44.96,0.77,17.92,66.8,12.11,7.32 -google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.34,36.62,28.54,1.15,23.39,63.02,12.27,6.57 +MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.33,32.33,26.39,1.44,28.15,58.62,14.94,8.7 +google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.33,36.62,28.54,1.15,23.39,63.02,12.27,6.57 openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.37,38.81,10.59,0.91,22.54,64.28,15.69,14.18 Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.38,28.04,36.21,3.12,16.33,61.47,13.44,8.31 HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.41,32.54,27.03,8.95,18.38,59.91,12.3,6.28 -ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.43,36.56,40.65,4.68,19.45,57.01,4.68,0.48 -"seedboxai/KafkaLM-70B-German-V0.1 (few-shot, 
val)",68977,32,4125,True,False,294,3.43,59.23,-5.01,3.19,19.84,62.42,18.4,1.52 -HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.45,28.67,19.69,5.07,18.43,64.25,15.15,6.23 -LumiOpen/Viking-13B (few-shot),14030,131,4128,True,False,840,3.45,34.53,42.9,1.51,15.83,61.4,-1.84,-0.12 +ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.44,36.56,40.65,4.68,19.45,57.01,4.68,0.48 meta-llama/Llama-3.2-1B (few-shot),1236,128,131104,True,False,7577,3.45,24.79,47.65,2.39,13.39,61.07,3.94,0.9 -ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.48,49.38,41.72,7.67,13.7,45.88,8.73,6.18 -NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,3.49,25.69,33.48,3.73,14.82,63.85,2.96,1.88 +"seedboxai/KafkaLM-70B-German-V0.1 (few-shot, val)",68977,32,4125,True,False,294,3.45,59.23,-5.01,3.19,19.84,62.42,18.4,1.52 +HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.46,28.67,19.69,5.07,18.43,64.25,15.15,6.23 +LumiOpen/Viking-13B (few-shot),14030,131,4128,True,False,840,3.46,34.53,42.9,1.51,15.83,61.4,-1.84,-0.12 +ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.49,49.38,41.72,7.67,13.7,45.88,8.73,6.18 +NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,3.51,25.69,33.48,3.73,14.82,63.85,2.96,1.88 +AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2077,True,False,1831,3.52,24.35,43.35,2.38,15.56,57.47,4.67,1.77 +Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,3.52,9.23,38.3,0.39,16.67,61.74,13.65,8.86 RuterNorway/Llama-2-7b-chat-norwegian (few-shot),6738,32,4096,False,False,10890,3.52,27.22,33.61,0.45,20.44,60.5,-0.1,-1.0 -Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,3.53,9.23,38.3,0.39,16.67,61.74,13.65,8.86 -AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2077,True,False,1831,3.54,24.35,43.35,2.38,15.56,57.47,4.67,1.77 NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,3.56,19.29,43.88,5.63,17.02,54.94,5.89,4.11 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,3.6,32.54,22.27,7.18,16.72,58.08,6.57,5.16 AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2077,True,False,1875,3.64,36.17,34.17,2.21,13.6,51.97,3.68,0.64 ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.65,19.94,34.66,0.7,14.66,61.26,-0.48,0.28 "DiscoResearch/DiscoLM-70b (few-shot, val)",68977,32,8221,False,False,291,3.71,62.16,27.56,0.59,0.0,52.77,4.04,7.33 Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.81,27.34,10.64,0.33,11.81,59.71,6.34,2.94 -Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.9,24.67,9.31,1.11,13.6,56.42,2.75,3.41 PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.91,23.08,7.41,0.89,17.32,56.57,-0.63,-0.17 +Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.91,24.67,9.31,1.11,13.6,56.42,2.75,3.41 PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.93,34.68,21.76,0.85,14.3,43.98,0.36,1.32 allenai/OLMo-7B (few-shot),6888,50,2080,True,False,5403,3.93,30.85,49.77,2.67,4.09,42.64,1.94,1.24 HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,3.98,19.94,19.64,0.0,8.78,57.06,0.32,-0.65 -state-spaces/mamba-2.8b-hf (few-shot),2768,50,32800,True,False,2722,3.98,21.96,18.66,0.16,7.08,56.47,1.65,0.02 +state-spaces/mamba-2.8b-hf (few-shot),2768,50,32800,True,False,2722,3.99,21.96,18.66,0.16,7.08,56.47,1.65,0.02 allenai/OLMo-7B-Twin-2T 
(few-shot),6888,50,2080,True,False,5484,4.17,14.06,28.07,2.31,6.89,43.42,1.66,1.5 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.19,18.77,12.59,1.64,9.27,46.31,0.84,0.01 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.19,24.32,15.58,1.25,6.82,44.9,-0.19,1.18 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.26,25.3,13.9,-0.25,6.12,41.9,-1.81,0.0 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.21,18.77,12.59,1.64,9.27,46.31,0.84,0.01 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.21,24.32,15.58,1.25,6.82,44.9,-0.19,1.18 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.27,25.3,13.9,-0.25,6.12,41.9,-1.81,0.0 PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.28,21.03,10.99,0.13,3.61,43.73,-0.77,0.11 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.3,16.89,2.74,-0.34,0.28,54.79,-0.32,0.18 -RJuro/kanelsnegl-v0.1 (few-shot),7242,32,541,True,False,5847,4.38,0.0,0.0,0.0,0.0,59.26,1.16,0.31 +HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.31,16.89,2.74,-0.34,0.28,54.79,-0.32,0.18 allenai/OLMo-1B (few-shot),1177,50,2080,True,False,8536,4.38,21.46,21.03,0.13,0.71,39.77,-0.08,-0.63 HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.4,15.54,2.51,0.36,1.77,49.41,-1.46,-0.18 -NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.73,5.8,4.45,-0.48,0.08,35.92,0.77,-0.34 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.78,0.0,0.0,0.0,0.0,37.66,0.87,1.95 -ai-forever/mGPT (few-shot),-1,100,2077,True,False,11734,4.94,0.0,0.19,-0.12,0.0,29.43,-0.69,0.15 +RJuro/kanelsnegl-v0.1 (few-shot),7242,32,541,True,False,5847,4.4,0.0,0.0,0.0,0.0,59.26,1.16,0.31 +NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.75,5.8,4.45,-0.48,0.08,35.92,0.77,-0.34 +ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.79,0.0,0.0,0.0,0.0,37.66,0.87,1.95 +ai-forever/mGPT (few-shot),-1,100,2077,True,False,11734,4.96,0.0,0.19,-0.12,0.0,29.43,-0.69,0.15 diff --git a/german-nlg.md b/german-nlg.md index 8a1489e0..81f114c5 100644 --- a/german-nlg.md +++ b/german-nlg.md @@ -3,7 +3,7 @@ layout: leaderboard title: German NLG 🇩🇪 --- -
Last updated: 10/01/2025 12:30:31 CET
+
Last updated: 11/01/2025 11:03:41 CET
@@ -247,29 +247,6 @@ title: German NLG 🇩🇪 14.0.3 14.0.3 - - google/gemma-2-27b-it (few-shot) - 27227 - 256 - 8224 - True - 1,516 ± 257 / 480 ± 148 - 1.56 - 64.13 ± 1.65 / 55.46 ± 2.00 - 60.28 ± 1.75 / 73.37 ± 1.43 - 46.69 ± 1.99 / 71.96 ± 0.73 - 28.54 ± 1.38 / 59.38 ± 1.85 - 67.87 ± 0.99 / 24.26 ± 2.65 - 59.43 ± 0.82 / 69.41 ± 0.61 - 71.59 ± 1.49 / 78.30 ± 1.23 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - upstage/SOLAR-10.7B-v1.0 (few-shot) 10732 @@ -293,6 +270,29 @@ title: German NLG 🇩🇪 12.5.3 12.5.3 + + google/gemma-2-27b-it (few-shot) + 27227 + 256 + 8224 + True + 1,516 ± 257 / 480 ± 148 + 1.57 + 64.13 ± 1.65 / 55.46 ± 2.00 + 60.28 ± 1.75 / 73.37 ± 1.43 + 46.69 ± 1.99 / 71.96 ± 0.73 + 28.54 ± 1.38 / 59.38 ± 1.85 + 67.87 ± 0.99 / 24.26 ± 2.65 + 59.43 ± 0.82 / 69.41 ± 0.61 + 71.59 ± 1.49 / 78.30 ± 1.23 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + gpt-4-1106-preview (zero-shot, val) unknown @@ -432,20 +432,20 @@ title: German NLG 🇩🇪 13.0.0 - meta-llama/Llama-2-70b-hf (few-shot, val) + 152334H/miqu-1-70b-sf (few-shot, val) 68977 32 - 4125 + 32793 True - 1,892 ± 650 / 318 ± 105 - 1.70 - 63.71 ± 2.43 / 57.08 ± 2.70 - 58.17 ± 2.51 / 71.34 ± 1.62 - 36.33 ± 5.00 / 64.51 ± 3.38 - 36.06 ± 2.89 / 69.62 ± 2.81 - 69.82 ± 1.03 / 30.79 ± 2.46 - 46.44 ± 2.49 / 59.88 ± 1.74 - 48.89 ± 2.85 / 61.33 ± 2.06 + 2,126 ± 676 / 319 ± 104 + 1.71 + 65.19 ± 2.58 / 56.17 ± 3.57 + 59.80 ± 2.15 / 71.98 ± 1.46 + 41.86 ± 5.44 / 69.70 ± 2.31 + 25.51 ± 3.79 / 63.19 ± 2.48 + 66.80 ± 0.76 / 20.65 ± 1.59 + 55.18 ± 3.35 / 66.48 ± 2.49 + 61.68 ± 3.07 / 70.78 ± 2.37 12.7.0 12.7.0 12.7.0 @@ -455,20 +455,20 @@ title: German NLG 🇩🇪 12.7.0 - 152334H/miqu-1-70b-sf (few-shot, val) + meta-llama/Llama-2-70b-hf (few-shot, val) 68977 32 - 32793 + 4125 True - 2,126 ± 676 / 319 ± 104 - 1.72 - 65.19 ± 2.58 / 56.17 ± 3.57 - 59.80 ± 2.15 / 71.98 ± 1.46 - 41.86 ± 5.44 / 69.70 ± 2.31 - 25.51 ± 3.79 / 63.19 ± 2.48 - 66.80 ± 0.76 / 20.65 ± 1.59 - 55.18 ± 3.35 / 66.48 ± 2.49 - 61.68 ± 3.07 / 70.78 ± 2.37 + 1,892 ± 650 / 318 ± 105 + 1.71 + 63.71 ± 2.43 / 57.08 ± 2.70 + 58.17 ± 2.51 / 71.34 ± 1.62 + 36.33 ± 5.00 / 64.51 ± 3.38 + 36.06 ± 2.89 / 69.62 ± 2.81 + 69.82 ± 1.03 / 30.79 ± 2.46 + 46.44 ± 2.49 / 59.88 ± 1.74 + 48.89 ± 2.85 / 61.33 ± 2.06 12.7.0 12.7.0 12.7.0 @@ -507,7 +507,7 @@ title: German NLG 🇩🇪 8191 True 784 ± 310 / 95 ± 28 - 1.78 + 1.77 65.51 ± 1.91 / 48.61 ± 3.73 55.16 ± 3.05 / 69.81 ± 1.96 44.60 ± 3.19 / 67.35 ± 2.53 @@ -599,7 +599,7 @@ title: German NLG 🇩🇪 4095 True 921 ± 293 / 113 ± 37 - 1.96 + 1.95 61.50 ± 2.96 / 46.22 ± 3.41 55.50 ± 2.58 / 68.96 ± 2.00 38.96 ± 4.39 / 68.89 ± 2.54 @@ -615,29 +615,6 @@ title: German NLG 🇩🇪 0.0.0 0.0.0 - - meta-llama/Llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131104 - True - 1,005 ± 330 / 196 ± 74 - 1.98 - 67.61 ± 1.23 / 60.39 ± 1.02 - 58.07 ± 2.32 / 70.76 ± 1.84 - 28.25 ± 3.57 / 59.54 ± 3.88 - 28.79 ± 2.02 / 55.82 ± 3.28 - 66.87 ± 0.54 / 21.31 ± 1.47 - 40.00 ± 0.97 / 54.88 ± 0.70 - 45.93 ± 1.38 / 58.95 ± 1.06 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - VAGOsolutions/SauerkrautLM-7b-HerO (few-shot, val) 7242 @@ -707,6 +684,52 @@ title: German NLG 🇩🇪 14.0.4 14.0.4 + + mistralai/Ministral-8B-Instruct-2410 (few-shot) + 8020 + 131 + 32768 + True + 1,302 ± 323 / 253 ± 86 + 2.05 + 60.50 ± 1.22 / 40.72 ± 1.61 + 50.39 ± 2.45 / 66.39 ± 1.62 + 30.86 ± 1.37 / 53.78 ± 1.61 + 30.53 ± 1.21 / 58.26 ± 1.93 + 64.03 ± 0.05 / 12.90 ± 0.19 + 39.61 ± 0.90 / 54.41 ± 0.67 + 60.04 ± 1.94 / 69.43 ± 1.54 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 
14.1.2 + + + mlabonne/NeuralBeagle14-7B (few-shot, val) + 7242 + 32 + 8192 + False + 2,549 ± 472 / 784 ± 245 + 2.05 + 64.81 ± 3.03 / 53.01 ± 3.41 + 59.60 ± 2.81 / 72.42 ± 1.83 + 27.06 ± 4.53 / 63.33 ± 2.30 + 25.22 ± 3.84 / 60.93 ± 2.99 + 67.31 ± 1.05 / 24.72 ± 2.95 + 35.84 ± 2.16 / 51.64 ± 1.56 + 49.13 ± 2.71 / 61.68 ± 2.03 + 9.3.2 + 9.3.2 + 9.3.2 + 12.5.2 + 9.3.2 + 9.3.2 + 9.3.2 + VAGOsolutions/SauerkrautLM-7b-LaserChat (few-shot) 7242 @@ -714,7 +737,7 @@ title: German NLG 🇩🇪 4096 False 4,413 ± 1,265 / 551 ± 184 - 2.06 + 2.07 62.37 ± 0.98 / 39.99 ± 2.90 49.59 ± 1.99 / 65.96 ± 1.43 35.57 ± 1.53 / 66.87 ± 0.69 @@ -731,27 +754,27 @@ title: German NLG 🇩🇪 14.1.2 - mistralai/Ministral-8B-Instruct-2410 (few-shot) - 8020 - 131 - 32768 + meta-llama/Meta-Llama-3-8B-Instruct (few-shot) + 8030 + 128 + 8192 True - 1,302 ± 323 / 253 ± 86 - 2.06 - 60.50 ± 1.22 / 40.72 ± 1.61 - 50.39 ± 2.45 / 66.39 ± 1.62 - 30.86 ± 1.37 / 53.78 ± 1.61 - 30.53 ± 1.21 / 58.26 ± 1.93 - 64.03 ± 0.05 / 12.90 ± 0.19 - 39.61 ± 0.90 / 54.41 ± 0.67 - 60.04 ± 1.94 / 69.43 ± 1.54 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 + 1,483 ± 377 / 287 ± 97 + 2.07 + 68.18 ± 0.95 / 57.72 ± 1.15 + 58.33 ± 2.83 / 69.31 ± 3.16 + 29.12 ± 3.17 / 63.60 ± 1.63 + 28.68 ± 1.99 / 56.42 ± 3.34 + 65.23 ± 0.49 / 16.56 ± 0.94 + 38.44 ± 0.81 / 53.38 ± 0.60 + 37.69 ± 1.00 / 51.24 ± 0.73 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 mistralai/Mixtral-8x7B-v0.1 (few-shot) @@ -760,7 +783,7 @@ title: German NLG 🇩🇪 32768 True 2,363 ± 794 / 311 ± 105 - 2.06 + 2.07 59.10 ± 1.24 / 46.00 ± 3.08 55.30 ± 4.28 / 67.32 ± 5.07 37.84 ± 4.60 / 67.45 ± 2.49 @@ -783,7 +806,7 @@ title: German NLG 🇩🇪 8096 True 1,378 ± 260 / 387 ± 119 - 2.07 + 2.08 39.88 ± 2.56 / 35.40 ± 2.63 56.23 ± 3.17 / 68.87 ± 2.73 32.71 ± 1.60 / 64.55 ± 1.54 @@ -799,52 +822,6 @@ title: German NLG 🇩🇪 12.9.1 12.10.0 - - meta-llama/Meta-Llama-3-8B-Instruct (few-shot) - 8030 - 128 - 8192 - True - 1,007 ± 316 / 162 ± 45 - 2.07 - 68.18 ± 0.95 / 57.72 ± 1.15 - 58.33 ± 2.83 / 69.31 ± 3.16 - 29.12 ± 3.17 / 63.60 ± 1.63 - 28.68 ± 1.99 / 56.42 ± 3.34 - 65.23 ± 0.49 / 16.56 ± 0.94 - 38.44 ± 0.81 / 53.38 ± 0.60 - 37.69 ± 1.00 / 51.24 ± 0.73 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - - - mlabonne/NeuralBeagle14-7B (few-shot, val) - 7242 - 32 - 8192 - False - 2,549 ± 472 / 784 ± 245 - 2.07 - 64.81 ± 3.03 / 53.01 ± 3.41 - 59.60 ± 2.81 / 72.42 ± 1.83 - 27.06 ± 4.53 / 63.33 ± 2.30 - 25.22 ± 3.84 / 60.93 ± 2.99 - 67.31 ± 1.05 / 24.72 ± 2.95 - 35.84 ± 2.16 / 51.64 ± 1.56 - 49.13 ± 2.71 / 61.68 ± 2.03 - 9.3.2 - 9.3.2 - 9.3.2 - 12.5.2 - 9.3.2 - 9.3.2 - 9.3.2 - utter-project/EuroLLM-9B-Instruct (few-shot) 9152 @@ -852,7 +829,7 @@ title: German NLG 🇩🇪 4096 False 1,483 ± 321 / 379 ± 158 - 2.07 + 2.08 40.85 ± 2.21 / 34.64 ± 2.00 56.53 ± 2.49 / 69.48 ± 1.70 24.74 ± 4.45 / 56.60 ± 5.56 @@ -875,7 +852,7 @@ title: German NLG 🇩🇪 32768 False 5,993 ± 1,028 / 1,742 ± 561 - 2.08 + 2.09 64.38 ± 1.60 / 50.26 ± 2.53 54.44 ± 1.45 / 69.32 ± 1.02 26.03 ± 2.23 / 61.88 ± 1.38 @@ -891,29 +868,6 @@ title: German NLG 🇩🇪 12.6.1 12.6.1 - - cstr/Spaetzle-v8-7b (few-shot, val) - 7242 - 32 - 32768 - False - 5,980 ± 1,031 / 1,714 ± 552 - 2.09 - 58.90 ± 2.30 / 45.55 ± 3.30 - 61.34 ± 1.90 / 72.98 ± 1.30 - 31.58 ± 4.39 / 65.51 ± 2.23 - 24.91 ± 3.98 / 60.88 ± 3.31 - 67.25 ± 1.06 / 22.95 ± 2.64 - 34.62 ± 2.20 / 50.43 ± 1.52 - 48.70 ± 2.47 / 61.05 ± 1.79 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - VAGOsolutions/FC-SauerkrautLM-7b-beta (few-shot) unknown @@ -937,13 +891,36 @@ title: 
German NLG 🇩🇪 12.6.1 12.6.1 + + cstr/Spaetzle-v8-7b (few-shot, val) + 7242 + 32 + 32768 + False + 5,980 ± 1,031 / 1,714 ± 552 + 2.10 + 58.90 ± 2.30 / 45.55 ± 3.30 + 61.34 ± 1.90 / 72.98 ± 1.30 + 31.58 ± 4.39 / 65.51 ± 2.23 + 24.91 ± 3.98 / 60.88 ± 3.31 + 67.25 ± 1.06 / 22.95 ± 2.64 + 34.62 ± 2.20 / 50.43 ± 1.52 + 48.70 ± 2.47 / 61.05 ± 1.79 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + meta-llama/Meta-Llama-3-8B (few-shot) 8030 128 8192 True - 1,335 ± 338 / 260 ± 88 + 1,477 ± 376 / 285 ± 97 2.10 56.00 ± 1.94 / 43.49 ± 2.05 56.40 ± 3.89 / 70.17 ± 2.91 @@ -1052,6 +1029,29 @@ title: German NLG 🇩🇪 12.7.0 12.7.0 + + meta-llama/Llama-3.1-8B-Instruct (few-shot) + 8030 + 128 + 131072 + True + 1,473 ± 377 / 283 ± 96 + 2.12 + 62.03 ± 1.17 / 45.31 ± 1.89 + 58.15 ± 2.40 / 70.81 ± 1.86 + 30.18 ± 1.92 / 63.41 ± 1.19 + 26.48 ± 0.98 / 52.41 ± 1.48 + 64.68 ± 0.10 / 15.03 ± 0.29 + 39.36 ± 0.79 / 54.48 ± 0.57 + 47.21 ± 1.02 / 60.39 ± 0.77 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + occiglot/occiglot-7b-de-en-instruct (few-shot) 7242 @@ -1075,29 +1075,6 @@ title: German NLG 🇩🇪 12.3.1 12.3.1 - - meta-llama/Llama-3.1-8B (few-shot) - 8030 - 128 - 131072 - True - 2,986 ± 823 / 276 ± 94 - 2.18 - 59.45 ± 1.64 / 46.60 ± 2.02 - 53.39 ± 5.35 / 65.74 ± 5.74 - 23.87 ± 5.74 / 57.17 ± 6.01 - 27.14 ± 2.19 / 54.63 ± 2.74 - 69.58 ± 1.37 / 28.95 ± 2.96 - 38.77 ± 0.95 / 53.99 ± 0.67 - 34.29 ± 1.05 / 49.79 ± 0.88 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 13.0.0 - 13.0.0 - 13.0.0 - mlabonne/AlphaMonarch-7B (few-shot, val) 7242 @@ -1398,27 +1375,27 @@ title: German NLG 🇩🇪 12.3.2 - neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot) + meta-llama/Llama-3.1-8B (few-shot) 8030 128 131072 True - 2,996 ± 817 / 284 ± 96 + 2,986 ± 823 / 276 ± 94 2.34 - 40.63 ± 2.51 / 38.86 ± 1.98 - 50.44 ± 2.55 / 62.82 ± 2.61 - 10.79 ± 1.67 / 40.21 ± 2.83 - 34.67 ± 1.48 / 61.01 ± 1.78 - 69.06 ± 1.32 / 29.21 ± 3.24 - 31.77 ± 0.71 / 48.72 ± 0.47 - 41.12 ± 1.38 / 53.80 ± 1.47 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 + 59.45 ± 1.64 / 46.60 ± 2.02 + 53.39 ± 5.35 / 65.74 ± 5.74 + 23.87 ± 5.74 / 57.17 ± 6.01 + 27.14 ± 2.19 / 54.63 ± 2.74 + 64.01 ± 1.18 / 17.85 ± 1.99 + 36.56 ± 0.73 / 52.11 ± 0.56 + 30.05 ± 2.57 / 46.82 ± 2.11 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 occiglot/occiglot-7b-eu5 (few-shot) @@ -1466,6 +1443,29 @@ title: German NLG 🇩🇪 12.3.2 12.3.2 + + neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot) + 8030 + 128 + 131072 + True + 2,996 ± 817 / 284 ± 96 + 2.35 + 40.63 ± 2.51 / 38.86 ± 1.98 + 50.44 ± 2.55 / 62.82 ± 2.61 + 10.79 ± 1.67 / 40.21 ± 2.83 + 34.67 ± 1.48 / 61.01 ± 1.78 + 69.06 ± 1.32 / 29.21 ± 3.24 + 31.77 ± 0.71 / 48.72 ± 0.47 + 41.12 ± 1.38 / 53.80 ± 1.47 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + claude-3-5-sonnet-20241022 (zero-shot, val) unknown @@ -1473,7 +1473,7 @@ title: German NLG 🇩🇪 200000 True 193 ± 87 / 55 ± 19 - 2.35 + 2.36 61.83 ± 1.50 / 46.40 ± 1.54 61.59 ± 4.13 / 73.65 ± 2.62 46.40 ± 3.16 / 69.51 ± 1.58 @@ -1489,29 +1489,6 @@ title: German NLG 🇩🇪 14.0.3 14.0.3 - - NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) - 46998 - 68 - 32768 - True - 9,015 ± 2,966 / 1,121 ± 510 - 2.36 - 56.14 ± 1.69 / 44.49 ± 3.25 - 53.33 ± 4.66 / 64.05 ± 6.18 - 29.49 ± 6.46 / 59.01 ± 6.80 - 18.49 ± 4.03 / 55.32 ± 2.95 - 64.84 ± 1.06 / 21.23 ± 1.72 - 44.31 ± 0.88 / 57.84 ± 0.67 - 30.72 ± 4.07 / 46.97 ± 3.37 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - meta-llama/Llama-2-13b-chat-hf (few-shot) 13016 @@ -1535,6 +1512,29 @@ title: German NLG 
🇩🇪 12.10.4 12.10.4 + + NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) + 46998 + 68 + 32768 + True + 9,015 ± 2,966 / 1,121 ± 510 + 2.37 + 56.14 ± 1.69 / 44.49 ± 3.25 + 53.33 ± 4.66 / 64.05 ± 6.18 + 29.49 ± 6.46 / 59.01 ± 6.80 + 18.49 ± 4.03 / 55.32 ± 2.95 + 64.84 ± 1.06 / 21.23 ± 1.72 + 44.31 ± 0.88 / 57.84 ± 0.67 + 30.72 ± 4.07 / 46.97 ± 3.37 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + ibm-granite/granite-3.0-8b-base (few-shot) 8171 @@ -1542,7 +1542,7 @@ title: German NLG 🇩🇪 4096 True 2,515 ± 625 / 476 ± 159 - 2.37 + 2.38 50.43 ± 1.80 / 41.26 ± 3.08 57.84 ± 3.59 / 71.20 ± 2.77 22.58 ± 5.59 / 53.17 ± 6.23 @@ -1558,29 +1558,6 @@ title: German NLG 🇩🇪 14.1.2 14.1.2 - - NorwAI/NorwAI-Mixtral-8x7B (few-shot) - 46998 - 68 - 32768 - True - 2,368 ± 793 / 317 ± 108 - 2.45 - 57.26 ± 1.41 / 42.54 ± 2.77 - 54.57 ± 4.98 / 68.40 ± 4.08 - 26.52 ± 4.63 / 59.83 ± 5.12 - 19.96 ± 1.50 / 43.10 ± 2.18 - 61.10 ± 0.91 / 14.12 ± 1.59 - 43.30 ± 1.06 / 57.22 ± 0.87 - 23.82 ± 3.21 / 41.16 ± 2.65 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - seedboxai/KafkaLM-13B-German-V0.1-DPO (few-shot) 13016 @@ -1604,6 +1581,29 @@ title: German NLG 🇩🇪 12.10.0 12.10.0 + + NorwAI/NorwAI-Mixtral-8x7B (few-shot) + 46998 + 68 + 32768 + True + 2,368 ± 793 / 317 ± 108 + 2.46 + 57.26 ± 1.41 / 42.54 ± 2.77 + 54.57 ± 4.98 / 68.40 ± 4.08 + 26.52 ± 4.63 / 59.83 ± 5.12 + 19.96 ± 1.50 / 43.10 ± 2.18 + 61.10 ± 0.91 / 14.12 ± 1.59 + 43.30 ± 1.06 / 57.22 ± 0.87 + 23.82 ± 3.21 / 41.16 ± 2.65 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + mistralai/Mistral-7B-Instruct-v0.2 (few-shot) 7242 @@ -1657,7 +1657,7 @@ title: German NLG 🇩🇪 8192 True 3,161 ± 676 / 1,247 ± 481 - 2.48 + 2.49 60.69 ± 1.19 / 45.82 ± 1.42 53.77 ± 2.21 / 66.24 ± 2.25 38.53 ± 2.04 / 65.65 ± 2.18 @@ -1726,7 +1726,7 @@ title: German NLG 🇩🇪 32768 False 2,374 ± 432 / 744 ± 230 - 2.54 + 2.55 52.17 ± 2.87 / 40.26 ± 2.43 51.92 ± 3.19 / 67.12 ± 2.11 29.06 ± 5.04 / 62.77 ± 2.22 @@ -1788,29 +1788,6 @@ title: German NLG 🇩🇪 14.1.2 14.1.2 - - meta-llama/Llama-3.2-3B (few-shot) - 3213 - 128 - 131104 - True - 3,713 ± 877 / 836 ± 267 - 2.56 - 49.85 ± 1.96 / 41.04 ± 2.44 - 54.65 ± 1.58 / 65.94 ± 2.42 - 3.17 ± 5.20 / 36.54 ± 5.71 - 29.37 ± 3.48 / 58.09 ± 4.16 - 67.96 ± 1.38 / 25.30 ± 2.54 - 29.56 ± 0.63 / 46.21 ± 0.55 - 16.15 ± 1.34 / 35.40 ± 1.27 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 01-ai/Yi-1.5-6B (few-shot) 6061 @@ -1880,6 +1857,29 @@ title: German NLG 🇩🇪 13.0.0 13.0.0 + + meta-llama/Llama-3.2-3B (few-shot) + 3213 + 128 + 131104 + True + 3,713 ± 877 / 836 ± 267 + 2.58 + 49.85 ± 1.96 / 41.04 ± 2.44 + 54.65 ± 1.58 / 65.94 ± 2.42 + 3.17 ± 5.20 / 36.54 ± 5.71 + 29.37 ± 3.48 / 58.09 ± 4.16 + 67.96 ± 1.38 / 25.30 ± 2.54 + 29.56 ± 0.63 / 46.21 ± 0.55 + 16.15 ± 1.34 / 35.40 ± 1.27 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + ibm-granite/granite-8b-code-base-4k (few-shot) 8055 @@ -2041,6 +2041,29 @@ title: German NLG 🇩🇪 12.10.0 12.10.0 + + microsoft/Phi-3-mini-4k-instruct (few-shot) + 3821 + 32 + 2047 + True + 8,681 ± 1,650 / 2,177 ± 717 + 2.71 + 59.14 ± 1.40 / 43.16 ± 2.75 + 51.72 ± 2.44 / 65.74 ± 2.10 + 33.78 ± 2.17 / 65.03 ± 1.74 + 24.82 ± 1.35 / 53.54 ± 2.38 + 41.45 ± 11.63 / 10.13 ± 2.62 + 38.78 ± 0.85 / 53.77 ± 0.65 + 22.32 ± 9.88 / 39.90 ± 7.49 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + Qwen/Qwen1.5-4B-Chat (few-shot) 3950 @@ -2110,29 +2133,6 @@ title: German NLG 🇩🇪 13.0.0 13.0.0 - - microsoft/Phi-3-mini-4k-instruct (few-shot) - 3821 - 32 - 2047 - True - 3,194 ± 687 / 650 ± 
216 - 2.74 - 59.33 ± 1.30 / 43.34 ± 2.86 - 51.50 ± 2.48 / 65.62 ± 2.13 - 33.30 ± 2.11 / 64.80 ± 1.68 - 24.83 ± 1.38 / 53.63 ± 2.42 - 41.45 ± 11.62 / 10.13 ± 2.61 - 38.82 ± 0.82 / 53.79 ± 0.61 - 22.60 ± 9.95 / 40.08 ± 7.53 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - ibm-granite/granite-7b-instruct (few-shot) 6738 @@ -2140,7 +2140,7 @@ title: German NLG 🇩🇪 4096 True 3,136 ± 558 / 942 ± 290 - 2.82 + 2.83 39.38 ± 0.94 / 30.79 ± 1.08 49.83 ± 1.61 / 66.00 ± 1.36 4.03 ± 1.54 / 40.66 ± 3.14 @@ -2202,29 +2202,6 @@ title: German NLG 🇩🇪 14.0.3 14.0.3 - - TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) - 7800 - 100 - 4096 - True - 6,197 ± 1,118 / 1,730 ± 577 - 2.89 - 39.21 ± 2.29 / 36.08 ± 2.06 - 58.36 ± 1.80 / 71.98 ± 1.17 - 7.03 ± 3.09 / 50.18 ± 3.74 - 27.02 ± 3.09 / 51.94 ± 4.17 - 68.59 ± 0.76 / 25.49 ± 2.17 - 3.52 ± 0.84 / 26.38 ± 0.62 - 0.06 ± 0.82 / 24.56 ± 0.78 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - google/gemma-2-2b (few-shot) 2614 @@ -2271,6 +2248,29 @@ title: German NLG 🇩🇪 12.10.0 12.10.0 + + TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) + 7800 + 100 + 4096 + True + 6,197 ± 1,118 / 1,730 ± 577 + 2.92 + 39.21 ± 2.29 / 36.08 ± 2.06 + 58.36 ± 1.80 / 71.98 ± 1.17 + 7.03 ± 3.09 / 50.18 ± 3.74 + 27.02 ± 3.09 / 51.94 ± 4.17 + 68.59 ± 0.76 / 25.49 ± 2.17 + 3.52 ± 0.84 / 26.38 ± 0.62 + 0.06 ± 0.82 / 24.56 ± 0.78 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + ibm-granite/granite-3.0-3b-a800m-base (few-shot) 3374 @@ -2278,7 +2278,7 @@ title: German NLG 🇩🇪 4096 True 10,504 ± 3,028 / 1,678 ± 559 - 2.91 + 2.92 40.61 ± 2.18 / 28.49 ± 2.11 31.86 ± 3.60 / 42.96 ± 3.98 5.36 ± 3.96 / 37.83 ± 4.03 @@ -2301,7 +2301,7 @@ title: German NLG 🇩🇪 32768 True 3,248 ± 739 / 761 ± 252 - 2.93 + 2.95 31.52 ± 2.96 / 29.20 ± 1.88 39.91 ± 3.29 / 53.66 ± 3.20 3.27 ± 2.51 / 34.30 ± 1.29 @@ -2324,7 +2324,7 @@ title: German NLG 🇩🇪 4125 True 6,435 ± 1,316 / 1,632 ± 549 - 2.95 + 2.96 0.00 ± 0.00 / 0.00 ± 0.00 52.66 ± 2.45 / 67.63 ± 1.87 7.33 ± 2.53 / 37.69 ± 2.51 @@ -2370,7 +2370,7 @@ title: German NLG 🇩🇪 4096 True 7,259 ± 2,120 / 1,240 ± 432 - 3.08 + 3.09 34.81 ± 2.51 / 30.33 ± 2.95 51.01 ± 2.18 / 65.35 ± 2.23 0.00 ± 0.00 / 33.34 ± 0.31 @@ -2508,7 +2508,7 @@ title: German NLG 🇩🇪 4096 True 15,009 ± 4,072 / 2,702 ± 878 - 3.29 + 3.30 28.49 ± 2.30 / 24.73 ± 1.76 43.18 ± 2.89 / 58.22 ± 3.50 2.92 ± 1.40 / 44.65 ± 3.51 @@ -2524,29 +2524,6 @@ title: German NLG 🇩🇪 13.1.0 13.1.0 - - MaLA-LM/emma-500-llama2-7b (few-shot) - 6738 - 32 - 4096 - True - 6,275 ± 1,193 / 1,755 ± 578 - 3.31 - 32.33 ± 2.48 / 30.20 ± 1.92 - 26.39 ± 5.23 / 36.06 ± 6.62 - 1.44 ± 1.38 / 33.60 ± 0.42 - 28.15 ± 5.57 / 54.13 ± 6.75 - 58.62 ± 3.80 / 13.54 ± 2.20 - 14.94 ± 1.09 / 35.44 ± 0.89 - 8.70 ± 0.96 / 31.09 ± 0.58 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - google/gemma-2b (few-shot) 2506 @@ -2570,6 +2547,29 @@ title: German NLG 🇩🇪 12.1.0 12.1.0 + + MaLA-LM/emma-500-llama2-7b (few-shot) + 6738 + 32 + 4096 + True + 6,275 ± 1,193 / 1,755 ± 578 + 3.33 + 32.33 ± 2.48 / 30.20 ± 1.92 + 26.39 ± 5.23 / 36.06 ± 6.62 + 1.44 ± 1.38 / 33.60 ± 0.42 + 28.15 ± 5.57 / 54.13 ± 6.75 + 58.62 ± 3.80 / 13.54 ± 2.20 + 14.94 ± 1.09 / 35.44 ± 0.89 + 8.70 ± 0.96 / 31.09 ± 0.58 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + google/gemma-2b-it (few-shot) 2506 @@ -2577,7 +2577,7 @@ title: German NLG 🇩🇪 8192 False 6,471 ± 1,142 / 1,961 ± 584 - 3.34 + 3.33 36.62 ± 1.56 / 28.22 ± 1.66 28.54 ± 2.70 / 50.10 ± 1.65 1.15 ± 1.66 / 38.16 ± 2.78 @@ -2669,7 +2669,7 @@ title: German NLG 🇩🇪 4096 
True 7,964 ± 2,255 / 1,299 ± 433 - 3.43 + 3.44 36.56 ± 2.08 / 25.71 ± 1.73 40.65 ± 2.11 / 58.49 ± 1.94 4.68 ± 1.88 / 45.72 ± 3.59 @@ -2685,6 +2685,29 @@ title: German NLG 🇩🇪 13.2.0 13.2.0 + + meta-llama/Llama-3.2-1B (few-shot) + 1236 + 128 + 131104 + True + 7,577 ± 1,884 / 1,555 ± 492 + 3.45 + 24.79 ± 6.48 / 22.92 ± 5.74 + 47.65 ± 2.85 / 63.11 ± 2.17 + 2.39 ± 1.46 / 39.92 ± 4.38 + 13.39 ± 4.13 / 33.76 ± 5.50 + 61.07 ± 4.19 / 16.72 ± 4.33 + 3.94 ± 1.35 / 27.78 ± 1.07 + 0.90 ± 0.98 / 25.40 ± 0.65 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + seedboxai/KafkaLM-70B-German-V0.1 (few-shot, val) 68977 @@ -2692,7 +2715,7 @@ title: German NLG 🇩🇪 4125 True 294 ± 21 / 168 ± 42 - 3.43 + 3.45 59.23 ± 2.95 / 52.06 ± 3.03 -5.01 ± 4.03 / 18.00 ± 0.71 3.19 ± 4.99 / 37.63 ± 3.16 @@ -2715,7 +2738,7 @@ title: German NLG 🇩🇪 8192 True 16,249 ± 3,690 / 3,689 ± 1,226 - 3.45 + 3.46 28.67 ± 3.31 / 25.27 ± 2.68 19.69 ± 2.50 / 29.00 ± 2.22 5.07 ± 0.89 / 47.60 ± 2.39 @@ -2738,7 +2761,7 @@ title: German NLG 🇩🇪 4128 True 840 ± 79 / 400 ± 124 - 3.45 + 3.46 34.53 ± 1.24 / 29.89 ± 1.96 42.90 ± 2.66 / 56.64 ± 4.71 1.51 ± 1.64 / 43.36 ± 4.05 @@ -2754,29 +2777,6 @@ title: German NLG 🇩🇪 12.5.2 12.5.2 - - meta-llama/Llama-3.2-1B (few-shot) - 1236 - 128 - 131104 - True - 7,577 ± 1,884 / 1,555 ± 492 - 3.45 - 24.79 ± 6.48 / 22.92 ± 5.74 - 47.65 ± 2.85 / 63.11 ± 2.17 - 2.39 ± 1.46 / 39.92 ± 4.38 - 13.39 ± 4.13 / 33.76 ± 5.50 - 61.07 ± 4.19 / 16.72 ± 4.33 - 3.94 ± 1.35 / 27.78 ± 1.07 - 0.90 ± 0.98 / 25.40 ± 0.65 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - ibm-granite/granite-3b-code-base-2k (few-shot) 3483 @@ -2784,7 +2784,7 @@ title: German NLG 🇩🇪 2048 True 2,732 ± 868 / 662 ± 238 - 3.48 + 3.49 49.38 ± 2.20 / 42.66 ± 3.24 41.72 ± 4.07 / 60.45 ± 3.07 7.67 ± 1.52 / 46.66 ± 3.23 @@ -2807,7 +2807,7 @@ title: German NLG 🇩🇪 4096 True 4,438 ± 1,128 / 1,028 ± 346 - 3.49 + 3.51 25.69 ± 1.43 / 25.95 ± 1.23 33.48 ± 2.83 / 47.14 ± 4.43 3.73 ± 1.14 / 44.43 ± 4.17 @@ -2824,27 +2824,27 @@ title: German NLG 🇩🇪 12.10.4 - RuterNorway/Llama-2-7b-chat-norwegian (few-shot) - 6738 - 32 - 4096 - False - 10,890 ± 2,686 / 2,186 ± 750 + AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot) + 20918 + 64 + 2077 + True + 1,831 ± 587 / 268 ± 90 3.52 - 27.22 ± 1.38 / 24.48 ± 1.76 - 33.61 ± 5.06 / 49.68 ± 5.74 - 0.45 ± 0.91 / 35.24 ± 3.71 - 20.44 ± 3.29 / 45.50 ± 3.33 - 60.50 ± 0.63 / 13.71 ± 0.75 - -0.10 ± 0.93 / 25.16 ± 1.17 - -1.00 ± 1.03 / 24.94 ± 1.00 - 9.3.1 - 12.10.0 - 9.3.1 - 12.5.2 - 12.4.0 - 9.3.1 - 9.3.1 + 24.35 ± 1.72 / 21.90 ± 0.85 + 43.35 ± 3.81 / 60.49 ± 3.18 + 2.38 ± 1.21 / 37.27 ± 1.09 + 15.56 ± 2.24 / 34.68 ± 3.15 + 57.47 ± 0.69 / 14.10 ± 0.73 + 4.67 ± 0.91 / 27.88 ± 0.81 + 1.77 ± 1.29 / 25.47 ± 0.78 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 Qwen/Qwen1.5-1.8B (few-shot) @@ -2853,7 +2853,7 @@ title: German NLG 🇩🇪 32768 True 5,666 ± 1,328 / 1,256 ± 408 - 3.53 + 3.52 9.23 ± 4.86 / 10.43 ± 3.83 38.30 ± 2.90 / 56.94 ± 2.83 0.39 ± 1.17 / 33.47 ± 0.34 @@ -2870,27 +2870,27 @@ title: German NLG 🇩🇪 12.1.0 - AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot) - 20918 - 64 - 2077 - True - 1,831 ± 587 / 268 ± 90 - 3.54 - 24.35 ± 1.72 / 21.90 ± 0.85 - 43.35 ± 3.81 / 60.49 ± 3.18 - 2.38 ± 1.21 / 37.27 ± 1.09 - 15.56 ± 2.24 / 34.68 ± 3.15 - 57.47 ± 0.69 / 14.10 ± 0.73 - 4.67 ± 0.91 / 27.88 ± 0.81 - 1.77 ± 1.29 / 25.47 ± 0.78 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 + RuterNorway/Llama-2-7b-chat-norwegian (few-shot) + 6738 + 32 + 4096 + False + 10,890 ± 2,686 / 2,186 ± 750 + 3.52 + 27.22 
± 1.38 / 24.48 ± 1.76 + 33.61 ± 5.06 / 49.68 ± 5.74 + 0.45 ± 0.91 / 35.24 ± 3.71 + 20.44 ± 3.29 / 45.50 ± 3.33 + 60.50 ± 0.63 / 13.71 ± 0.75 + -0.10 ± 0.93 / 25.16 ± 1.17 + -1.00 ± 1.03 / 24.94 ± 1.00 + 9.3.1 + 12.10.0 + 9.3.1 + 12.5.2 + 12.4.0 + 9.3.1 + 9.3.1 NorwAI/NorwAI-Mistral-7B (few-shot) @@ -2915,6 +2915,29 @@ title: German NLG 🇩🇪 12.10.4 12.10.4 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 3.60 + 32.54 ± 1.23 / 30.63 ± 1.12 + 22.27 ± 4.97 / 36.09 ± 3.66 + 7.18 ± 1.13 / 51.77 ± 1.56 + 16.72 ± 0.88 / 38.07 ± 0.76 + 58.08 ± 0.37 / 10.60 ± 0.37 + 6.57 ± 1.03 / 29.10 ± 0.93 + 5.16 ± 1.64 / 28.12 ± 1.34 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + AI-Sweden-Models/gpt-sw3-20b (few-shot) 20918 @@ -3007,29 +3030,6 @@ title: German NLG 🇩🇪 12.1.0 12.1.0 - - Qwen/Qwen1.5-0.5B-Chat (few-shot) - 620 - 152 - 32768 - False - 11,740 ± 3,000 / 2,209 ± 721 - 3.90 - 24.67 ± 0.99 / 23.98 ± 0.73 - 9.31 ± 2.97 / 21.50 ± 2.70 - 1.11 ± 1.69 / 37.88 ± 4.05 - 13.60 ± 1.60 / 29.10 ± 1.94 - 56.42 ± 7.64 / 11.68 ± 1.75 - 2.75 ± 0.91 / 27.17 ± 0.72 - 3.41 ± 1.30 / 27.45 ± 0.79 - 12.5.2 - 11.0.0 - 12.1.0 - 12.5.0 - 12.5.0 - 12.1.0 - 12.1.0 - PleIAs/Pleias-3b-Preview (few-shot) 3212 @@ -3053,6 +3053,29 @@ title: German NLG 🇩🇪 14.0.4 14.0.4 + + Qwen/Qwen1.5-0.5B-Chat (few-shot) + 620 + 152 + 32768 + False + 11,740 ± 3,000 / 2,209 ± 721 + 3.91 + 24.67 ± 0.99 / 23.98 ± 0.73 + 9.31 ± 2.97 / 21.50 ± 2.70 + 1.11 ± 1.69 / 37.88 ± 4.05 + 13.60 ± 1.60 / 29.10 ± 1.94 + 56.42 ± 7.64 / 11.68 ± 1.75 + 2.75 ± 0.91 / 27.17 ± 0.72 + 3.41 ± 1.30 / 27.45 ± 0.79 + 12.5.2 + 11.0.0 + 12.1.0 + 12.5.0 + 12.5.0 + 12.1.0 + 12.1.0 + PleIAs/Pleias-1.2b-Preview (few-shot) 1195 @@ -3129,7 +3152,7 @@ title: German NLG 🇩🇪 32800 True 2,722 ± 495 / 766 ± 250 - 3.98 + 3.99 21.96 ± 1.53 / 18.48 ± 1.53 18.66 ± 3.01 / 35.11 ± 2.93 0.16 ± 1.78 / 37.84 ± 2.92 @@ -3175,7 +3198,7 @@ title: German NLG 🇩🇪 8192 True 21,777 ± 6,115 / 3,617 ± 1,211 - 4.19 + 4.21 18.77 ± 3.96 / 18.65 ± 3.26 12.59 ± 3.85 / 22.64 ± 2.25 1.64 ± 1.30 / 34.84 ± 2.12 @@ -3198,7 +3221,7 @@ title: German NLG 🇩🇪 2048 True 2,519 ± 841 / 323 ± 104 - 4.19 + 4.21 24.32 ± 2.91 / 22.08 ± 1.97 15.58 ± 3.73 / 29.45 ± 4.32 1.25 ± 1.70 / 37.57 ± 3.82 @@ -3221,7 +3244,7 @@ title: German NLG 🇩🇪 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.26 + 4.27 25.30 ± 2.32 / 24.21 ± 2.13 13.90 ± 5.43 / 30.42 ± 6.17 -0.25 ± 1.90 / 39.25 ± 4.50 @@ -3267,7 +3290,7 @@ title: German NLG 🇩🇪 8192 True 26,346 ± 7,812 / 4,082 ± 1,372 - 4.30 + 4.31 16.89 ± 1.62 / 16.63 ± 1.80 2.74 ± 3.46 / 23.30 ± 3.11 -0.34 ± 1.06 / 39.21 ± 4.13 @@ -3283,29 +3306,6 @@ title: German NLG 🇩🇪 13.1.0 13.1.0 - - RJuro/kanelsnegl-v0.1 (few-shot) - 7242 - 32 - 541 - True - 5,847 ± 1,029 / 1,640 ± 525 - 4.38 - 0.00 ± 0.00 / 0.00 ± 0.00 - 0.00 ± 0.00 / 17.05 ± 0.35 - 0.00 ± 0.00 / 33.34 ± 0.31 - 0.00 ± 0.00 / 14.17 ± 0.79 - 59.26 ± 0.09 / 9.32 ± 0.09 - 1.16 ± 0.57 / 22.46 ± 0.56 - 0.31 ± 0.50 / 24.18 ± 0.53 - 12.10.0 - 12.10.0 - 12.10.0 - 12.10.0 - 12.10.0 - 12.10.0 - 12.10.0 - allenai/OLMo-1B (few-shot) 1177 @@ -3352,6 +3352,29 @@ title: German NLG 🇩🇪 13.1.0 13.1.0 + + RJuro/kanelsnegl-v0.1 (few-shot) + 7242 + 32 + 541 + True + 5,847 ± 1,029 / 1,640 ± 525 + 4.40 + 0.00 ± 0.00 / 0.00 ± 0.00 + 0.00 ± 0.00 / 17.05 ± 0.35 + 0.00 ± 0.00 / 33.34 ± 0.31 + 0.00 ± 0.00 / 14.17 ± 0.79 + 59.26 ± 0.09 / 9.32 ± 0.09 + 1.16 ± 0.57 / 22.46 ± 0.56 + 0.31 ± 0.50 / 24.18 ± 0.53 + 12.10.0 + 12.10.0 + 12.10.0 + 12.10.0 + 12.10.0 + 12.10.0 + 12.10.0 + NorwAI/NorwAI-Mistral-7B-pretrain 
(few-shot) 7537 @@ -3359,7 +3382,7 @@ title: German NLG 🇩🇪 4096 True 3,024 ± 496 / 909 ± 301 - 4.73 + 4.75 5.80 ± 1.56 / 5.41 ± 1.56 4.45 ± 1.73 / 29.26 ± 3.66 -0.48 ± 1.33 / 43.09 ± 3.56 @@ -3382,7 +3405,7 @@ title: German NLG 🇩🇪 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 4.78 + 4.79 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 17.05 ± 0.35 0.00 ± 0.00 / 33.34 ± 0.31 @@ -3405,7 +3428,7 @@ title: German NLG 🇩🇪 2077 True 11,734 ± 3,124 / 2,174 ± 720 - 4.94 + 4.96 0.00 ± 0.00 / 0.00 ± 0.00 0.19 ± 1.24 / 17.20 ± 1.22 -0.12 ± 0.91 / 36.65 ± 3.92 diff --git a/german-nlu.csv b/german-nlu.csv index 46836a04..4501b86b 100644 --- a/german-nlu.csv +++ b/german-nlu.csv @@ -17,62 +17,62 @@ gwlms/teams-base-dewiki-v1-discriminator,111,32,512,True,False,30608,1.73,79.59, Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.74,68.94,58.78,52.66,27.62 upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,1.74,68.11,59.79,35.45,37.27 microsoft/mdeberta-v3-base,278,251,512,True,False,20637,1.79,77.42,50.9,59.38,20.28 -google/gemma-2-27b-it (few-shot),27227,256,8224,True,False,1516,1.85,64.13,60.28,46.69,28.54 -"meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4125,True,False,1892,1.85,63.71,58.17,36.33,36.06 dbmdz/bert-base-german-cased,111,31,512,True,False,37150,1.86,78.54,53.91,59.23,13.71 -"gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,1.86,57.47,58.67,54.55,27.02 +google/gemma-2-27b-it (few-shot),27227,256,8224,True,False,1516,1.86,64.13,60.28,46.69,28.54 gwlms/bert-base-dewiki-v1,111,32,512,True,False,30650,1.86,80.53,45.61,67.09,16.61 mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024032,True,False,7095,1.86,66.27,57.7,35.54,34.45 +"gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,1.87,57.47,58.67,54.55,27.02 "gpt-4o-2024-05-13 (few-shot, val)",-1,200,127903,True,False,916,1.87,69.99,54.82,43.66,30.06 +"meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4125,True,False,1892,1.87,63.71,58.17,36.33,36.06 dbmdz/bert-base-german-uncased,111,31,512,True,False,36020,1.89,77.55,56.48,63.49,12.39 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,1.9,68.62,56.2,43.36,28.5 ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.9,73.31,58.02,45.12,24.67 google/gemma-2-9b (few-shot),9242,256,8224,True,False,2038,1.92,47.39,62.89,37.22,39.11 meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.92,70.47,60.55,50.13,20.42 -"152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32793,True,False,2126,1.93,65.19,59.8,41.86,25.51 -deepset/gbert-base,111,31,512,True,False,37268,1.93,80.09,59.8,47.48,14.39 "claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,1.94,61.83,61.59,46.4,23.77 +deepset/gbert-base,111,31,512,True,False,37268,1.94,80.09,59.8,47.48,14.39 "meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8221,True,False,1673,1.94,75.2,54.38,36.59,26.9 skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,1.94,57.82,59.45,36.75,33.55 +"152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32793,True,False,2126,1.96,65.19,59.8,41.86,25.51 intfloat/multilingual-e5-base,278,250,512,True,False,14965,1.98,74.79,63.29,45.32,16.42 "VAGOsolutions/SauerkrautLM-7b-HerO (few-shot, val)",7242,32,32768,False,True,2477,1.99,59.7,60.22,35.99,29.68 google/gemma-2-9b-it (few-shot),9242,256,8224,True,False,2062,2.0,50.35,58.6,45.78,30.46 "gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,2.02,61.5,55.5,38.96,30.2 
nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.02,61.98,57.07,45.61,24.89 VAGOsolutions/FC-SauerkrautLM-7b-beta (few-shot),-1,32,8192,False,False,2160,2.03,56.7,53.39,35.64,34.22 -"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.03,59.19,52.66,46.66,26.02 -"gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,2.05,67.18,50.12,44.98,27.01 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.06,68.18,58.33,29.12,28.68 +"gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,2.06,67.18,50.12,44.98,27.01 +"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.06,59.19,52.66,46.66,26.02 +"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.06,65.51,55.16,44.6,21.87 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.06,68.18,58.33,29.12,28.68 facebook/xlm-v-base,778,902,512,True,False,25396,2.07,76.45,58.25,34.43,21.08 mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.07,59.1,55.3,37.84,31.71 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.08,65.51,55.16,44.6,21.87 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131104,True,False,1005,2.08,67.61,58.07,28.25,28.79 google-bert/bert-base-multilingual-uncased,168,106,512,True,False,13993,2.12,77.5,43.25,51.74,17.83 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.13,56.0,56.4,22.01,35.39 -occiglot/occiglot-7b-de-en-instruct (few-shot),7242,32,32768,False,False,1584,2.13,55.76,55.91,22.47,35.95 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.14,56.0,56.4,22.01,35.39 +occiglot/occiglot-7b-de-en-instruct (few-shot),7242,32,32768,False,False,1584,2.14,55.76,55.91,22.47,35.95 VAGOsolutions/SauerkrautLM-7b-LaserChat (few-shot),7242,32,4096,False,False,4413,2.15,62.37,49.59,35.57,31.74 -"cstr/Spaetzle-v8-7b (few-shot, val)",7242,32,32768,False,True,5980,2.15,58.9,61.34,31.58,24.91 -"meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4125,True,False,1979,2.15,62.39,53.16,31.81,28.99 +"cstr/Spaetzle-v8-7b (few-shot, val)",7242,32,32768,False,True,5980,2.16,58.9,61.34,31.58,24.91 +"meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4125,True,False,1979,2.16,62.39,53.16,31.81,28.99 google/gemma-7b (few-shot),8538,256,8096,True,False,1378,2.17,39.88,56.23,32.71,36.58 +mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.17,60.5,50.39,30.86,30.53 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.17,58.07,52.18,41.45,26.87 "mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.19,64.81,59.6,27.06,25.22 -mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.2,60.5,50.39,30.86,30.53 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.2,62.03,58.15,30.18,26.48 utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.2,40.85,56.53,24.74,38.2 -nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.22,59.82,50.22,30.43,30.22 -senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.22,64.38,54.44,26.03,25.68 +nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.23,59.82,50.22,30.43,30.22 CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.24,59.95,55.39,30.59,26.94 -ibm-granite/granite-3.0-8b-instruct 
(few-shot),8171,49,4096,True,False,1118,2.25,54.68,55.48,26.89,31.27 +senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.25,64.38,54.44,26.03,25.68 +ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.26,54.68,55.48,26.89,31.27 mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.26,55.37,54.27,23.12,31.89 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,2047,True,False,8681,2.28,59.14,51.72,33.78,24.82 ZurichNLP/unsup-simcse-xlm-roberta-base,277,250,512,True,False,34520,2.29,74.5,58.23,34.74,11.19 microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,130976,True,False,7312,2.3,55.36,53.05,23.08,31.55 mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.3,55.41,52.58,24.1,31.52 google-bert/bert-base-multilingual-cased,179,120,512,True,False,14083,2.31,79.11,42.97,42.66,14.98 -jhu-clsp/bernice,277,250,128,True,False,5567,2.31,72.25,62.0,48.1,0.0 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,2047,True,False,3194,2.31,59.33,51.5,33.3,24.83 -microsoft/xlm-align-base,277,250,512,True,False,14744,2.31,79.38,58.58,15.34,16.58 -meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.32,59.45,53.39,23.87,27.14 -"mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.32,63.36,59.8,22.98,20.96 +jhu-clsp/bernice,277,250,128,True,False,5567,2.32,72.25,62.0,48.1,0.0 +microsoft/xlm-align-base,277,250,512,True,False,14744,2.32,79.38,58.58,15.34,16.58 +meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.33,59.45,53.39,23.87,27.14 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.34,61.37,51.38,35.58,19.92 +"mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.34,63.36,59.8,22.98,20.96 alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.35,55.32,52.49,24.34,31.54 ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,2.36,50.43,57.84,22.58,27.96 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.37,57.57,49.4,23.32,30.24 @@ -83,17 +83,17 @@ meta-llama/Llama-2-13b-hf (few-shot),13016,32,4000,True,False,2898,2.42,52.08,46 seedboxai/KafkaLM-7B-German-V0.1-DPO (few-shot),7242,32,4096,False,False,6070,2.42,48.92,52.57,20.74,32.87 seedboxai/KafkaLM-7B-German-V0.1 (few-shot),7242,32,32768,True,False,6065,2.42,48.35,52.51,20.36,32.88 cardiffnlp/twitter-xlm-roberta-base,277,250,512,True,False,34475,2.43,74.89,63.01,36.6,0.65 -occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.43,51.39,47.3,21.83,31.55 VAGOsolutions/SauerkrautLM-Gemma-7b (few-shot),8538,256,8221,False,False,1410,2.44,41.41,52.78,27.75,29.24 +occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.44,51.39,47.3,21.83,31.55 Twitter/twhin-bert-base,279,250,512,True,False,11514,2.45,70.35,55.03,43.87,2.81 timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.45,50.66,54.79,20.17,27.86 -NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.46,56.14,53.33,29.49,18.49 NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.46,57.26,54.57,26.52,19.96 +NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.47,56.14,53.33,29.49,18.49 RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4125,False,False,3254,2.47,57.02,49.75,19.8,27.86 -Twitter/twhin-bert-large,562,250,512,True,False,9707,2.48,74.36,53.52,22.26,11.68 -clips/mfaq,277,250,128,True,False,5591,2.49,76.68,59.51,32.54,1.53 
+Twitter/twhin-bert-large,562,250,512,True,False,9707,2.49,74.36,53.52,22.26,11.68 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.49,59.07,49.75,14.71,29.45 seedboxai/KafkaLM-13B-German-V0.1-DPO (few-shot),13016,32,8221,False,False,789,2.49,52.45,49.73,20.72,30.18 +clips/mfaq,277,250,128,True,False,5591,2.5,76.68,59.51,32.54,1.53 CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.51,51.2,47.79,18.04,29.46 mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.53,55.1,47.69,24.14,23.93 mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.54,50.17,43.41,40.59,19.75 @@ -103,92 +103,93 @@ mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32797,False,False,634,2.62 01-ai/Yi-1.5-6B (few-shot),6061,64,4128,True,False,2867,2.63,48.2,47.12,12.39,30.5 DiscoResearch/DiscoLM_German_7b_v1 (few-shot),7242,32,32768,False,False,1972,2.63,42.39,48.67,8.72,36.12 Geotrend/distilbert-base-25lang-cased,109,85,512,True,False,26099,2.63,72.97,41.51,45.39,1.89 -meta-llama/Llama-3.2-3B (few-shot),3213,128,131104,True,False,3713,2.63,49.85,54.65,3.17,29.37 ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,2.65,47.31,48.28,14.08,28.37 -"mayflowergmbh/Wiedervereinigung-7b-dpo (few-shot, val)",7242,32,32768,False,True,2374,2.65,52.17,51.92,29.06,14.59 meta-llama/Llama-2-7b-hf (few-shot),6738,32,4125,True,False,930,2.65,43.02,50.21,15.79,28.57 +"mayflowergmbh/Wiedervereinigung-7b-dpo (few-shot, val)",7242,32,32768,False,True,2374,2.66,52.17,51.92,29.06,14.59 +meta-llama/Llama-3.2-3B (few-shot),3213,128,131104,True,False,3713,2.66,49.85,54.65,3.17,29.37 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.69,55.59,43.73,23.74,21.36 google/gemma-2-2b-it (few-shot),2614,256,8224,True,False,5374,2.69,37.31,46.23,23.26,28.01 -ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.7,54.45,43.62,15.24,26.0 -meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4125,False,False,2643,2.7,50.09,46.52,15.23,25.54 -NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.71,67.63,55.79,24.45,1.17 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131104,False,False,10424,2.71,55.52,50.52,9.87,20.2 -TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,2.73,39.21,58.36,7.03,27.02 -sentence-transformers/stsb-xlm-r-multilingual,277,250,512,True,False,15040,2.74,67.47,52.85,29.59,0.73 +NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.72,67.63,55.79,24.45,1.17 +ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.72,54.45,43.62,15.24,26.0 +meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4125,False,False,2643,2.72,50.09,46.52,15.23,25.54 nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.75,60.69,53.77,38.53,0.0 sentence-transformers/paraphrase-xlm-r-multilingual-v1,278,250,512,True,False,20154,2.75,69.45,57.94,21.81,0.33 -AI-Sweden-Models/roberta-large-1160k,355,50,512,True,False,14014,2.78,68.93,46.81,3.39,18.62 -microsoft/infoxlm-base,277,250,512,True,False,34735,2.78,77.84,59.16,3.66,3.67 +sentence-transformers/stsb-xlm-r-multilingual,277,250,512,True,False,15040,2.75,67.47,52.85,29.59,0.73 +TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,2.76,39.21,58.36,7.03,27.02 Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,2.79,42.08,41.52,12.78,29.35 
+AI-Sweden-Models/roberta-large-1160k,355,50,512,True,False,14014,2.81,68.93,46.81,3.39,18.62 +microsoft/infoxlm-base,277,250,512,True,False,34735,2.82,77.84,59.16,3.66,3.67 AI-Sweden-Models/roberta-large-1350k,354,50,512,True,False,5744,2.83,67.24,45.84,2.28,18.17 -ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,2.84,39.38,49.83,4.03,30.2 ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4128,True,False,10187,2.85,45.81,34.61,16.19,28.25 -allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,2.87,39.41,49.42,6.02,27.69 -sentence-transformers/paraphrase-multilingual-mpnet-base-v2,278,250,512,True,False,15100,2.87,65.28,60.29,16.83,0.11 +ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,2.85,39.38,49.83,4.03,30.2 +allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,2.86,39.41,49.42,6.02,27.69 +sentence-transformers/paraphrase-multilingual-mpnet-base-v2,278,250,512,True,False,15100,2.88,65.28,60.29,16.83,0.11 DiscoResearch/Llama3-German-8B (few-shot),8030,128,8192,True,False,2383,2.89,54.34,58.32,25.7,0.5 -stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,2.97,34.81,51.01,0.0,25.4 +ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.01,37.68,46.0,0.83,26.65 +stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.01,34.81,51.01,0.0,25.4 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.02,49.16,35.17,9.79,22.48 -ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.02,37.68,46.0,0.83,26.65 google/gemma-2-2b (few-shot),2614,256,8224,True,False,5235,3.06,19.69,50.36,9.07,27.06 google/gemma-7b-it (few-shot),8538,256,8221,False,False,1792,3.08,54.2,15.43,17.49,28.68 dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,3.09,65.35,37.77,16.07,5.67 ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.11,49.38,41.72,7.67,13.7 -ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.12,40.61,31.86,5.36,25.99 -Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.15,31.52,39.91,3.27,27.55 dbmdz/bert-medium-historic-multilingual-cased,42,32,512,True,False,24291,3.15,61.09,39.91,17.16,3.26 +ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.15,40.61,31.86,5.36,25.99 EuropeanParliament/EUBERT,94,66,512,True,False,20070,3.17,49.95,40.29,25.88,2.59 +Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.18,31.52,39.91,3.27,27.55 01-ai/Yi-6B (few-shot),6061,64,4125,True,False,6435,3.2,0.0,52.66,7.33,30.05 meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131104,False,False,7436,3.2,42.51,38.26,5.48,19.43 -sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,3.21,60.54,54.99,0.52,0.8 -sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,3.25,63.78,49.69,0.74,0.02 +sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,3.2,60.54,54.99,0.52,0.8 sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,3.25,64.12,49.66,0.58,0.05 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.27,28.49,43.18,2.92,23.26 +utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.26,28.49,43.18,2.92,23.26 +sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,3.27,63.78,49.69,0.74,0.02 
ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.28,36.56,40.65,4.68,19.45 openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.33,39.39,23.6,7.68,25.3 -MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.35,32.33,26.39,1.44,28.15 -Rijgersberg/GEITje-7B (few-shot),7242,32,32797,True,False,5887,3.35,0.0,47.67,9.67,26.23 -LumiOpen/Viking-13B (few-shot),14030,131,4128,True,False,840,3.36,34.53,42.9,1.51,15.83 -meta-llama/Llama-3.2-1B (few-shot),1236,128,131104,True,False,7577,3.36,24.79,47.65,2.39,13.39 -VAGOsolutions/SauerkrautLM-Gemma-2b (few-shot),2506,256,8192,False,False,3607,3.39,12.21,44.84,2.02,24.59 -NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,3.42,19.29,43.88,5.63,17.02 -google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.44,36.62,28.54,1.15,23.39 +meta-llama/Llama-3.2-1B (few-shot),1236,128,131104,True,False,7577,3.35,24.79,47.65,2.39,13.39 +MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.36,32.33,26.39,1.44,28.15 +LumiOpen/Viking-13B (few-shot),14030,131,4128,True,False,840,3.37,34.53,42.9,1.51,15.83 +Rijgersberg/GEITje-7B (few-shot),7242,32,32797,True,False,5887,3.37,0.0,47.67,9.67,26.23 +VAGOsolutions/SauerkrautLM-Gemma-2b (few-shot),2506,256,8192,False,False,3607,3.4,12.21,44.84,2.02,24.59 +NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,3.43,19.29,43.88,5.63,17.02 +google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.43,36.62,28.54,1.15,23.39 +AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2077,True,False,1831,3.47,24.35,43.35,2.38,15.56 HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.47,32.54,27.03,8.95,18.38 -sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.47,41.82,49.38,4.77,0.05 -sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.47,40.2,48.71,5.53,0.06 -AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2077,True,False,1831,3.48,24.35,43.35,2.38,15.56 -google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.48,16.95,44.96,0.77,17.92 -RuterNorway/Llama-2-7b-chat-norwegian (few-shot),6738,32,4096,False,False,10890,3.5,27.22,33.61,0.45,20.44 -AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2077,True,False,1875,3.52,36.17,34.17,2.21,13.6 -Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.53,28.04,36.21,3.12,16.33 +google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.47,16.95,44.96,0.77,17.92 +sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.48,41.82,49.38,4.77,0.05 +sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.48,40.2,48.71,5.53,0.06 +AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2077,True,False,1875,3.51,36.17,34.17,2.21,13.6 +RuterNorway/Llama-2-7b-chat-norwegian (few-shot),6738,32,4096,False,False,10890,3.51,27.22,33.61,0.45,20.44 +Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.52,28.04,36.21,3.12,16.33 allenai/OLMo-7B (few-shot),6888,50,2080,True,False,5403,3.55,30.85,49.77,2.67,4.09 -NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,3.58,25.69,33.48,3.73,14.82 -"DiscoResearch/DiscoLM-70b (few-shot, val)",68977,32,8221,False,False,291,3.64,62.16,27.56,0.59,0.0 +NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,3.61,25.69,33.48,3.73,14.82 +tiiuae/Falcon3-1B-Instruct 
(few-shot),1669,131,8192,True,False,9270,3.62,32.54,22.27,7.18,16.72 +"DiscoResearch/DiscoLM-70b (few-shot, val)",68977,32,8221,False,False,291,3.63,62.16,27.56,0.59,0.0 sentence-transformers/distiluse-base-multilingual-cased-v1,135,120,512,True,False,34042,3.64,28.29,51.7,2.12,0.03 -"seedboxai/KafkaLM-70B-German-V0.1 (few-shot, val)",68977,32,4125,True,False,294,3.67,59.23,-5.01,3.19,19.84 -ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.69,19.94,34.66,0.7,14.66 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.69,38.81,10.59,0.91,22.54 -HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.71,28.67,19.69,5.07,18.43 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.71,34.68,21.76,0.85,14.3 -Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,3.71,9.23,38.3,0.39,16.67 -dbmdz/bert-mini-historic-multilingual-cased,12,32,512,True,False,47122,3.8,38.71,34.21,1.59,0.0 -dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,3.87,33.18,33.61,1.83,0.0 -3ebdola/Dialectal-Arabic-XLM-R-Base,277,250,512,True,False,12783,3.88,30.18,32.66,2.1,0.46 -Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.99,27.34,10.64,0.33,11.81 -PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,4.0,23.08,7.41,0.89,17.32 +ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.68,19.94,34.66,0.7,14.66 +openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.68,38.81,10.59,0.91,22.54 +PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.7,34.68,21.76,0.85,14.3 +"seedboxai/KafkaLM-70B-German-V0.1 (few-shot, val)",68977,32,4125,True,False,294,3.7,59.23,-5.01,3.19,19.84 +HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.72,28.67,19.69,5.07,18.43 +Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,3.72,9.23,38.3,0.39,16.67 +dbmdz/bert-mini-historic-multilingual-cased,12,32,512,True,False,47122,3.82,38.71,34.21,1.59,0.0 +3ebdola/Dialectal-Arabic-XLM-R-Base,277,250,512,True,False,12783,3.89,30.18,32.66,2.1,0.46 +dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,3.89,33.18,33.61,1.83,0.0 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.99,23.08,7.41,0.89,17.32 +Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.0,27.34,10.64,0.33,11.81 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2080,True,False,5484,4.02,14.06,28.07,2.31,6.89 -Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.07,24.67,9.31,1.11,13.6 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.1,19.94,19.64,0.0,8.78 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.11,25.3,13.9,-0.25,6.12 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.13,24.32,15.58,1.25,6.82 -state-spaces/mamba-2.8b-hf (few-shot),2768,50,32800,True,False,2722,4.13,21.96,18.66,0.16,7.08 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.14,18.77,12.59,1.64,9.27 -allenai/OLMo-1B (few-shot),1177,50,2080,True,False,8536,4.2,21.46,21.03,0.13,0.71 -PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.26,21.03,10.99,0.13,3.61 -fresh-xlm-roberta-base,277,250,512,True,False,2214,4.39,8.03,23.44,-0.17,0.0 -fresh-electra-small,13,31,512,True,False,7840,4.5,9.53,13.29,-0.15,0.0 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.55,15.54,2.51,0.36,1.77 -HuggingFaceTB/SmolLM2-135M 
(few-shot),135,49,8192,True,False,26346,4.58,16.89,2.74,-0.34,0.28 -NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.72,5.8,4.45,-0.48,0.08 -RJuro/kanelsnegl-v0.1 (few-shot),7242,32,541,True,False,5847,4.86,0.0,0.0,0.0,0.0 -ai-forever/mGPT (few-shot),-1,100,2077,True,False,11734,4.86,0.0,0.19,-0.12,0.0 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.86,0.0,0.0,0.0,0.0 +Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.05,24.67,9.31,1.11,13.6 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.11,19.94,19.64,0.0,8.78 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.12,25.3,13.9,-0.25,6.12 +state-spaces/mamba-2.8b-hf (few-shot),2768,50,32800,True,False,2722,4.14,21.96,18.66,0.16,7.08 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.15,24.32,15.58,1.25,6.82 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.16,18.77,12.59,1.64,9.27 +allenai/OLMo-1B (few-shot),1177,50,2080,True,False,8536,4.22,21.46,21.03,0.13,0.71 +PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.27,21.03,10.99,0.13,3.61 +fresh-xlm-roberta-base,277,250,512,True,False,2214,4.4,8.03,23.44,-0.17,0.0 +fresh-electra-small,13,31,512,True,False,7840,4.51,9.53,13.29,-0.15,0.0 +HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.56,15.54,2.51,0.36,1.77 +HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.59,16.89,2.74,-0.34,0.28 +NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.73,5.8,4.45,-0.48,0.08 +RJuro/kanelsnegl-v0.1 (few-shot),7242,32,541,True,False,5847,4.88,0.0,0.0,0.0,0.0 +ai-forever/mGPT (few-shot),-1,100,2077,True,False,11734,4.88,0.0,0.19,-0.12,0.0 +ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.88,0.0,0.0,0.0,0.0 diff --git a/german-nlu.md b/german-nlu.md index c41c825a..1909586e 100644 --- a/german-nlu.md +++ b/german-nlu.md @@ -3,7 +3,7 @@ layout: leaderboard title: German NLU 🇩🇪 --- -
Last updated: 10/01/2025 12:30:28 CET
+Last updated: 11/01/2025 11:03:38 CET
@@ -340,40 +340,6 @@ title: German NLU 🇩🇪 12.7.0 12.7.0 - - google/gemma-2-27b-it (few-shot) - 27227 - 256 - 8224 - True - 1,516 ± 257 / 480 ± 148 - 1.85 - 64.13 ± 1.65 / 55.46 ± 2.00 - 60.28 ± 1.75 / 73.37 ± 1.43 - 46.69 ± 1.99 / 71.96 ± 0.73 - 28.54 ± 1.38 / 59.38 ± 1.85 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - - - meta-llama/Llama-2-70b-hf (few-shot, val) - 68977 - 32 - 4125 - True - 1,892 ± 650 / 318 ± 105 - 1.85 - 63.71 ± 2.43 / 57.08 ± 2.70 - 58.17 ± 2.51 / 71.34 ± 1.62 - 36.33 ± 5.00 / 64.51 ± 3.38 - 36.06 ± 2.89 / 69.62 ± 2.81 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - dbmdz/bert-base-german-cased 111 @@ -392,21 +358,21 @@ title: German NLU 🇩🇪 12.7.0 - gpt-4-1106-preview (zero-shot, val) - unknown - 100 - 8191 + google/gemma-2-27b-it (few-shot) + 27227 + 256 + 8224 True - 436 ± 152 / 57 ± 21 + 1,516 ± 257 / 480 ± 148 1.86 - 57.47 ± 2.20 / 36.29 ± 1.91 - 58.67 ± 2.30 / 71.20 ± 1.34 - 54.55 ± 3.18 / 75.08 ± 1.71 - 27.02 ± 1.33 / 53.97 ± 1.74 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 + 64.13 ± 1.65 / 55.46 ± 2.00 + 60.28 ± 1.75 / 73.37 ± 1.43 + 46.69 ± 1.99 / 71.96 ± 0.73 + 28.54 ± 1.38 / 59.38 ± 1.85 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 gwlms/bert-base-dewiki-v1 @@ -442,6 +408,23 @@ title: German NLU 🇩🇪 13.0.0 13.0.0 + + gpt-4-1106-preview (zero-shot, val) + unknown + 100 + 8191 + True + 436 ± 152 / 57 ± 21 + 1.87 + 57.47 ± 2.20 / 36.29 ± 1.91 + 58.67 ± 2.30 / 71.20 ± 1.34 + 54.55 ± 3.18 / 75.08 ± 1.71 + 27.02 ± 1.33 / 53.97 ± 1.74 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + gpt-4o-2024-05-13 (few-shot, val) unknown @@ -459,6 +442,23 @@ title: German NLU 🇩🇪 12.10.2 12.10.0 + + meta-llama/Llama-2-70b-hf (few-shot, val) + 68977 + 32 + 4125 + True + 1,892 ± 650 / 318 ± 105 + 1.87 + 63.71 ± 2.43 / 57.08 ± 2.70 + 58.17 ± 2.51 / 71.34 ± 1.62 + 36.33 ± 5.00 / 64.51 ± 3.38 + 36.06 ± 2.89 / 69.62 ± 2.81 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + dbmdz/bert-base-german-uncased 111 @@ -545,21 +545,21 @@ title: German NLU 🇩🇪 14.0.3 - 152334H/miqu-1-70b-sf (few-shot, val) - 68977 - 32 - 32793 + claude-3-5-sonnet-20241022 (zero-shot, val) + unknown + unknown + 200000 True - 2,126 ± 676 / 319 ± 104 - 1.93 - 65.19 ± 2.58 / 56.17 ± 3.57 - 59.80 ± 2.15 / 71.98 ± 1.46 - 41.86 ± 5.44 / 69.70 ± 2.31 - 25.51 ± 3.79 / 63.19 ± 2.48 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 + 193 ± 87 / 55 ± 19 + 1.94 + 61.83 ± 1.50 / 46.40 ± 1.54 + 61.59 ± 4.13 / 73.65 ± 2.62 + 46.40 ± 3.16 / 69.51 ± 1.58 + 23.77 ± 1.57 / 48.70 ± 1.26 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 deepset/gbert-base @@ -568,7 +568,7 @@ title: German NLU 🇩🇪 512 True 37,268 ± 6,577 / 8,719 ± 2,865 - 1.93 + 1.94 80.09 ± 0.84 / 78.71 ± 0.84 59.80 ± 2.24 / 73.18 ± 1.49 47.48 ± 7.30 / 70.97 ± 3.94 @@ -578,23 +578,6 @@ title: German NLU 🇩🇪 12.7.0 12.7.0 - - claude-3-5-sonnet-20241022 (zero-shot, val) - unknown - unknown - 200000 - True - 193 ± 87 / 55 ± 19 - 1.94 - 61.83 ± 1.50 / 46.40 ± 1.54 - 61.59 ± 4.13 / 73.65 ± 2.62 - 46.40 ± 3.16 / 69.51 ± 1.58 - 23.77 ± 1.57 / 48.70 ± 1.26 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val) 70554 @@ -629,6 +612,23 @@ title: German NLU 🇩🇪 13.0.0 13.0.0 + + 152334H/miqu-1-70b-sf (few-shot, val) + 68977 + 32 + 32793 + True + 2,126 ± 676 / 319 ± 104 + 1.96 + 65.19 ± 2.58 / 56.17 ± 3.57 + 59.80 ± 2.15 / 71.98 ± 1.46 + 41.86 ± 5.44 / 69.70 ± 2.31 + 25.51 ± 3.79 / 63.19 ± 2.48 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + intfloat/multilingual-e5-base 278 @@ -731,6 +731,23 @@ title: German NLU 🇩🇪 12.6.1 12.6.1 + + gpt-4o-2024-05-13 (zero-shot, val) + unknown + 200 + 8191 + True + 637 ± 306 / 92 ± 31 + 2.06 + 67.18 ± 1.47 / 
35.10 ± 1.57 + 50.12 ± 2.00 / 65.79 ± 1.25 + 44.98 ± 4.97 / 71.63 ± 2.39 + 27.01 ± 1.67 / 55.19 ± 1.49 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + gpt-4o-mini-2024-07-18 (zero-shot, val) unknown @@ -738,7 +755,7 @@ title: German NLU 🇩🇪 8191 True 908 ± 303 / 96 ± 36 - 2.03 + 2.06 59.19 ± 1.52 / 30.16 ± 1.22 52.66 ± 3.86 / 68.10 ± 2.51 46.66 ± 4.45 / 72.82 ± 2.13 @@ -749,21 +766,21 @@ title: German NLU 🇩🇪 14.0.1 - gpt-4o-2024-05-13 (zero-shot, val) + gpt-4o-mini-2024-07-18 (few-shot, val) unknown 200 8191 True - 637 ± 306 / 92 ± 31 - 2.05 - 67.18 ± 1.47 / 35.10 ± 1.57 - 50.12 ± 2.00 / 65.79 ± 1.25 - 44.98 ± 4.97 / 71.63 ± 2.39 - 27.01 ± 1.67 / 55.19 ± 1.49 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 + 784 ± 310 / 95 ± 28 + 2.06 + 65.51 ± 1.91 / 48.61 ± 3.73 + 55.16 ± 3.05 / 69.81 ± 1.96 + 44.60 ± 3.19 / 67.35 ± 2.53 + 21.87 ± 3.80 / 59.67 ± 2.67 + 14.0.0 + 14.0.0 + 14.0.0 + 14.0.0 meta-llama/Meta-Llama-3-8B-Instruct (few-shot) @@ -771,7 +788,7 @@ title: German NLU 🇩🇪 128 8192 True - 1,007 ± 316 / 162 ± 45 + 1,483 ± 377 / 287 ± 97 2.06 68.18 ± 0.95 / 57.72 ± 1.15 58.33 ± 2.83 / 69.31 ± 3.16 @@ -816,40 +833,6 @@ title: German NLU 🇩🇪 14.0.4 14.0.4 - - gpt-4o-mini-2024-07-18 (few-shot, val) - unknown - 200 - 8191 - True - 784 ± 310 / 95 ± 28 - 2.08 - 65.51 ± 1.91 / 48.61 ± 3.73 - 55.16 ± 3.05 / 69.81 ± 1.96 - 44.60 ± 3.19 / 67.35 ± 2.53 - 21.87 ± 3.80 / 59.67 ± 2.67 - 14.0.0 - 14.0.0 - 14.0.0 - 14.0.0 - - - meta-llama/Llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131104 - True - 1,005 ± 330 / 196 ± 74 - 2.08 - 67.61 ± 1.23 / 60.39 ± 1.02 - 58.07 ± 2.32 / 70.76 ± 1.84 - 28.25 ± 3.57 / 59.54 ± 3.88 - 28.79 ± 2.02 / 55.82 ± 3.28 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 - google-bert/bert-base-multilingual-uncased 168 @@ -873,8 +856,8 @@ title: German NLU 🇩🇪 128 8192 True - 1,335 ± 338 / 260 ± 88 - 2.13 + 1,477 ± 376 / 285 ± 97 + 2.14 56.00 ± 1.94 / 43.49 ± 2.05 56.40 ± 3.89 / 70.17 ± 2.91 22.01 ± 5.17 / 56.97 ± 3.54 @@ -891,7 +874,7 @@ title: German NLU 🇩🇪 32768 False 1,584 ± 217 / 635 ± 178 - 2.13 + 2.14 55.76 ± 1.16 / 40.04 ± 3.21 55.91 ± 2.49 / 70.31 ± 1.76 22.47 ± 3.37 / 56.77 ± 3.69 @@ -925,7 +908,7 @@ title: German NLU 🇩🇪 32768 False 5,980 ± 1,031 / 1,714 ± 552 - 2.15 + 2.16 58.90 ± 2.30 / 45.55 ± 3.30 61.34 ± 1.90 / 72.98 ± 1.30 31.58 ± 4.39 / 65.51 ± 2.23 @@ -942,7 +925,7 @@ title: German NLU 🇩🇪 4125 True 1,979 ± 621 / 320 ± 105 - 2.15 + 2.16 62.39 ± 2.72 / 50.86 ± 2.31 53.16 ± 3.17 / 64.24 ± 3.42 31.81 ± 5.15 / 62.15 ± 4.02 @@ -969,6 +952,23 @@ title: German NLU 🇩🇪 12.9.1 12.9.1 + + mistralai/Ministral-8B-Instruct-2410 (few-shot) + 8020 + 131 + 32768 + True + 1,302 ± 323 / 253 ± 86 + 2.17 + 60.50 ± 1.22 / 40.72 ± 1.61 + 50.39 ± 2.45 / 66.39 ± 1.62 + 30.86 ± 1.37 / 53.78 ± 1.61 + 30.53 ± 1.21 / 58.26 ± 1.93 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot) 46703 @@ -1004,17 +1004,17 @@ title: German NLU 🇩🇪 12.5.2 - mistralai/Ministral-8B-Instruct-2410 (few-shot) - 8020 - 131 - 32768 + meta-llama/Llama-3.1-8B-Instruct (few-shot) + 8030 + 128 + 131072 True - 1,302 ± 323 / 253 ± 86 + 1,473 ± 377 / 283 ± 96 2.20 - 60.50 ± 1.22 / 40.72 ± 1.61 - 50.39 ± 2.45 / 66.39 ± 1.62 - 30.86 ± 1.37 / 53.78 ± 1.61 - 30.53 ± 1.21 / 58.26 ± 1.93 + 62.03 ± 1.17 / 45.31 ± 1.89 + 58.15 ± 2.40 / 70.81 ± 1.86 + 30.18 ± 1.92 / 63.41 ± 1.19 + 26.48 ± 0.98 / 52.41 ± 1.48 14.1.2 14.1.2 14.1.2 @@ -1044,7 +1044,7 @@ title: German NLU 🇩🇪 8192 True 2,470 ± 836 / 326 ± 111 - 2.22 + 2.23 59.82 ± 2.10 / 47.76 ± 2.63 50.22 ± 5.25 / 58.77 ± 6.76 30.43 ± 3.65 / 61.43 ± 3.32 @@ -1054,23 +1054,6 @@ title: German 
NLU 🇩🇪 14.1.1 14.1.1 - - senseable/WestLake-7B-v2 (few-shot) - 7242 - 32 - 32768 - False - 5,993 ± 1,028 / 1,742 ± 561 - 2.22 - 64.38 ± 1.60 / 50.26 ± 2.53 - 54.44 ± 1.45 / 69.32 ± 1.02 - 26.03 ± 2.23 / 61.88 ± 1.38 - 25.68 ± 2.81 / 62.48 ± 2.93 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - CohereForAI/aya-expanse-8b (few-shot) 8028 @@ -1088,6 +1071,23 @@ title: German NLU 🇩🇪 14.1.2 14.1.2 + + senseable/WestLake-7B-v2 (few-shot) + 7242 + 32 + 32768 + False + 5,993 ± 1,028 / 1,742 ± 561 + 2.25 + 64.38 ± 1.60 / 50.26 ± 2.53 + 54.44 ± 1.45 / 69.32 ± 1.02 + 26.03 ± 2.23 / 61.88 ± 1.38 + 25.68 ± 2.81 / 62.48 ± 2.93 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + ibm-granite/granite-3.0-8b-instruct (few-shot) 8171 @@ -1095,7 +1095,7 @@ title: German NLU 🇩🇪 4096 True 1,118 ± 302 / 184 ± 63 - 2.25 + 2.26 54.68 ± 1.38 / 46.36 ± 2.67 55.48 ± 2.67 / 69.91 ± 1.90 26.89 ± 0.86 / 62.51 ± 0.48 @@ -1122,6 +1122,23 @@ title: German NLU 🇩🇪 9.1.2 12.5.1 + + microsoft/Phi-3-mini-4k-instruct (few-shot) + 3821 + 32 + 2047 + True + 8,681 ± 1,650 / 2,177 ± 717 + 2.28 + 59.14 ± 1.40 / 43.16 ± 2.75 + 51.72 ± 2.44 / 65.74 ± 2.10 + 33.78 ± 2.17 / 65.03 ± 1.74 + 24.82 ± 1.35 / 53.54 ± 2.38 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + ZurichNLP/unsup-simcse-xlm-roberta-base 277 @@ -1197,7 +1214,7 @@ title: German NLU 🇩🇪 128 True 5,567 ± 450 / 2,483 ± 798 - 2.31 + 2.32 72.25 ± 1.06 / 71.08 ± 1.17 62.00 ± 2.11 / 74.40 ± 1.38 48.10 ± 4.34 / 71.95 ± 3.29 @@ -1207,23 +1224,6 @@ title: German NLU 🇩🇪 0.0.0 0.0.0 - - microsoft/Phi-3-mini-4k-instruct (few-shot) - 3821 - 32 - 2047 - True - 3,194 ± 687 / 650 ± 216 - 2.31 - 59.33 ± 1.30 / 43.34 ± 2.86 - 51.50 ± 2.48 / 65.62 ± 2.13 - 33.30 ± 2.11 / 64.80 ± 1.68 - 24.83 ± 1.38 / 53.63 ± 2.42 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - microsoft/xlm-align-base 277 @@ -1231,7 +1231,7 @@ title: German NLU 🇩🇪 512 True 14,744 ± 2,870 / 3,265 ± 1,053 - 2.31 + 2.32 79.38 ± 0.80 / 79.33 ± 0.74 58.58 ± 2.31 / 72.09 ± 1.64 15.34 ± 5.24 / 52.99 ± 1.90 @@ -1248,7 +1248,7 @@ title: German NLU 🇩🇪 131072 True 2,986 ± 823 / 276 ± 94 - 2.32 + 2.33 59.45 ± 1.64 / 46.60 ± 2.02 53.39 ± 5.35 / 65.74 ± 5.74 23.87 ± 5.74 / 57.17 ± 6.01 @@ -1258,23 +1258,6 @@ title: German NLU 🇩🇪 14.1.2 14.1.2 - - mlabonne/AlphaMonarch-7B (few-shot, val) - 7242 - 32 - 8192 - False - 5,340 ± 1,262 / 1,157 ± 375 - 2.32 - 63.36 ± 2.68 / 51.59 ± 3.44 - 59.80 ± 3.18 / 72.32 ± 2.23 - 22.98 ± 8.11 / 60.88 ± 3.98 - 20.96 ± 3.59 / 57.36 ± 2.94 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - Nexusflow/Starling-LM-7B-beta (few-shot) 7242 @@ -1292,6 +1275,23 @@ title: German NLU 🇩🇪 14.1.2 14.1.2 + + mlabonne/AlphaMonarch-7B (few-shot, val) + 7242 + 32 + 8192 + False + 5,340 ± 1,262 / 1,157 ± 375 + 2.34 + 63.36 ± 2.68 / 51.59 ± 3.44 + 59.80 ± 3.18 / 72.32 ± 2.23 + 22.98 ± 8.11 / 60.88 ± 3.98 + 20.96 ± 3.59 / 57.36 ± 2.94 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + alpindale/Mistral-7B-v0.2-hf (few-shot) 7242 @@ -1462,23 +1462,6 @@ title: German NLU 🇩🇪 12.7.0 12.7.0 - - occiglot/occiglot-7b-eu5 (few-shot) - 7242 - 32 - 32768 - True - 2,219 ± 427 / 717 ± 224 - 2.43 - 51.39 ± 1.35 / 44.47 ± 2.77 - 47.30 ± 4.44 / 62.28 ± 4.24 - 21.83 ± 1.98 / 57.05 ± 2.18 - 31.55 ± 3.67 / 60.39 ± 4.29 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 - VAGOsolutions/SauerkrautLM-Gemma-7b (few-shot) 8538 @@ -1496,6 +1479,23 @@ title: German NLU 🇩🇪 12.10.0 12.10.0 + + occiglot/occiglot-7b-eu5 (few-shot) + 7242 + 32 + 32768 + True + 2,219 ± 427 / 717 ± 224 + 2.44 + 51.39 ± 1.35 / 44.47 ± 2.77 + 47.30 ± 4.44 / 62.28 ± 4.24 + 21.83 ± 1.98 / 57.05 ± 2.18 + 31.55 ± 3.67 / 60.39 ± 4.29 + 12.5.2 + 12.1.0 + 12.1.0 + 12.1.0 + 
Twitter/twhin-bert-base 279 @@ -1531,34 +1531,34 @@ title: German NLU 🇩🇪 12.5.3 - NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) + NorwAI/NorwAI-Mixtral-8x7B (few-shot) 46998 68 32768 True - 9,015 ± 2,966 / 1,121 ± 510 + 2,368 ± 793 / 317 ± 108 2.46 - 56.14 ± 1.69 / 44.49 ± 3.25 - 53.33 ± 4.66 / 64.05 ± 6.18 - 29.49 ± 6.46 / 59.01 ± 6.80 - 18.49 ± 4.03 / 55.32 ± 2.95 + 57.26 ± 1.41 / 42.54 ± 2.77 + 54.57 ± 4.98 / 68.40 ± 4.08 + 26.52 ± 4.63 / 59.83 ± 5.12 + 19.96 ± 1.50 / 43.10 ± 2.18 14.0.4 14.0.4 14.0.4 14.0.4 - NorwAI/NorwAI-Mixtral-8x7B (few-shot) + NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) 46998 68 32768 True - 2,368 ± 793 / 317 ± 108 - 2.46 - 57.26 ± 1.41 / 42.54 ± 2.77 - 54.57 ± 4.98 / 68.40 ± 4.08 - 26.52 ± 4.63 / 59.83 ± 5.12 - 19.96 ± 1.50 / 43.10 ± 2.18 + 9,015 ± 2,966 / 1,121 ± 510 + 2.47 + 56.14 ± 1.69 / 44.49 ± 3.25 + 53.33 ± 4.66 / 64.05 ± 6.18 + 29.49 ± 6.46 / 59.01 ± 6.80 + 18.49 ± 4.03 / 55.32 ± 2.95 14.0.4 14.0.4 14.0.4 @@ -1588,7 +1588,7 @@ title: German NLU 🇩🇪 512 True 9,707 ± 1,664 / 2,549 ± 831 - 2.48 + 2.49 74.36 ± 1.33 / 73.61 ± 1.47 53.52 ± 2.28 / 68.49 ± 1.49 22.26 ± 11.63 / 58.51 ± 6.63 @@ -1598,23 +1598,6 @@ title: German NLU 🇩🇪 12.7.0 12.7.0 - - clips/mfaq - 277 - 250 - 128 - True - 5,591 ± 187 / 3,349 ± 1,105 - 2.49 - 76.68 ± 0.91 / 76.46 ± 0.98 - 59.51 ± 1.54 / 72.84 ± 1.06 - 32.54 ± 11.48 / 60.57 ± 7.28 - 1.53 ± 0.96 / 2.39 ± 1.52 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - ibm-granite/granite-8b-code-base-4k (few-shot) 8055 @@ -1649,6 +1632,23 @@ title: German NLU 🇩🇪 12.7.0 12.7.0 + + clips/mfaq + 277 + 250 + 128 + True + 5,591 ± 187 / 3,349 ± 1,105 + 2.50 + 76.68 ± 0.91 / 76.46 ± 0.98 + 59.51 ± 1.54 / 72.84 ± 1.06 + 32.54 ± 11.48 / 60.57 ± 7.28 + 1.53 ± 0.96 / 2.39 ± 1.52 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + CohereForAI/aya-23-8B (few-shot) 8028 @@ -1802,23 +1802,6 @@ title: German NLU 🇩🇪 12.6.1 12.6.1 - - meta-llama/Llama-3.2-3B (few-shot) - 3213 - 128 - 131104 - True - 3,713 ± 877 / 836 ± 267 - 2.63 - 49.85 ± 1.96 / 41.04 ± 2.44 - 54.65 ± 1.58 / 65.94 ± 2.42 - 3.17 ± 5.20 / 36.54 ± 5.71 - 29.37 ± 3.48 / 58.09 ± 4.16 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) 3374 @@ -1836,6 +1819,23 @@ title: German NLU 🇩🇪 13.0.0 13.0.0 + + meta-llama/Llama-2-7b-hf (few-shot) + 6738 + 32 + 4125 + True + 930 ± 310 / 128 ± 43 + 2.65 + 43.02 ± 1.93 / 32.69 ± 1.98 + 50.21 ± 2.43 / 65.81 ± 1.82 + 15.79 ± 2.35 / 53.25 ± 4.45 + 28.57 ± 5.09 / 55.54 ± 6.14 + 12.8.0 + 12.8.0 + 12.8.0 + 12.8.0 + mayflowergmbh/Wiedervereinigung-7b-dpo (few-shot, val) 7242 @@ -1843,7 +1843,7 @@ title: German NLU 🇩🇪 32768 False 2,374 ± 432 / 744 ± 230 - 2.65 + 2.66 52.17 ± 2.87 / 40.26 ± 2.43 51.92 ± 3.19 / 67.12 ± 2.11 29.06 ± 5.04 / 62.77 ± 2.22 @@ -1854,21 +1854,21 @@ title: German NLU 🇩🇪 12.4.0 - meta-llama/Llama-2-7b-hf (few-shot) - 6738 - 32 - 4125 + meta-llama/Llama-3.2-3B (few-shot) + 3213 + 128 + 131104 True - 930 ± 310 / 128 ± 43 - 2.65 - 43.02 ± 1.93 / 32.69 ± 1.98 - 50.21 ± 2.43 / 65.81 ± 1.82 - 15.79 ± 2.35 / 53.25 ± 4.45 - 28.57 ± 5.09 / 55.54 ± 6.14 - 12.8.0 - 12.8.0 - 12.8.0 - 12.8.0 + 3,713 ± 877 / 836 ± 267 + 2.66 + 49.85 ± 1.96 / 41.04 ± 2.44 + 54.65 ± 1.58 / 65.94 ± 2.42 + 3.17 ± 5.20 / 36.54 ± 5.71 + 29.37 ± 3.48 / 58.09 ± 4.16 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 claude-3-5-haiku-20241022 (zero-shot, val) @@ -1905,39 +1905,22 @@ title: German NLU 🇩🇪 13.0.0 - ibm-granite/granite-8b-code-instruct-4k (few-shot) - 8055 - 49 - 4096 - True - 5,617 ± 995 / 1,623 ± 540 - 2.70 - 54.45 ± 1.17 / 42.36 ± 2.59 - 43.62 ± 3.18 / 59.82 ± 2.70 - 15.24 ± 
1.87 / 55.49 ± 2.89 - 26.00 ± 2.28 / 51.82 ± 2.70 + meta-llama/Llama-3.2-3B-Instruct (few-shot) + 3213 + 128 + 131104 + False + 10,424 ± 2,641 / 2,081 ± 666 + 2.71 + 55.52 ± 2.07 / 46.18 ± 2.32 + 50.52 ± 2.29 / 62.39 ± 2.63 + 9.87 ± 2.95 / 42.20 ± 3.60 + 20.20 ± 3.28 / 47.02 ± 5.20 13.0.0 13.0.0 13.0.0 13.0.0 - - meta-llama/Llama-2-7b-chat-hf (few-shot) - 6738 - 32 - 4125 - False - 2,643 ± 455 / 800 ± 247 - 2.70 - 50.09 ± 1.33 / 38.59 ± 1.66 - 46.52 ± 2.85 / 63.64 ± 2.10 - 15.23 ± 1.71 / 55.08 ± 1.88 - 25.54 ± 3.58 / 56.07 ± 3.76 - 12.9.0 - 12.9.0 - 12.10.0 - 12.10.0 - NbAiLab/nb-llama-3.1-70B (few-shot) 70554 @@ -1945,7 +1928,7 @@ title: German NLU 🇩🇪 131072 True 1,220 ± 411 / 158 ± 53 - 2.71 + 2.72 67.63 ± 1.29 / 58.41 ± 2.59 55.79 ± 4.32 / 64.18 ± 5.71 24.45 ± 4.07 / 59.54 ± 4.12 @@ -1956,55 +1939,38 @@ title: German NLU 🇩🇪 14.0.4 - meta-llama/Llama-3.2-3B-Instruct (few-shot) - 3213 - 128 - 131104 - False - 10,424 ± 2,641 / 2,081 ± 666 - 2.71 - 55.52 ± 2.07 / 46.18 ± 2.32 - 50.52 ± 2.29 / 62.39 ± 2.63 - 9.87 ± 2.95 / 42.20 ± 3.60 - 20.20 ± 3.28 / 47.02 ± 5.20 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - - - TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) - 7800 - 100 + ibm-granite/granite-8b-code-instruct-4k (few-shot) + 8055 + 49 4096 True - 6,197 ± 1,118 / 1,730 ± 577 - 2.73 - 39.21 ± 2.29 / 36.08 ± 2.06 - 58.36 ± 1.80 / 71.98 ± 1.17 - 7.03 ± 3.09 / 50.18 ± 3.74 - 27.02 ± 3.09 / 51.94 ± 4.17 + 5,617 ± 995 / 1,623 ± 540 + 2.72 + 54.45 ± 1.17 / 42.36 ± 2.59 + 43.62 ± 3.18 / 59.82 ± 2.70 + 15.24 ± 1.87 / 55.49 ± 2.89 + 26.00 ± 2.28 / 51.82 ± 2.70 13.0.0 13.0.0 13.0.0 13.0.0 - sentence-transformers/stsb-xlm-r-multilingual - 277 - 250 - 512 - True - 15,040 ± 2,953 / 3,417 ± 1,100 - 2.74 - 67.47 ± 1.09 / 66.34 ± 1.08 - 52.85 ± 1.53 / 68.48 ± 1.02 - 29.59 ± 6.40 / 60.98 ± 2.57 - 0.73 ± 0.17 / 4.35 ± 1.32 - 12.8.0 - 12.8.0 - 12.8.0 - 0.0.0 + meta-llama/Llama-2-7b-chat-hf (few-shot) + 6738 + 32 + 4125 + False + 2,643 ± 455 / 800 ± 247 + 2.72 + 50.09 ± 1.33 / 38.59 ± 1.66 + 46.52 ± 2.85 / 63.64 ± 2.10 + 15.23 ± 1.71 / 55.08 ± 1.88 + 25.54 ± 3.58 / 56.07 ± 3.76 + 12.9.0 + 12.9.0 + 12.10.0 + 12.10.0 nvidia/mistral-nemo-minitron-8b-instruct (few-shot) @@ -2040,6 +2006,57 @@ title: German NLU 🇩🇪 12.8.0 12.8.0 + + sentence-transformers/stsb-xlm-r-multilingual + 277 + 250 + 512 + True + 15,040 ± 2,953 / 3,417 ± 1,100 + 2.75 + 67.47 ± 1.09 / 66.34 ± 1.08 + 52.85 ± 1.53 / 68.48 ± 1.02 + 29.59 ± 6.40 / 60.98 ± 2.57 + 0.73 ± 0.17 / 4.35 ± 1.32 + 12.8.0 + 12.8.0 + 12.8.0 + 0.0.0 + + + TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) + 7800 + 100 + 4096 + True + 6,197 ± 1,118 / 1,730 ± 577 + 2.76 + 39.21 ± 2.29 / 36.08 ± 2.06 + 58.36 ± 1.80 / 71.98 ± 1.17 + 7.03 ± 3.09 / 50.18 ± 3.74 + 27.02 ± 3.09 / 51.94 ± 4.17 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + + + Qwen/Qwen1.5-4B-Chat (few-shot) + 3950 + 152 + 32768 + False + 4,347 ± 893 / 1,135 ± 365 + 2.79 + 42.08 ± 1.65 / 36.90 ± 2.00 + 41.52 ± 3.53 / 57.69 ± 3.35 + 12.78 ± 3.75 / 46.43 ± 5.48 + 29.35 ± 2.51 / 59.90 ± 2.80 + 12.5.2 + 10.0.1 + 12.1.0 + 12.5.2 + AI-Sweden-Models/roberta-large-1160k 355 @@ -2047,7 +2064,7 @@ title: German NLU 🇩🇪 512 True 14,014 ± 2,384 / 3,625 ± 1,146 - 2.78 + 2.81 68.93 ± 1.48 / 67.48 ± 1.55 46.81 ± 1.79 / 64.02 ± 1.31 3.39 ± 3.01 / 39.29 ± 4.82 @@ -2064,7 +2081,7 @@ title: German NLU 🇩🇪 512 True 34,735 ± 7,558 / 6,846 ± 2,312 - 2.78 + 2.82 77.84 ± 0.92 / 77.81 ± 1.05 59.16 ± 2.05 / 72.70 ± 1.38 3.66 ± 2.14 / 49.63 ± 2.38 @@ -2074,23 +2091,6 @@ title: German NLU 🇩🇪 12.7.0 12.7.0 - - Qwen/Qwen1.5-4B-Chat (few-shot) - 3950 - 152 - 
32768 - False - 4,347 ± 893 / 1,135 ± 365 - 2.79 - 42.08 ± 1.65 / 36.90 ± 2.00 - 41.52 ± 3.53 / 57.69 ± 3.35 - 12.78 ± 3.75 / 46.43 ± 5.48 - 29.35 ± 2.51 / 59.90 ± 2.80 - 12.5.2 - 10.0.1 - 12.1.0 - 12.5.2 - AI-Sweden-Models/roberta-large-1350k 354 @@ -2108,23 +2108,6 @@ title: German NLU 🇩🇪 10.0.1 10.0.1 - - ibm-granite/granite-7b-instruct (few-shot) - 6738 - 32 - 4096 - True - 3,136 ± 558 / 942 ± 290 - 2.84 - 39.38 ± 0.94 / 30.79 ± 1.08 - 49.83 ± 1.61 / 66.00 ± 1.36 - 4.03 ± 1.54 / 40.66 ± 3.14 - 30.20 ± 1.18 / 58.73 ± 1.73 - 13.2.0 - 13.0.0 - 13.2.0 - 13.2.0 - ibm-granite/granite-3.0-2b-base (few-shot) 2534 @@ -2142,6 +2125,23 @@ title: German NLU 🇩🇪 13.0.0 13.0.0 + + ibm-granite/granite-7b-instruct (few-shot) + 6738 + 32 + 4096 + True + 3,136 ± 558 / 942 ± 290 + 2.85 + 39.38 ± 0.94 / 30.79 ± 1.08 + 49.83 ± 1.61 / 66.00 ± 1.36 + 4.03 ± 1.54 / 40.66 ± 3.14 + 30.20 ± 1.18 / 58.73 ± 1.73 + 13.2.0 + 13.0.0 + 13.2.0 + 13.2.0 + allenai/OLMo-1.7-7B-hf (few-shot) 6888 @@ -2149,7 +2149,7 @@ title: German NLU 🇩🇪 4096 True 3,371 ± 876 / 561 ± 184 - 2.87 + 2.86 39.41 ± 2.30 / 36.17 ± 2.32 49.42 ± 4.33 / 61.57 ± 5.43 6.02 ± 2.53 / 46.41 ± 4.35 @@ -2166,7 +2166,7 @@ title: German NLU 🇩🇪 512 True 15,100 ± 3,019 / 3,369 ± 1,103 - 2.87 + 2.88 65.28 ± 0.73 / 64.04 ± 0.52 60.29 ± 1.25 / 73.42 ± 0.83 16.83 ± 13.26 / 56.96 ± 5.68 @@ -2193,6 +2193,23 @@ title: German NLU 🇩🇪 13.0.0 13.0.0 + + ibm-granite/granite-7b-base (few-shot) + 6738 + 32 + 2048 + True + 4,405 ± 1,098 / 1,032 ± 345 + 3.01 + 37.68 ± 1.26 / 33.74 ± 1.78 + 46.00 ± 3.63 / 61.88 ± 2.94 + 0.83 ± 0.84 / 33.38 ± 0.28 + 26.65 ± 4.01 / 53.15 ± 4.26 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + stabilityai/stablelm-2-1_6b (few-shot) 1645 @@ -2200,7 +2217,7 @@ title: German NLU 🇩🇪 4096 True 7,259 ± 2,120 / 1,240 ± 432 - 2.97 + 3.01 34.81 ± 2.51 / 30.33 ± 2.95 51.01 ± 2.18 / 65.35 ± 2.23 0.00 ± 0.00 / 33.34 ± 0.31 @@ -2227,23 +2244,6 @@ title: German NLU 🇩🇪 13.0.0 13.0.0 - - ibm-granite/granite-7b-base (few-shot) - 6738 - 32 - 2048 - True - 4,405 ± 1,098 / 1,032 ± 345 - 3.02 - 37.68 ± 1.26 / 33.74 ± 1.78 - 46.00 ± 3.63 / 61.88 ± 2.94 - 0.83 ± 0.84 / 33.38 ± 0.28 - 26.65 ± 4.01 / 53.15 ± 4.26 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - google/gemma-2-2b (few-shot) 2614 @@ -2312,40 +2312,6 @@ title: German NLU 🇩🇪 13.0.0 13.0.0 - - ibm-granite/granite-3.0-3b-a800m-base (few-shot) - 3374 - 49 - 4096 - True - 10,504 ± 3,028 / 1,678 ± 559 - 3.12 - 40.61 ± 2.18 / 28.49 ± 2.11 - 31.86 ± 3.60 / 42.96 ± 3.98 - 5.36 ± 3.96 / 37.83 ± 4.03 - 25.99 ± 3.85 / 47.72 ± 4.74 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - - - Qwen/Qwen1.5-4B (few-shot) - 3950 - 152 - 32768 - True - 3,248 ± 739 / 761 ± 252 - 3.15 - 31.52 ± 2.96 / 29.20 ± 1.88 - 39.91 ± 3.29 / 53.66 ± 3.20 - 3.27 ± 2.51 / 34.30 ± 1.29 - 27.55 ± 3.12 / 57.60 ± 3.34 - 12.5.2 - 10.0.1 - 12.1.0 - 12.1.0 - dbmdz/bert-medium-historic-multilingual-cased 42 @@ -2363,6 +2329,23 @@ title: German NLU 🇩🇪 12.10.1 12.10.1 + + ibm-granite/granite-3.0-3b-a800m-base (few-shot) + 3374 + 49 + 4096 + True + 10,504 ± 3,028 / 1,678 ± 559 + 3.15 + 40.61 ± 2.18 / 28.49 ± 2.11 + 31.86 ± 3.60 / 42.96 ± 3.98 + 5.36 ± 3.96 / 37.83 ± 4.03 + 25.99 ± 3.85 / 47.72 ± 4.74 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + EuropeanParliament/EUBERT 94 @@ -2380,6 +2363,23 @@ title: German NLU 🇩🇪 12.6.1 12.6.1 + + Qwen/Qwen1.5-4B (few-shot) + 3950 + 152 + 32768 + True + 3,248 ± 739 / 761 ± 252 + 3.18 + 31.52 ± 2.96 / 29.20 ± 1.88 + 39.91 ± 3.29 / 53.66 ± 3.20 + 3.27 ± 2.51 / 34.30 ± 1.29 + 27.55 ± 3.12 / 57.60 ± 3.34 + 12.5.2 + 10.0.1 + 12.1.0 + 12.1.0 + 01-ai/Yi-6B 
(few-shot) 6061 @@ -2421,7 +2421,7 @@ title: German NLU 🇩🇪 512 True 29,201 ± 6,282 / 6,045 ± 2,027 - 3.21 + 3.20 60.54 ± 1.96 / 59.68 ± 1.94 54.99 ± 2.05 / 70.00 ± 1.37 0.52 ± 2.01 / 49.40 ± 1.06 @@ -2431,23 +2431,6 @@ title: German NLU 🇩🇪 12.8.0 12.8.0 - - sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking - 135 - 120 - 512 - True - 33,753 ± 8,349 / 5,937 ± 1,946 - 3.25 - 63.78 ± 0.75 / 62.34 ± 0.64 - 49.69 ± 1.56 / 66.32 ± 1.07 - 0.74 ± 0.83 / 48.97 ± 0.85 - 0.02 ± 0.02 / 0.20 ± 0.17 - 12.8.0 - 12.8.0 - 12.8.0 - 12.8.0 - sentence-transformers/quora-distilbert-multilingual 135 @@ -2472,7 +2455,7 @@ title: German NLU 🇩🇪 4096 True 15,009 ± 4,072 / 2,702 ± 878 - 3.27 + 3.26 28.49 ± 2.30 / 24.73 ± 1.76 43.18 ± 2.89 / 58.22 ± 3.50 2.92 ± 1.40 / 44.65 ± 3.51 @@ -2482,6 +2465,23 @@ title: German NLU 🇩🇪 13.1.0 13.1.0 + + sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking + 135 + 120 + 512 + True + 33,753 ± 8,349 / 5,937 ± 1,946 + 3.27 + 63.78 ± 0.75 / 62.34 ± 0.64 + 49.69 ± 1.56 / 66.32 ± 1.07 + 0.74 ± 0.83 / 48.97 ± 0.85 + 0.02 ± 0.02 / 0.20 ± 0.17 + 12.8.0 + 12.8.0 + 12.8.0 + 12.8.0 + ibm-granite/granite-3.0-1b-a400m-instruct (few-shot) 1335 @@ -2516,6 +2516,23 @@ title: German NLU 🇩🇪 14.0.4 14.0.4 + + meta-llama/Llama-3.2-1B (few-shot) + 1236 + 128 + 131104 + True + 7,577 ± 1,884 / 1,555 ± 492 + 3.35 + 24.79 ± 6.48 / 22.92 ± 5.74 + 47.65 ± 2.85 / 63.11 ± 2.17 + 2.39 ± 1.46 / 39.92 ± 4.38 + 13.39 ± 4.13 / 33.76 ± 5.50 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + MaLA-LM/emma-500-llama2-7b (few-shot) 6738 @@ -2523,7 +2540,7 @@ title: German NLU 🇩🇪 4096 True 6,275 ± 1,193 / 1,755 ± 578 - 3.35 + 3.36 32.33 ± 2.48 / 30.20 ± 1.92 26.39 ± 5.23 / 36.06 ± 6.62 1.44 ± 1.38 / 33.60 ± 0.42 @@ -2533,23 +2550,6 @@ title: German NLU 🇩🇪 13.0.0 13.0.0 - - Rijgersberg/GEITje-7B (few-shot) - 7242 - 32 - 32797 - True - 5,887 ± 1,087 / 1,600 ± 522 - 3.35 - 0.00 ± 0.00 / 0.00 ± 0.00 - 47.67 ± 2.82 / 60.09 ± 3.28 - 9.67 ± 2.79 / 46.35 ± 4.48 - 26.23 ± 3.79 / 53.16 ± 4.52 - 12.10.0 - 12.10.0 - 12.10.0 - 12.10.0 - LumiOpen/Viking-13B (few-shot) 14030 @@ -2557,7 +2557,7 @@ title: German NLU 🇩🇪 4128 True 840 ± 79 / 400 ± 124 - 3.36 + 3.37 34.53 ± 1.24 / 29.89 ± 1.96 42.90 ± 2.66 / 56.64 ± 4.71 1.51 ± 1.64 / 43.36 ± 4.05 @@ -2568,21 +2568,21 @@ title: German NLU 🇩🇪 12.5.2 - meta-llama/Llama-3.2-1B (few-shot) - 1236 - 128 - 131104 + Rijgersberg/GEITje-7B (few-shot) + 7242 + 32 + 32797 True - 7,577 ± 1,884 / 1,555 ± 492 - 3.36 - 24.79 ± 6.48 / 22.92 ± 5.74 - 47.65 ± 2.85 / 63.11 ± 2.17 - 2.39 ± 1.46 / 39.92 ± 4.38 - 13.39 ± 4.13 / 33.76 ± 5.50 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 5,887 ± 1,087 / 1,600 ± 522 + 3.37 + 0.00 ± 0.00 / 0.00 ± 0.00 + 47.67 ± 2.82 / 60.09 ± 3.28 + 9.67 ± 2.79 / 46.35 ± 4.48 + 26.23 ± 3.79 / 53.16 ± 4.52 + 12.10.0 + 12.10.0 + 12.10.0 + 12.10.0 VAGOsolutions/SauerkrautLM-Gemma-2b (few-shot) @@ -2591,7 +2591,7 @@ title: German NLU 🇩🇪 8192 False 3,607 ± 565 / 1,212 ± 349 - 3.39 + 3.40 12.21 ± 2.76 / 11.93 ± 2.08 44.84 ± 2.70 / 57.27 ± 3.65 2.02 ± 2.19 / 37.47 ± 3.26 @@ -2608,7 +2608,7 @@ title: German NLU 🇩🇪 4096 True 3,035 ± 503 / 911 ± 300 - 3.42 + 3.43 19.29 ± 5.77 / 20.20 ± 3.59 43.88 ± 3.42 / 61.04 ± 2.69 5.63 ± 1.32 / 49.95 ± 1.54 @@ -2625,7 +2625,7 @@ title: German NLU 🇩🇪 8192 False 6,471 ± 1,142 / 1,961 ± 584 - 3.44 + 3.43 36.62 ± 1.56 / 28.22 ± 1.66 28.54 ± 2.70 / 50.10 ± 1.65 1.15 ± 1.66 / 38.16 ± 2.78 @@ -2635,6 +2635,23 @@ title: German NLU 🇩🇪 12.1.0 12.4.0 + + AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot) + 20918 + 64 + 2077 + True + 1,831 ± 587 / 268 ± 90 
+ 3.47 + 24.35 ± 1.72 / 21.90 ± 0.85 + 43.35 ± 3.81 / 60.49 ± 3.18 + 2.38 ± 1.21 / 37.27 ± 1.09 + 15.56 ± 2.24 / 34.68 ± 3.15 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot) 1711 @@ -2652,6 +2669,23 @@ title: German NLU 🇩🇪 13.1.0 13.1.0 + + google/gemma-2b (few-shot) + 2506 + 256 + 8192 + True + 6,087 ± 1,046 / 1,902 ± 563 + 3.47 + 16.95 ± 2.96 / 15.80 ± 2.16 + 44.96 ± 3.30 / 61.27 ± 2.88 + 0.77 ± 1.22 / 33.68 ± 0.59 + 17.92 ± 4.72 / 40.68 ± 6.34 + 12.5.2 + 12.1.0 + 12.1.0 + 12.1.0 + sentence-transformers/distiluse-base-multilingual-cased-v2 135 @@ -2659,7 +2693,7 @@ title: German NLU 🇩🇪 512 True 33,247 ± 8,123 / 6,017 ± 1,977 - 3.47 + 3.48 41.82 ± 3.25 / 42.03 ± 3.15 49.38 ± 1.72 / 66.10 ± 1.17 4.77 ± 2.08 / 49.48 ± 2.07 @@ -2676,7 +2710,7 @@ title: German NLU 🇩🇪 512 True 19,206 ± 4,451 / 3,658 ± 1,187 - 3.47 + 3.48 40.20 ± 3.20 / 40.33 ± 3.13 48.71 ± 1.60 / 65.32 ± 1.57 5.53 ± 1.92 / 51.10 ± 1.38 @@ -2687,38 +2721,21 @@ title: German NLU 🇩🇪 12.6.1 - AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot) + AI-Sweden-Models/gpt-sw3-20b (few-shot) 20918 64 2077 True - 1,831 ± 587 / 268 ± 90 - 3.48 - 24.35 ± 1.72 / 21.90 ± 0.85 - 43.35 ± 3.81 / 60.49 ± 3.18 - 2.38 ± 1.21 / 37.27 ± 1.09 - 15.56 ± 2.24 / 34.68 ± 3.15 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - - - google/gemma-2b (few-shot) - 2506 - 256 - 8192 - True - 6,087 ± 1,046 / 1,902 ± 563 - 3.48 - 16.95 ± 2.96 / 15.80 ± 2.16 - 44.96 ± 3.30 / 61.27 ± 2.88 - 0.77 ± 1.22 / 33.68 ± 0.59 - 17.92 ± 4.72 / 40.68 ± 6.34 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 + 1,875 ± 673 / 261 ± 91 + 3.51 + 36.17 ± 2.52 / 27.29 ± 1.74 + 34.17 ± 7.08 / 46.97 ± 8.28 + 2.21 ± 1.64 / 38.29 ± 3.56 + 13.60 ± 3.04 / 30.89 ± 4.33 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 RuterNorway/Llama-2-7b-chat-norwegian (few-shot) @@ -2727,7 +2744,7 @@ title: German NLU 🇩🇪 4096 False 10,890 ± 2,686 / 2,186 ± 750 - 3.50 + 3.51 27.22 ± 1.38 / 24.48 ± 1.76 33.61 ± 5.06 / 49.68 ± 5.74 0.45 ± 0.91 / 35.24 ± 3.71 @@ -2737,23 +2754,6 @@ title: German NLU 🇩🇪 9.3.1 12.5.2 - - AI-Sweden-Models/gpt-sw3-20b (few-shot) - 20918 - 64 - 2077 - True - 1,875 ± 673 / 261 ± 91 - 3.52 - 36.17 ± 2.52 / 27.29 ± 1.74 - 34.17 ± 7.08 / 46.97 ± 8.28 - 2.21 ± 1.64 / 38.29 ± 3.56 - 13.60 ± 3.04 / 30.89 ± 4.33 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - Qwen/Qwen1.5-1.8B-Chat (few-shot) 1837 @@ -2761,7 +2761,7 @@ title: German NLU 🇩🇪 32768 False 8,304 ± 1,846 / 1,933 ± 617 - 3.53 + 3.52 28.04 ± 2.71 / 24.08 ± 1.58 36.21 ± 3.42 / 54.82 ± 3.32 3.12 ± 1.42 / 46.21 ± 2.93 @@ -2795,7 +2795,7 @@ title: German NLU 🇩🇪 4096 True 4,438 ± 1,128 / 1,028 ± 346 - 3.58 + 3.61 25.69 ± 1.43 / 25.95 ± 1.23 33.48 ± 2.83 / 47.14 ± 4.43 3.73 ± 1.14 / 44.43 ± 4.17 @@ -2805,6 +2805,23 @@ title: German NLU 🇩🇪 12.10.4 12.10.4 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 3.62 + 32.54 ± 1.23 / 30.63 ± 1.12 + 22.27 ± 4.97 / 36.09 ± 3.66 + 7.18 ± 1.13 / 51.77 ± 1.56 + 16.72 ± 0.88 / 38.07 ± 0.76 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + DiscoResearch/DiscoLM-70b (few-shot, val) 68977 @@ -2812,7 +2829,7 @@ title: German NLU 🇩🇪 8221 False 291 ± 23 / 163 ± 39 - 3.64 + 3.63 62.16 ± 2.83 / 48.46 ± 3.31 27.56 ± 3.62 / 47.04 ± 4.06 0.59 ± 1.16 / 32.41 ± 0.74 @@ -2839,23 +2856,6 @@ title: German NLU 🇩🇪 12.8.0 12.8.0 - - seedboxai/KafkaLM-70B-German-V0.1 (few-shot, val) - 68977 - 32 - 4125 - True - 294 ± 21 / 168 ± 42 - 3.67 - 59.23 ± 2.95 / 52.06 ± 3.03 - -5.01 ± 4.03 / 18.00 ± 0.71 - 3.19 ± 4.99 / 37.63 ± 3.16 - 19.84 ± 2.17 / 56.60 ± 3.00 - 12.10.0 - 12.10.0 - 12.10.0 - 12.10.0 - 
ibm-granite/granite-3.0-1b-a400m-base (few-shot) 1385 @@ -2863,7 +2863,7 @@ title: German NLU 🇩🇪 4096 True 7,808 ± 2,183 / 1,289 ± 428 - 3.69 + 3.68 19.94 ± 4.65 / 19.11 ± 3.71 34.66 ± 3.73 / 44.27 ± 4.29 0.70 ± 1.06 / 33.46 ± 0.38 @@ -2880,7 +2880,7 @@ title: German NLU 🇩🇪 4096 True 1,438 ± 410 / 233 ± 79 - 3.69 + 3.68 38.81 ± 2.72 / 27.14 ± 1.94 10.59 ± 3.02 / 21.22 ± 2.19 0.91 ± 1.67 / 33.45 ± 0.28 @@ -2890,23 +2890,6 @@ title: German NLU 🇩🇪 14.0.4 14.0.4 - - HuggingFaceTB/SmolLM2-1.7B (few-shot) - 1711 - 49 - 8192 - True - 16,249 ± 3,690 / 3,689 ± 1,226 - 3.71 - 28.67 ± 3.31 / 25.27 ± 2.68 - 19.69 ± 2.50 / 29.00 ± 2.22 - 5.07 ± 0.89 / 47.60 ± 2.39 - 18.43 ± 2.31 / 38.33 ± 2.91 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - PleIAs/Pleias-1.2b-Preview (few-shot) 1195 @@ -2914,7 +2897,7 @@ title: German NLU 🇩🇪 2048 True 10,756 ± 3,589 / 1,157 ± 670 - 3.71 + 3.70 34.68 ± 3.56 / 30.00 ± 2.82 21.76 ± 5.01 / 39.73 ± 5.21 0.85 ± 1.86 / 38.42 ± 3.98 @@ -2924,6 +2907,40 @@ title: German NLU 🇩🇪 14.1.2 14.0.4 + + seedboxai/KafkaLM-70B-German-V0.1 (few-shot, val) + 68977 + 32 + 4125 + True + 294 ± 21 / 168 ± 42 + 3.70 + 59.23 ± 2.95 / 52.06 ± 3.03 + -5.01 ± 4.03 / 18.00 ± 0.71 + 3.19 ± 4.99 / 37.63 ± 3.16 + 19.84 ± 2.17 / 56.60 ± 3.00 + 12.10.0 + 12.10.0 + 12.10.0 + 12.10.0 + + + HuggingFaceTB/SmolLM2-1.7B (few-shot) + 1711 + 49 + 8192 + True + 16,249 ± 3,690 / 3,689 ± 1,226 + 3.72 + 28.67 ± 3.31 / 25.27 ± 2.68 + 19.69 ± 2.50 / 29.00 ± 2.22 + 5.07 ± 0.89 / 47.60 ± 2.39 + 18.43 ± 2.31 / 38.33 ± 2.91 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + Qwen/Qwen1.5-1.8B (few-shot) 1837 @@ -2931,7 +2948,7 @@ title: German NLU 🇩🇪 32768 True 5,666 ± 1,328 / 1,256 ± 408 - 3.71 + 3.72 9.23 ± 4.86 / 10.43 ± 3.83 38.30 ± 2.90 / 56.94 ± 2.83 0.39 ± 1.17 / 33.47 ± 0.34 @@ -2948,7 +2965,7 @@ title: German NLU 🇩🇪 512 True 47,122 ± 9,661 / 9,714 ± 3,152 - 3.80 + 3.82 38.71 ± 2.23 / 36.92 ± 1.95 34.21 ± 2.42 / 55.54 ± 1.69 1.59 ± 1.31 / 49.57 ± 1.09 @@ -2958,23 +2975,6 @@ title: German NLU 🇩🇪 12.10.0 12.10.0 - - dbmdz/bert-tiny-historic-multilingual-cased - 5 - 32 - 512 - True - 78,027 ± 15,466 / 17,064 ± 5,335 - 3.87 - 33.18 ± 2.13 / 32.48 ± 2.13 - 33.61 ± 2.23 / 55.01 ± 2.11 - 1.83 ± 1.54 / 49.40 ± 1.24 - 0.00 ± 0.00 / 0.00 ± 0.00 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 3ebdola/Dialectal-Arabic-XLM-R-Base 277 @@ -2982,7 +2982,7 @@ title: German NLU 🇩🇪 512 True 12,783 ± 2,537 / 2,712 ± 885 - 3.88 + 3.89 30.18 ± 6.85 / 29.11 ± 6.64 32.66 ± 3.87 / 53.17 ± 4.09 2.10 ± 1.30 / 47.10 ± 2.29 @@ -2993,21 +2993,21 @@ title: German NLU 🇩🇪 12.8.0 - Qwen/Qwen1.5-0.5B (few-shot) - 620 - 152 - 32768 + dbmdz/bert-tiny-historic-multilingual-cased + 5 + 32 + 512 True - 11,371 ± 2,924 / 2,122 ± 692 - 3.99 - 27.34 ± 1.95 / 24.46 ± 1.25 - 10.64 ± 5.31 / 26.79 ± 4.73 - 0.33 ± 1.20 / 35.20 ± 2.45 - 11.81 ± 2.10 / 27.38 ± 2.49 - 12.5.2 - 10.0.1 - 12.1.0 - 12.1.0 + 78,027 ± 15,466 / 17,064 ± 5,335 + 3.89 + 33.18 ± 2.13 / 32.48 ± 2.13 + 33.61 ± 2.23 / 55.01 ± 2.11 + 1.83 ± 1.54 / 49.40 ± 1.24 + 0.00 ± 0.00 / 0.00 ± 0.00 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 PleIAs/Pleias-3b-Preview (few-shot) @@ -3016,7 +3016,7 @@ title: German NLU 🇩🇪 4096 True 6,513 ± 1,241 / 1,282 ± 644 - 4.00 + 3.99 23.08 ± 4.90 / 21.68 ± 4.10 7.41 ± 5.97 / 23.37 ± 5.40 0.89 ± 1.81 / 40.64 ± 3.86 @@ -3026,6 +3026,23 @@ title: German NLU 🇩🇪 14.1.2 14.0.4 + + Qwen/Qwen1.5-0.5B (few-shot) + 620 + 152 + 32768 + True + 11,371 ± 2,924 / 2,122 ± 692 + 4.00 + 27.34 ± 1.95 / 24.46 ± 1.25 + 10.64 ± 5.31 / 26.79 ± 4.73 + 0.33 ± 1.20 / 35.20 ± 2.45 + 11.81 ± 2.10 / 27.38 ± 2.49 + 12.5.2 + 10.0.1 + 12.1.0 + 
12.1.0 + allenai/OLMo-7B-Twin-2T (few-shot) 6888 @@ -3050,7 +3067,7 @@ title: German NLU 🇩🇪 32768 False 11,740 ± 3,000 / 2,209 ± 721 - 4.07 + 4.05 24.67 ± 0.99 / 23.98 ± 0.73 9.31 ± 2.97 / 21.50 ± 2.70 1.11 ± 1.69 / 37.88 ± 4.05 @@ -3067,7 +3084,7 @@ title: German NLU 🇩🇪 8192 True 22,023 ± 6,203 / 3,675 ± 1,231 - 4.10 + 4.11 19.94 ± 0.96 / 18.01 ± 0.59 19.64 ± 5.59 / 36.97 ± 5.41 0.00 ± 0.00 / 33.32 ± 0.30 @@ -3084,7 +3101,7 @@ title: German NLU 🇩🇪 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.11 + 4.12 25.30 ± 2.32 / 24.21 ± 2.13 13.90 ± 5.43 / 30.42 ± 6.17 -0.25 ± 1.90 / 39.25 ± 4.50 @@ -3094,23 +3111,6 @@ title: German NLU 🇩🇪 14.1.2 14.0.4 - - PleIAs/Pleias-Nano (few-shot) - 1195 - 66 - 2048 - True - 2,519 ± 841 / 323 ± 104 - 4.13 - 24.32 ± 2.91 / 22.08 ± 1.97 - 15.58 ± 3.73 / 29.45 ± 4.32 - 1.25 ± 1.70 / 37.57 ± 3.82 - 6.82 ± 2.39 / 20.51 ± 3.06 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - state-spaces/mamba-2.8b-hf (few-shot) 2768 @@ -3118,7 +3118,7 @@ title: German NLU 🇩🇪 32800 True 2,722 ± 495 / 766 ± 250 - 4.13 + 4.14 21.96 ± 1.53 / 18.48 ± 1.53 18.66 ± 3.01 / 35.11 ± 2.93 0.16 ± 1.78 / 37.84 ± 2.92 @@ -3128,6 +3128,23 @@ title: German NLU 🇩🇪 13.0.0 13.0.0 + + PleIAs/Pleias-Nano (few-shot) + 1195 + 66 + 2048 + True + 2,519 ± 841 / 323 ± 104 + 4.15 + 24.32 ± 2.91 / 22.08 ± 1.97 + 15.58 ± 3.73 / 29.45 ± 4.32 + 1.25 ± 1.70 / 37.57 ± 3.82 + 6.82 ± 2.39 / 20.51 ± 3.06 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + HuggingFaceTB/SmolLM2-360M-Instruct (few-shot) 362 @@ -3135,7 +3152,7 @@ title: German NLU 🇩🇪 8192 True 21,777 ± 6,115 / 3,617 ± 1,211 - 4.14 + 4.16 18.77 ± 3.96 / 18.65 ± 3.26 12.59 ± 3.85 / 22.64 ± 2.25 1.64 ± 1.30 / 34.84 ± 2.12 @@ -3152,7 +3169,7 @@ title: German NLU 🇩🇪 2080 True 8,536 ± 1,926 / 1,940 ± 619 - 4.20 + 4.22 21.46 ± 2.04 / 20.83 ± 1.63 21.03 ± 6.33 / 38.33 ± 7.79 0.13 ± 1.48 / 43.17 ± 4.90 @@ -3169,7 +3186,7 @@ title: German NLU 🇩🇪 2048 True 2,331 ± 787 / 301 ± 97 - 4.26 + 4.27 21.03 ± 2.96 / 21.02 ± 2.73 10.99 ± 6.72 / 27.88 ± 6.96 0.13 ± 1.96 / 36.48 ± 3.10 @@ -3186,7 +3203,7 @@ title: German NLU 🇩🇪 512 True 2,214 ± 94 / 1,494 ± 229 - 4.39 + 4.40 8.03 ± 1.35 / 8.63 ± 1.36 23.44 ± 7.21 / 42.87 ± 7.78 -0.17 ± 1.13 / 39.21 ± 4.70 @@ -3203,7 +3220,7 @@ title: German NLU 🇩🇪 512 True 7,840 ± 1,538 / 3,024 ± 438 - 4.50 + 4.51 9.53 ± 0.71 / 9.93 ± 0.76 13.29 ± 8.29 / 29.94 ± 8.32 -0.15 ± 0.77 / 33.37 ± 0.37 @@ -3220,7 +3237,7 @@ title: German NLU 🇩🇪 8192 True 25,602 ± 7,583 / 3,953 ± 1,325 - 4.55 + 4.56 15.54 ± 1.74 / 15.29 ± 1.88 2.51 ± 1.90 / 21.27 ± 3.22 0.36 ± 1.07 / 39.04 ± 3.21 @@ -3237,7 +3254,7 @@ title: German NLU 🇩🇪 8192 True 26,346 ± 7,812 / 4,082 ± 1,372 - 4.58 + 4.59 16.89 ± 1.62 / 16.63 ± 1.80 2.74 ± 3.46 / 23.30 ± 3.11 -0.34 ± 1.06 / 39.21 ± 4.13 @@ -3254,7 +3271,7 @@ title: German NLU 🇩🇪 4096 True 3,024 ± 496 / 909 ± 301 - 4.72 + 4.73 5.80 ± 1.56 / 5.41 ± 1.56 4.45 ± 1.73 / 29.26 ± 3.66 -0.48 ± 1.33 / 43.09 ± 3.56 @@ -3271,7 +3288,7 @@ title: German NLU 🇩🇪 541 True 5,847 ± 1,029 / 1,640 ± 525 - 4.86 + 4.88 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 17.05 ± 0.35 0.00 ± 0.00 / 33.34 ± 0.31 @@ -3288,7 +3305,7 @@ title: German NLU 🇩🇪 2077 True 11,734 ± 3,124 / 2,174 ± 720 - 4.86 + 4.88 0.00 ± 0.00 / 0.00 ± 0.00 0.19 ± 1.24 / 17.20 ± 1.22 -0.12 ± 0.91 / 36.65 ± 3.92 @@ -3305,7 +3322,7 @@ title: German NLU 🇩🇪 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 4.86 + 4.88 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 17.05 ± 0.35 0.00 ± 0.00 / 33.34 ± 0.31 diff --git a/germanic-nlg.csv b/germanic-nlg.csv index 8db14197..343f94b0 100644 --- a/germanic-nlg.csv +++ b/germanic-nlg.csv @@ -1,69 +1,70 
@@ model_id,num_model_parameters,vocabulary_size,max_sequence_length,commercially_licensed,merge,speed,rank,da_rank,no_rank,sv_rank,is_rank,fo_rank,de_rank,nl_rank,en_rank,dansk,angry_tweets,scala_da,scandiqa_da,nordjylland_news,danske_talemaader,danish_citizen_tests,hellaswag_da,norne_nb,norne_nn,norec,no_sammendrag,scala_nb,scala_nn,norquad,mmlu_no,hellaswag_no,suc3,swerec,scala_sv,scandiqa_sv,swedn,mmlu_sv,hellaswag_sv,mim_gold_ner,hotter_and_colder_sentiment,scala_is,nqii,rrn,arc_is,winogrande_is,fone,fosent,scala_fo,foqa,germeval,sb10k,scala_de,germanquad,mlsum,mmlu_de,hellaswag_de,conll_nl,dutch_social,scala_nl,squad_nl,wiki_lingua_nl,mmlu_nl,hellaswag_nl,conll_en,sst5,scala_en,squad,cnn_dailymail,mmlu,hellaswag -"gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,1.25,1.14,1.29,1.08,1.12,1.3,1.3,1.53,1.21,66.8,61.62,66.84,56.85,66.21,95.21,97.19,78.74,77.48,78.7,62.55,63.6,74.45,56.31,44.67,70.84,86.3,74.45,77.59,71.35,56.56,66.08,71.32,84.09,86.37,49.59,43.03,37.26,69.61,89.09,72.03,86.51,38.22,35.09,58.65,68.94,60.47,51.26,30.04,63.62,73.8,83.93,66.44,14.22,72.3,57.81,67.13,70.04,88.29,81.79,67.55,51.21,66.6,68.8,81.71,89.91 -meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,1.39,1.13,1.48,1.09,1.69,1.65,1.27,1.64,1.19,71.94,61.26,64.94,56.0,67.44,89.91,96.37,81.66,82.44,82.17,40.55,66.76,63.91,45.93,45.33,73.55,81.37,76.27,80.7,68.85,56.41,67.18,75.85,81.49,66.4,53.84,26.22,26.49,69.88,77.37,59.56,81.95,53.25,14.29,60.41,72.87,60.79,50.25,28.34,66.17,74.98,83.48,69.12,11.23,68.74,55.25,70.51,74.89,80.93,82.86,70.6,53.8,62.69,69.33,80.39,88.02 -"gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.44,1.2,1.26,1.09,1.2,2.46,1.37,1.65,1.31,71.15,49.42,64.59,57.35,66.03,96.41,97.68,85.96,79.07,81.56,66.66,63.25,64.53,54.7,43.51,73.81,89.91,76.66,77.16,68.99,57.96,66.0,70.7,86.3,81.19,49.86,51.1,29.64,68.25,91.27,70.85,81.86,27.3,-0.97,56.45,69.99,54.82,43.66,30.06,63.8,74.13,88.18,76.75,10.8,56.26,55.55,66.86,73.11,92.69,83.48,62.74,46.56,65.41,67.64,78.55,91.34 -meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.49,1.29,1.59,1.23,1.81,1.69,1.34,1.72,1.27,68.57,60.52,57.57,54.33,68.01,87.22,92.93,70.23,80.05,80.67,40.65,66.56,56.42,38.21,49.22,65.19,69.47,72.16,81.69,63.97,57.99,66.85,69.59,70.4,69.95,51.27,18.75,30.79,69.53,75.02,48.44,79.04,52.33,15.72,59.08,72.11,59.87,46.82,30.64,66.2,68.8,73.49,68.82,11.41,61.66,55.43,69.51,68.61,69.72,83.16,69.96,50.83,60.82,69.21,77.51,83.85 -meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.58,1.34,1.66,1.25,1.92,1.81,1.5,1.83,1.35,67.07,59.89,56.56,49.02,67.0,87.31,94.13,76.43,79.84,79.93,41.11,65.38,57.84,43.52,40.92,66.69,75.66,71.98,81.15,64.46,51.22,66.63,70.79,75.48,69.04,50.79,20.36,24.56,69.13,77.24,46.46,78.46,51.6,17.25,50.98,70.47,60.55,50.13,20.42,65.2,69.32,78.33,70.37,10.87,62.87,44.3,69.36,68.82,74.72,82.35,71.07,51.27,50.23,68.77,76.73,86.49 -Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.59,1.41,1.72,1.34,2.07,1.66,1.37,1.84,1.32,63.81,54.5,57.19,55.77,64.62,82.81,87.32,77.57,72.21,70.24,39.85,62.86,63.14,43.24,43.41,70.2,76.79,62.12,79.89,61.71,54.99,65.38,70.61,76.53,68.54,47.72,22.98,26.72,66.93,67.74,36.29,76.56,46.61,23.45,55.28,68.94,58.78,52.66,27.62,63.32,70.49,81.41,67.16,9.84,66.06,50.91,64.83,73.09,79.0,75.84,68.66,56.46,58.39,68.32,78.29,86.39 -"gpt-4o-2024-05-13 (zero-shot, 
val)",-1,200,8191,True,False,637,1.61,1.4,1.69,1.24,1.82,1.88,1.6,1.83,1.39,64.8,53.07,64.18,49.02,64.78,90.74,95.22,78.1,77.72,71.7,36.27,62.76,71.7,58.79,40.95,69.22,76.4,75.06,74.85,65.23,53.02,65.8,71.87,69.98,72.85,53.43,49.26,27.36,67.01,78.98,9.83,68.0,27.3,28.09,58.59,67.18,50.12,44.98,27.01,63.1,74.22,69.31,69.12,12.36,58.88,45.88,64.37,70.81,84.34,81.23,63.46,46.45,57.64,68.71,80.09,86.91 -Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,1.65,1.52,1.85,1.39,2.24,1.97,1.24,1.82,1.2,64.66,53.42,53.93,55.55,64.54,72.75,78.49,74.11,75.68,75.89,38.41,62.79,56.42,39.34,44.35,61.53,67.36,69.73,78.76,57.57,56.43,65.53,64.02,67.11,65.08,48.71,20.51,28.29,65.67,52.92,28.01,77.19,45.55,16.92,47.51,70.66,59.33,54.53,32.66,65.0,67.84,79.0,71.32,9.12,63.96,58.36,66.59,67.82,78.26,76.84,68.94,57.74,71.22,68.96,75.15,87.36 -ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.71,1.41,1.77,1.5,2.18,1.82,1.56,2.04,1.44,66.5,58.93,57.27,55.02,66.59,81.37,83.16,67.83,76.25,77.91,40.54,64.53,59.75,47.82,40.99,56.11,67.72,62.91,79.51,60.28,55.44,65.24,56.4,65.89,67.23,50.38,20.01,21.18,67.61,66.74,33.03,81.3,60.99,12.49,48.47,73.31,58.02,45.12,24.67,64.4,58.7,68.9,68.17,10.56,56.89,53.05,64.01,58.74,65.18,81.06,68.92,49.06,61.27,68.0,67.84,78.26 -"gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,1.81,1.69,2.0,1.51,1.9,2.23,1.53,1.96,1.67,35.79,53.69,62.98,51.96,64.56,87.34,98.11,67.56,60.16,48.74,39.62,62.75,71.38,42.94,36.04,68.89,59.02,51.31,73.54,66.39,52.22,65.32,69.31,63.92,30.39,47.47,43.3,29.82,63.89,86.79,54.76,57.52,13.18,28.03,59.06,57.47,58.67,54.55,27.02,62.91,69.23,71.17,55.72,11.13,67.28,54.2,64.09,67.45,72.71,42.4,65.24,44.59,62.94,68.33,78.8,82.6 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.81,1.71,1.8,1.87,1.83,2.2,1.73,1.85,1.53,59.96,56.91,67.13,17.52,65.32,91.7,88.38,71.08,72.74,69.17,67.45,63.0,74.27,54.83,3.67,59.93,76.01,62.45,77.69,68.93,12.11,66.04,55.8,63.61,64.69,56.18,45.52,15.8,67.06,82.1,42.82,73.8,39.45,34.78,0.87,65.51,55.16,44.6,21.87,63.5,60.83,67.98,68.71,20.33,49.52,34.06,66.13,62.12,65.74,77.38,66.75,52.43,41.03,68.73,66.63,79.95 -"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,1.89,1.63,2.09,1.77,2.22,2.09,1.73,2.02,1.58,57.52,49.73,57.56,51.79,64.66,79.84,83.21,63.33,60.43,55.59,39.82,62.77,54.84,33.8,36.55,56.96,68.7,52.47,73.55,52.27,48.95,65.59,54.3,54.13,31.11,50.02,35.85,26.93,66.48,72.8,28.63,53.92,35.05,23.12,54.99,59.19,52.66,46.66,26.02,63.3,58.21,65.19,64.15,12.67,62.44,45.65,64.26,56.45,62.09,75.8,61.65,47.74,56.98,68.6,63.18,77.31 -CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.0,1.89,2.32,1.87,2.68,2.0,1.75,1.97,1.54,63.63,50.82,35.58,54.33,65.71,58.67,72.01,33.61,73.2,72.26,35.8,63.37,36.86,23.4,40.32,38.62,40.5,65.01,77.68,34.06,56.78,66.08,39.39,37.81,62.68,31.96,11.81,30.49,66.24,21.1,9.34,73.26,47.71,10.41,57.08,68.62,56.2,43.36,28.5,64.52,50.16,51.13,68.58,14.41,55.01,58.63,65.69,50.48,49.08,78.35,67.62,46.5,63.2,70.22,60.4,60.49 -nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.07,2.02,2.22,1.69,2.67,2.0,2.01,2.25,1.72,58.34,59.14,56.46,39.77,47.41,85.69,91.37,74.3,69.21,70.45,39.87,47.52,57.8,40.31,40.97,63.81,73.01,58.65,81.81,63.69,42.29,50.67,68.28,72.36,54.15,51.96,19.63,21.22,43.26,73.61,45.29,64.88,52.56,14.28,50.11,61.98,57.07,45.61,24.89,43.28,66.68,75.48,55.08,10.8,61.31,49.8,52.72,68.15,72.5,73.66,68.56,51.33,56.87,54.04,75.5,84.61 
-"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.11,1.64,2.2,1.87,2.35,2.03,2.26,2.53,2.03,65.88,63.61,71.03,46.24,64.6,84.02,85.34,23.71,74.23,70.5,50.92,62.57,76.1,72.03,40.57,24.04,-0.8,70.22,77.7,74.34,49.32,65.36,28.34,13.68,61.7,51.24,52.43,22.92,66.02,15.97,5.31,72.52,8.17,32.38,45.34,61.83,61.59,46.4,23.77,62.69,17.15,9.7,62.41,12.64,74.06,35.77,64.25,27.77,14.21,82.11,67.01,51.09,52.41,67.73,26.14,13.56 -mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.18,2.04,2.13,1.92,2.1,2.56,2.08,2.65,1.94,48.24,39.52,62.92,36.92,56.79,67.68,84.85,68.82,56.41,55.6,25.18,59.28,62.56,53.09,42.57,45.67,73.86,48.92,62.08,68.93,36.4,61.39,48.18,56.87,37.98,47.34,50.14,16.27,63.4,79.94,44.61,55.31,51.62,15.07,18.22,50.17,43.41,40.59,19.75,60.34,52.07,74.99,47.6,10.62,61.64,24.02,54.51,50.32,64.62,51.92,67.01,45.98,30.12,65.82,60.71,77.14 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.2,1.98,2.36,2.04,2.79,2.42,1.85,2.46,1.73,54.7,54.81,32.11,48.87,66.79,56.14,63.54,39.48,64.55,66.44,35.17,64.48,27.41,15.6,43.11,38.1,39.3,55.8,79.23,32.67,46.88,66.43,36.35,37.89,46.48,39.91,11.72,25.91,67.67,25.83,6.4,67.67,48.54,3.89,47.07,67.61,58.07,28.25,28.79,66.87,40.0,45.93,69.76,9.09,37.58,41.26,68.84,40.98,39.98,76.95,68.12,34.34,47.88,69.57,56.62,69.03 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.21,2.14,2.4,2.08,2.73,2.27,1.94,2.33,1.82,57.74,48.43,27.12,46.76,66.36,57.87,50.42,29.17,74.47,72.93,34.44,63.98,27.77,20.35,42.9,33.44,30.91,69.67,59.93,27.63,49.84,66.6,33.54,30.32,60.2,38.09,9.14,28.66,67.5,17.93,5.59,70.61,45.78,4.58,50.67,68.18,58.33,29.12,28.68,65.23,38.44,37.69,68.72,14.67,32.91,45.36,67.62,36.18,33.91,75.02,67.64,32.29,54.84,69.28,53.77,57.64 -mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.22,1.99,2.46,2.05,2.78,2.42,1.95,2.28,1.82,52.62,50.07,37.37,54.87,66.46,69.0,79.86,24.68,63.92,62.15,46.68,63.04,33.38,19.99,31.87,38.91,20.29,60.92,79.78,34.88,50.35,62.82,43.74,19.86,55.09,39.6,8.23,30.78,63.01,18.12,8.15,67.97,45.19,5.21,43.26,59.1,55.3,37.84,31.71,64.23,47.89,28.53,64.81,12.99,39.38,49.08,66.41,45.6,24.71,65.31,68.87,43.07,63.97,68.68,56.64,33.42 -mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.28,2.18,2.48,2.14,3.0,2.72,1.96,2.12,1.63,55.49,49.18,7.4,57.72,65.1,45.83,59.45,35.46,67.24,66.08,31.41,62.89,28.72,20.55,40.6,30.39,38.46,54.76,73.32,16.17,57.94,65.95,30.08,39.51,47.1,24.43,7.19,28.73,66.62,21.98,0.1,65.55,29.49,2.05,47.72,60.5,50.39,30.86,30.53,64.03,39.61,60.04,66.51,11.91,34.46,59.23,68.87,40.81,60.68,72.4,63.46,35.86,68.42,69.38,53.42,78.36 -mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.29,2.06,2.52,2.05,3.14,2.59,1.92,2.33,1.72,51.2,50.95,33.44,46.85,64.66,67.12,76.78,39.93,55.02,57.37,36.76,61.59,30.73,18.96,41.01,39.07,40.48,46.15,80.33,32.89,46.51,66.04,42.98,35.33,35.0,23.93,9.69,21.38,64.69,17.73,9.06,63.43,43.99,9.38,25.35,58.07,52.18,41.45,26.87,64.1,47.93,51.72,58.8,12.5,45.22,47.03,65.95,43.41,43.33,70.02,69.48,44.59,55.7,68.79,55.61,57.52 -CohereForAI/aya-expanse-8b 
(few-shot),8028,256,8192,False,False,2686,2.31,2.17,2.4,2.08,3.32,2.75,2.01,2.01,1.72,51.32,52.0,18.48,52.43,66.18,41.32,52.24,37.67,66.55,63.63,38.61,64.48,15.8,12.3,43.26,36.48,35.85,57.38,78.43,14.52,53.14,65.69,37.32,38.28,28.98,19.83,4.93,24.72,63.45,10.97,4.23,64.72,28.57,5.12,38.83,59.95,55.39,30.59,26.94,66.11,38.63,48.53,62.07,13.7,35.14,49.15,74.4,46.76,60.36,67.33,68.67,31.18,68.33,72.23,49.41,58.93 -nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.31,2.19,2.48,2.16,2.78,2.5,2.1,2.52,1.76,49.85,49.52,32.35,52.54,65.13,52.71,63.47,16.3,64.15,62.16,55.29,60.17,32.3,22.82,32.62,35.37,15.39,58.75,79.59,33.09,47.28,62.78,36.58,18.78,53.28,41.8,11.07,29.74,64.08,20.14,3.62,66.18,39.48,5.78,44.81,59.82,50.22,30.43,30.22,62.56,43.09,33.4,66.29,12.71,31.39,48.33,64.1,37.55,29.69,67.52,69.03,40.51,58.12,67.86,55.93,64.11 -meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.35,2.2,2.62,2.17,2.81,2.8,2.08,2.3,1.85,50.92,47.86,29.19,48.38,64.89,51.88,62.31,27.37,65.17,60.22,34.02,62.16,32.48,18.38,33.06,32.4,24.89,62.19,80.31,30.29,42.78,64.14,35.1,23.18,52.97,41.29,5.95,31.99,65.19,21.56,4.44,65.65,24.3,0.61,45.01,59.45,53.39,23.87,27.14,69.58,38.77,34.29,64.79,11.95,32.97,63.89,66.29,38.44,30.88,69.86,66.76,30.96,71.39,67.93,52.47,43.95 -NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.37,2.26,2.28,1.95,3.12,2.69,2.24,2.61,1.8,49.18,49.76,41.28,12.83,67.44,77.55,74.67,36.42,58.05,59.65,57.94,65.07,51.36,42.84,14.72,43.18,37.58,57.66,80.04,45.21,52.73,59.91,42.25,28.88,41.84,38.12,7.47,20.87,65.24,13.41,3.94,58.62,40.74,5.58,28.04,56.14,53.33,29.49,18.49,64.84,44.31,30.72,62.81,11.28,28.57,38.75,65.35,39.53,26.71,63.77,69.23,38.49,57.03,70.47,54.58,47.93 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.37,2.23,2.48,2.18,2.87,2.85,2.0,2.48,1.89,49.46,51.16,23.01,49.75,65.26,51.64,59.28,24.3,61.48,61.58,32.94,63.38,21.2,19.65,53.35,33.02,24.93,59.92,80.91,26.39,47.69,63.94,33.39,20.21,50.45,34.68,8.69,31.94,65.22,19.93,4.52,61.11,19.4,2.02,50.34,56.0,56.4,22.01,35.39,68.92,38.12,31.37,62.26,10.45,30.3,62.99,65.17,36.38,28.33,66.31,64.3,28.18,70.38,67.9,52.54,41.19 -Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.4,2.27,2.1,2.14,3.33,3.08,2.18,2.46,1.63,51.94,51.97,29.99,38.99,64.49,35.95,59.76,34.63,66.22,64.14,55.48,65.32,26.13,17.32,49.75,29.72,46.78,56.28,77.51,23.25,47.09,65.58,31.52,39.95,42.23,27.93,6.38,19.39,61.92,4.22,0.31,62.2,26.68,7.07,11.97,61.37,51.38,35.58,19.92,63.64,33.87,48.46,62.86,15.11,39.11,36.48,64.42,32.99,42.56,72.77,70.12,44.68,57.17,69.32,47.17,77.25 -senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.41,2.13,2.35,2.18,3.23,3.32,1.98,2.26,1.86,52.61,49.81,19.64,48.03,66.67,57.65,51.99,44.44,64.37,62.77,50.6,65.09,18.09,12.25,38.34,27.33,41.59,58.9,67.74,16.52,49.41,66.09,31.76,45.84,56.71,7.92,3.44,21.55,65.39,9.11,3.3,67.42,20.01,7.02,0.65,64.38,54.44,26.03,25.68,68.16,33.84,50.99,64.25,13.66,28.59,49.64,68.66,35.37,47.5,70.62,67.78,30.99,49.56,70.76,44.11,69.2 -utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.41,1.98,2.43,2.09,3.44,2.89,2.03,2.36,2.09,34.0,53.97,32.21,57.1,67.55,80.75,74.77,36.86,40.91,42.91,52.62,65.77,9.7,11.98,47.36,36.97,37.64,40.59,76.02,33.98,56.98,66.33,40.09,36.27,24.92,9.76,0.15,28.18,64.67,11.1,0.89,54.79,35.84,0.0,41.94,40.85,56.53,24.74,38.2,69.44,39.43,37.67,43.06,11.95,40.85,63.42,69.33,41.9,36.69,44.81,62.54,28.1,71.71,69.27,45.95,43.97 
-CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.44,2.23,2.66,2.3,3.17,2.65,2.14,2.5,1.85,53.44,49.17,20.55,51.7,65.47,37.41,62.46,26.25,61.54,60.94,35.73,63.77,21.33,13.2,32.36,27.87,30.99,47.15,80.24,11.35,49.93,65.3,29.64,31.96,42.29,38.87,0.28,18.74,66.07,10.93,4.44,64.53,44.01,2.39,34.84,62.43,53.9,29.68,19.35,64.8,41.22,46.05,61.21,9.03,38.15,44.91,67.58,41.03,44.57,71.96,67.26,40.81,49.79,68.97,50.59,58.04 -ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.49,2.4,2.55,2.31,3.43,2.86,2.12,2.52,1.75,44.92,49.31,10.14,57.34,66.02,33.71,43.55,21.34,53.79,56.13,51.36,62.4,6.83,8.09,48.01,24.55,26.71,44.94,76.78,16.96,56.83,65.09,26.57,24.62,42.67,9.95,1.11,22.25,63.81,5.12,0.89,59.96,28.33,2.24,39.52,54.68,55.48,26.89,31.27,66.33,33.98,34.59,53.62,13.37,23.47,61.2,62.34,34.69,31.36,66.17,68.03,39.76,71.21,69.3,49.99,52.45 -mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.49,2.36,2.63,2.24,3.27,2.79,2.09,2.68,1.89,45.42,43.16,8.79,59.43,66.47,53.26,58.26,18.53,52.0,55.12,47.25,63.49,8.66,6.8,46.86,27.78,10.88,53.34,80.0,4.61,58.99,64.87,35.52,19.67,50.69,24.38,1.46,27.11,56.76,8.71,3.21,62.63,25.57,2.84,44.06,55.37,54.27,23.12,31.89,68.24,35.63,26.4,58.15,7.94,25.41,62.56,64.24,35.49,19.88,63.4,68.17,30.92,73.45,69.11,47.74,34.96 -NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.52,2.05,2.36,2.22,3.25,2.8,2.36,2.74,2.41,51.95,52.11,44.47,43.32,64.64,77.33,79.05,23.81,58.53,60.26,59.48,54.54,51.85,41.89,25.62,43.35,26.91,57.01,80.12,43.04,30.44,63.55,41.92,19.13,36.73,34.34,6.57,22.59,58.69,12.07,2.87,62.6,31.36,5.21,34.26,57.26,54.57,26.52,19.96,61.1,43.3,23.82,62.76,13.83,24.44,26.17,63.34,38.5,20.26,63.12,66.47,38.82,29.16,57.65,54.57,30.13 -mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.52,2.27,2.68,2.25,3.33,2.82,2.13,2.79,1.93,43.6,45.92,15.43,59.13,66.33,53.81,61.06,20.64,50.56,52.65,44.61,63.13,12.1,9.3,45.15,28.31,13.59,49.18,79.08,11.06,58.98,64.79,34.51,20.84,46.73,26.28,1.5,25.17,55.71,7.21,2.09,61.32,26.73,1.3,44.98,55.41,52.58,24.1,31.52,68.96,35.06,28.85,56.52,7.02,23.41,61.9,64.37,34.93,23.73,61.02,67.29,30.1,73.59,69.04,47.63,35.63 -ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4097,True,False,2515,2.54,2.29,2.53,2.27,3.55,2.86,2.24,2.62,1.97,44.58,47.16,19.2,58.41,65.64,32.05,47.42,28.73,49.94,52.17,53.27,63.01,17.22,12.01,45.04,24.31,30.34,44.8,75.92,24.84,56.71,63.65,26.71,30.43,37.82,-0.29,-0.12,21.59,62.35,6.54,1.83,61.47,24.35,1.44,41.54,50.43,57.84,22.58,27.96,65.08,35.21,26.48,52.26,8.46,42.42,53.11,66.01,34.35,31.89,55.76,66.89,36.6,67.55,66.51,46.22,52.23 -CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.58,2.35,2.71,2.28,3.56,3.06,2.47,2.29,1.93,47.08,47.16,8.41,58.83,65.03,36.64,39.24,27.29,60.94,59.61,35.73,62.45,6.18,4.0,46.52,20.14,27.5,60.04,76.21,7.54,58.6,63.0,20.97,28.96,47.16,0.33,3.84,21.75,59.16,3.7,-3.24,62.22,17.34,0.01,38.7,51.2,47.79,18.04,29.46,62.58,24.21,30.55,60.81,7.9,31.12,63.0,72.9,32.37,53.32,56.16,68.27,23.82,74.23,72.14,33.04,55.1 -mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.64,2.34,2.76,2.4,3.6,2.85,2.36,2.68,2.13,44.89,48.09,19.06,51.56,66.84,51.6,35.85,22.21,53.42,54.34,38.79,64.43,17.06,11.0,35.74,20.37,21.16,47.92,62.9,19.95,52.51,66.11,25.6,21.75,34.8,17.64,5.46,12.66,61.79,2.88,-1.11,61.28,32.07,1.68,39.0,55.1,47.69,24.14,23.93,67.51,26.06,31.09,55.56,12.37,21.5,50.77,67.99,22.86,24.8,62.11,59.91,30.66,58.27,69.75,34.93,44.91 
-occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.67,2.51,2.77,2.4,3.38,3.12,2.21,2.81,2.18,40.19,42.31,1.14,57.89,66.68,44.3,48.76,15.44,45.5,45.96,44.46,63.95,0.0,0.0,52.19,20.61,16.18,47.67,71.73,7.9,57.78,65.07,25.52,14.06,40.71,14.7,0.71,20.66,65.25,5.35,0.35,60.37,8.21,0.0,43.69,52.63,43.16,27.09,34.01,69.43,32.56,23.61,53.78,7.78,16.23,63.09,66.46,28.37,15.25,56.9,62.1,20.17,75.29,69.63,38.48,27.67 -timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.67,2.33,2.67,2.15,3.47,3.23,2.37,3.11,2.06,42.43,47.82,16.51,56.95,65.43,50.76,50.82,14.47,48.97,51.52,49.05,63.32,14.37,9.96,44.07,25.07,15.56,44.14,80.14,34.23,57.07,65.15,33.24,25.5,36.47,1.84,2.54,18.66,63.68,5.12,8.3,58.96,8.97,0.0,39.2,50.66,54.79,20.17,27.86,65.53,27.04,17.47,54.56,8.43,10.99,55.91,57.88,25.12,10.65,59.1,68.41,25.43,71.89,67.99,44.09,32.29 -meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,2.69,2.47,2.96,2.41,3.53,2.81,2.5,2.81,2.02,41.12,42.77,11.52,51.14,65.19,49.78,45.88,21.89,49.66,51.98,44.13,60.5,0.67,1.11,28.62,26.82,20.98,43.74,76.98,16.01,48.38,64.98,29.44,22.42,27.57,10.07,-1.39,22.98,62.0,13.33,0.74,58.24,32.79,1.77,45.13,55.52,50.52,9.87,20.2,66.4,33.58,28.97,43.66,12.87,17.94,47.77,66.74,33.8,21.02,68.44,66.0,32.04,49.54,69.23,45.5,46.5 -mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,2.75,2.51,2.82,2.52,3.5,3.15,2.49,2.95,2.07,37.93,44.49,14.09,51.38,65.8,45.07,35.36,14.85,50.08,51.27,43.65,62.39,14.09,8.28,37.23,20.44,15.87,45.01,73.33,11.59,52.12,63.1,24.03,15.37,36.04,12.93,-0.36,18.06,62.8,5.44,6.35,55.42,15.85,1.11,33.54,47.19,47.26,22.32,24.36,67.75,26.79,20.33,52.72,7.91,18.14,52.75,64.77,26.06,14.26,57.58,61.44,34.92,65.38,69.62,38.4,35.72 -nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.75,2.67,2.86,2.56,3.45,2.99,2.37,2.95,2.12,49.01,47.95,32.89,0.0,62.7,51.21,62.3,33.79,63.7,62.53,34.35,61.42,31.53,22.71,0.06,35.46,36.47,48.51,78.68,29.18,0.0,65.35,40.16,39.81,43.65,10.7,10.77,0.29,65.26,23.45,8.35,67.72,42.98,6.4,0.07,60.69,53.77,38.53,0.0,62.73,45.91,47.37,60.11,11.12,32.68,0.0,64.01,41.0,39.2,72.63,65.74,43.43,0.0,66.76,60.0,69.36 -occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.75,2.54,2.86,2.42,3.45,3.2,2.26,2.98,2.25,37.93,44.62,0.28,58.05,66.05,38.54,45.89,12.38,45.28,46.0,44.95,63.26,0.0,0.0,43.88,20.87,13.1,49.02,76.56,2.18,58.98,64.42,23.68,14.05,40.08,16.23,1.59,15.98,62.55,5.98,-0.51,58.67,10.39,0.0,40.95,51.39,47.3,21.83,31.55,69.31,32.49,22.25,51.31,7.41,13.04,59.28,64.66,27.12,13.99,55.37,63.32,18.92,72.38,68.61,37.04,23.54 -ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.76,2.68,2.86,2.55,3.51,3.03,2.43,2.82,2.21,48.44,39.07,9.72,51.18,63.93,33.11,18.96,9.03,68.4,65.15,42.0,61.27,5.2,3.32,37.51,12.42,8.32,59.77,74.45,3.97,50.18,62.61,14.34,7.4,50.89,9.52,0.5,17.43,59.94,5.52,1.73,66.82,21.19,-0.36,36.47,59.07,49.75,14.71,29.45,67.78,16.39,10.68,63.29,13.81,8.16,56.64,63.08,16.86,6.24,72.76,62.35,21.57,69.8,67.73,25.63,16.44 -NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.78,2.68,2.73,2.53,3.21,3.07,2.6,2.9,2.5,50.83,53.23,23.02,0.0,57.74,77.42,82.47,30.39,65.75,70.12,41.9,53.29,47.88,35.66,0.03,46.91,33.14,69.54,79.55,28.27,0.02,55.51,48.02,24.46,62.28,38.08,2.85,0.86,57.6,48.56,16.74,65.06,48.44,3.3,0.0,67.63,55.79,24.45,1.17,55.51,44.2,31.71,69.37,14.35,29.13,0.34,52.69,50.9,34.78,76.84,67.91,30.61,0.1,58.37,48.18,34.99 -meta-llama/Llama-2-7b-chat-hf 
(few-shot),6738,32,4096,False,False,2643,2.83,2.59,2.95,2.61,3.52,3.26,2.58,2.91,2.22,35.44,44.88,9.74,55.04,66.15,32.17,35.74,11.32,44.99,49.09,41.56,63.59,3.04,4.03,33.77,14.81,12.69,39.72,66.18,6.74,54.05,65.92,17.73,12.85,41.1,13.59,-1.07,16.13,62.3,3.16,1.84,59.77,13.24,-0.54,31.87,50.09,46.52,15.23,25.54,67.62,20.12,13.98,50.23,10.07,14.73,53.42,67.59,20.19,11.42,62.53,62.23,22.71,64.45,69.95,30.47,30.18 -"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.85,2.28,3.14,2.71,3.36,3.27,2.73,2.84,2.47,59.48,56.46,20.57,38.23,64.38,53.54,69.09,21.03,69.39,62.76,3.97,62.53,31.65,5.86,36.65,16.62,2.63,57.06,59.89,9.3,39.97,65.29,16.34,0.0,34.99,31.19,-10.68,23.65,66.12,9.0,0.0,51.06,-3.58,4.1,45.29,55.59,43.73,23.74,21.36,62.97,16.53,-1.78,61.15,12.71,35.26,41.27,64.93,8.55,10.69,74.35,31.19,21.76,45.7,67.96,28.8,42.52 -ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.89,2.76,2.95,2.62,3.81,3.15,2.61,2.96,2.28,45.9,37.11,11.7,50.11,63.86,32.44,7.46,5.62,66.91,62.82,40.71,60.59,9.5,6.74,32.83,11.35,6.21,52.85,73.93,8.27,48.49,60.98,13.69,5.68,43.2,2.54,0.0,14.28,49.66,3.07,2.79,60.46,21.59,0.51,33.54,54.45,43.62,15.24,26.0,66.68,15.81,9.6,60.72,12.38,10.96,51.2,63.23,15.26,4.03,72.59,61.61,18.37,66.68,68.41,24.14,14.42 -ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.02,2.96,3.23,2.77,3.86,3.56,2.6,2.92,2.23,37.37,31.44,5.27,48.41,63.82,18.25,19.54,2.64,44.89,48.08,32.29,59.77,7.49,4.65,26.37,11.54,3.42,40.68,68.96,4.77,49.73,60.93,13.55,5.27,23.14,5.07,0.18,14.15,60.8,2.86,-1.31,45.56,7.44,0.92,20.82,47.31,48.28,14.08,28.37,61.97,22.99,20.06,49.25,9.45,11.87,54.2,64.77,21.52,22.32,52.79,65.92,16.74,64.92,65.5,33.84,49.84 -MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.18,3.2,3.41,2.76,3.52,3.33,3.19,3.44,2.56,28.18,29.32,2.9,56.48,53.81,27.86,34.62,4.73,36.96,39.38,32.67,51.44,2.18,5.33,45.23,9.35,4.85,41.49,75.64,0.66,57.48,55.94,10.56,5.03,31.81,18.33,3.63,16.72,58.72,12.62,3.43,40.18,14.19,0.31,41.6,32.33,26.39,1.44,28.15,58.62,14.94,8.7,36.61,8.77,3.52,59.51,54.5,14.9,7.26,47.2,64.82,7.57,73.88,67.34,16.59,11.97 -ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.18,3.26,3.33,2.92,4.01,3.83,2.79,3.02,2.3,31.8,6.85,0.97,49.83,63.43,15.97,17.19,3.07,40.08,43.96,31.9,59.98,-0.07,1.27,23.32,11.78,5.48,36.01,57.18,1.52,51.04,58.57,13.42,7.33,18.07,0.65,-0.72,12.27,56.49,0.32,1.0,41.27,5.4,-0.2,19.69,40.61,31.86,5.36,25.99,66.77,22.17,22.61,42.52,9.91,0.69,56.95,63.71,20.93,24.42,49.44,66.65,12.56,63.29,66.38,32.06,58.21 -openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.23,2.92,3.37,2.85,3.78,3.53,3.01,3.37,2.99,34.66,21.93,1.5,52.36,64.84,24.39,33.37,13.98,37.36,42.83,16.02,61.93,-0.08,2.29,31.6,8.67,9.8,35.02,51.8,6.15,50.85,65.68,10.11,8.76,28.74,4.3,0.06,17.41,60.79,0.18,-1.64,46.64,16.72,-1.54,18.69,39.39,23.6,7.68,25.3,63.84,14.99,14.99,39.24,4.25,11.48,54.18,65.55,13.37,9.13,50.73,27.52,2.96,63.42,68.74,16.65,10.69 -ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.27,3.1,3.33,3.05,4.04,3.64,3.07,3.34,2.59,37.21,31.54,6.3,44.86,61.56,17.92,10.79,1.7,53.78,55.14,26.21,57.11,3.9,2.42,24.86,10.36,5.85,50.1,65.67,4.55,42.83,45.16,7.58,3.79,33.57,0.6,0.0,11.27,49.32,1.37,-4.04,56.88,3.8,-0.21,13.72,49.16,35.17,9.79,22.48,60.81,6.89,4.79,48.53,10.15,4.88,45.38,59.56,7.38,2.69,58.3,59.01,10.33,65.04,67.46,14.1,10.67 -ibm-granite/granite-3b-code-base-2k 
(few-shot),3483,49,2048,True,False,2732,3.31,2.94,3.71,3.01,3.79,3.63,3.38,3.33,2.71,38.62,35.47,5.07,45.21,62.5,13.46,15.31,6.0,53.93,54.04,23.83,50.59,3.91,1.55,2.37,8.68,6.19,51.76,70.61,6.24,44.67,41.31,7.41,5.42,38.52,4.29,0.0,12.94,58.58,2.11,-4.75,60.88,0.16,-0.35,18.54,49.38,41.72,7.67,13.7,45.88,8.73,6.18,50.88,12.39,3.31,48.44,52.5,9.22,4.45,60.64,61.2,7.63,69.83,56.62,16.29,10.37 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.33,2.98,3.4,3.07,3.79,3.6,3.24,3.52,3.07,29.49,13.77,0.0,51.53,66.31,24.59,39.09,15.6,34.78,39.0,10.69,62.73,6.17,5.9,31.25,7.97,10.39,37.17,20.2,6.13,46.66,65.28,10.89,9.2,26.58,-0.79,0.63,15.14,60.84,1.17,-0.12,39.78,16.03,-0.48,20.04,38.81,10.59,0.91,22.54,64.28,15.69,14.18,42.35,0.78,-0.02,47.61,65.32,14.59,11.78,44.48,23.69,8.52,56.97,68.81,15.31,9.35 -HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.41,3.22,3.79,3.02,3.76,3.95,3.31,3.8,2.41,29.44,18.49,1.73,44.39,61.76,22.03,12.61,2.06,37.6,38.38,24.05,48.55,3.56,2.61,13.58,9.52,3.62,37.37,64.46,4.49,43.92,54.5,8.61,4.51,26.23,6.86,2.69,10.84,60.43,1.93,3.27,40.28,3.94,-0.26,10.68,32.54,27.03,8.95,18.38,59.91,12.3,6.28,31.84,1.56,5.05,40.55,60.35,13.39,4.57,47.58,66.78,20.53,58.07,62.45,32.9,25.32 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.41,3.1,3.42,2.98,4.02,4.09,3.14,3.58,2.93,19.61,37.92,2.81,50.05,63.91,2.54,17.85,0.52,31.43,36.92,30.63,59.45,0.98,1.67,33.24,1.85,-1.18,27.41,72.24,0.13,49.77,63.23,3.39,0.54,13.29,6.83,0.92,7.49,56.76,0.59,1.87,33.53,4.25,-2.32,15.41,28.49,43.18,2.92,23.26,64.64,2.6,1.46,32.45,7.03,5.58,51.18,61.96,2.53,1.04,37.47,58.61,5.3,63.26,67.24,3.93,1.26 -HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.49,3.38,3.71,2.98,3.92,4.1,3.31,3.83,2.65,24.47,9.93,1.22,42.09,61.62,19.65,19.01,1.34,26.7,28.23,23.25,56.31,-0.47,0.26,13.4,11.12,2.53,35.96,68.31,3.61,43.26,57.04,10.86,2.53,20.5,10.09,0.83,10.84,57.52,3.16,-1.83,27.91,0.77,-0.48,16.56,28.67,19.69,5.07,18.43,64.25,15.15,6.23,22.84,4.6,2.55,40.33,58.31,14.32,3.87,41.57,62.32,8.04,56.01,65.06,34.02,22.81 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.83,3.44,3.95,3.6,4.03,4.06,3.84,4.01,3.68,28.3,28.95,0.2,36.39,56.6,-0.19,11.52,0.06,38.96,40.42,19.42,49.86,-0.13,0.77,4.7,-0.21,-0.56,36.29,39.68,0.96,32.64,28.38,1.35,-0.19,22.56,0.53,-0.26,11.77,53.36,0.36,0.24,38.91,-1.72,0.66,4.82,34.68,21.76,0.85,14.3,43.98,0.36,1.32,38.22,4.99,1.85,27.77,49.31,-1.45,0.83,40.45,47.89,0.28,26.77,50.31,1.03,-0.3 -PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.85,3.63,3.92,3.4,4.07,4.31,3.78,3.85,3.83,15.93,13.01,0.05,36.85,58.6,-0.09,2.94,-0.35,28.82,27.81,18.74,53.53,-0.46,-0.84,12.66,-1.29,1.29,21.42,45.75,-0.25,32.71,57.21,1.57,0.36,18.86,-0.67,-0.76,8.09,60.61,-0.78,-0.84,22.75,-0.03,-0.78,7.75,23.08,7.41,0.89,17.32,56.57,-0.63,-0.17,31.13,7.24,1.23,32.13,56.85,1.79,-0.63,27.37,36.35,-0.37,7.42,60.89,2.21,-0.9 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,3.97,3.87,4.24,3.88,4.19,4.29,3.81,4.13,3.34,12.68,3.61,1.79,28.12,56.85,-0.03,6.03,0.2,26.6,23.7,6.21,48.59,-0.39,0.21,4.65,-1.13,-0.51,18.22,11.52,1.72,27.27,45.57,0.69,0.68,13.43,3.82,1.14,3.71,51.93,0.95,2.9,28.14,-0.56,-0.06,2.43,19.94,19.64,0.0,8.78,57.06,0.32,-0.65,20.95,6.84,-1.5,22.67,53.89,-0.45,-0.31,31.14,43.97,3.49,47.91,62.2,0.12,0.13 -PleIAs/Pleias-Nano 
(few-shot),1195,66,2048,True,False,2519,3.97,3.51,4.06,3.86,4.11,4.36,4.09,3.97,3.79,16.17,29.12,-0.47,34.8,59.04,-0.22,6.94,0.52,27.47,23.82,22.22,49.84,-2.06,-0.77,2.48,0.39,-1.07,14.09,23.71,1.74,32.0,28.3,0.94,-0.48,9.9,2.13,0.02,10.64,54.83,-0.56,1.21,20.76,-1.78,0.87,3.58,24.32,15.58,1.25,6.82,44.9,-0.19,1.18,23.58,7.9,1.79,26.11,53.77,-0.41,-0.45,21.6,45.04,-0.46,33.46,49.12,2.37,-0.33 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.05,3.96,4.26,3.97,4.29,4.23,4.07,4.26,3.33,8.97,2.66,1.65,24.92,55.39,0.28,3.55,-0.41,20.37,21.27,7.6,49.27,1.31,0.51,4.8,-0.9,-1.0,13.64,9.34,2.2,26.06,37.49,-0.0,0.78,13.6,3.12,0.28,4.09,50.0,-0.11,2.51,26.85,3.07,-0.12,1.39,18.77,12.59,1.64,9.27,46.31,0.84,0.01,15.68,6.73,0.63,19.73,50.53,1.3,-0.36,30.73,59.51,1.55,49.03,57.73,0.11,-0.06 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.2,4.21,4.15,3.94,4.23,4.34,4.2,4.52,4.01,13.72,3.79,-0.45,14.69,47.44,-0.58,10.99,-0.51,24.37,24.69,8.84,53.61,-1.2,-0.5,0.16,-0.81,-0.71,19.15,-3.03,0.06,14.18,51.51,0.02,0.04,14.74,3.13,-0.25,1.35,52.66,1.21,1.69,25.51,-0.24,0.46,0.15,16.89,2.74,-0.34,0.28,54.79,-0.32,0.18,17.49,2.01,-0.02,0.53,52.46,0.33,-0.1,31.26,26.69,1.78,13.88,52.05,1.51,-0.76 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.23,4.13,4.28,4.16,4.23,4.26,4.15,4.53,4.07,13.84,9.47,-0.36,22.1,45.28,-0.27,2.54,-0.66,26.59,26.78,7.91,45.83,0.28,0.04,0.65,0.57,-0.66,22.09,14.15,-0.04,21.6,27.08,-0.65,0.48,17.73,2.38,-0.18,1.59,52.81,0.89,-1.11,31.99,0.0,0.48,0.29,25.3,13.9,-0.25,6.12,41.9,-1.81,0.0,24.47,3.57,-2.03,10.18,44.43,-0.11,-0.01,31.79,19.13,-0.03,12.35,46.13,-1.2,0.47 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.24,4.03,4.27,4.12,4.32,4.34,4.31,4.55,3.95,12.11,2.61,0.25,14.02,56.53,1.05,10.81,-0.5,20.89,19.62,2.78,53.93,-0.98,0.93,0.15,-0.48,-0.53,17.09,7.41,0.47,11.73,38.3,0.3,0.06,13.7,3.01,-0.83,0.94,50.3,1.1,-0.07,23.22,3.78,0.41,0.54,15.54,2.51,0.36,1.77,49.41,-1.46,-0.18,15.82,-0.62,1.16,3.25,54.82,1.36,0.34,29.96,18.64,1.85,26.9,52.96,1.34,0.1 -PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.27,4.03,4.41,4.18,4.47,4.41,4.16,4.32,4.19,10.59,13.31,0.52,16.61,53.52,0.89,4.3,-0.01,25.02,21.59,8.05,43.81,-0.15,-0.97,0.37,-0.31,-0.61,16.28,17.38,-0.45,17.78,27.12,-1.38,0.94,13.8,2.17,-0.63,1.29,43.4,1.16,1.21,22.55,0.67,0.87,0.27,21.03,10.99,0.13,3.61,43.73,-0.77,0.11,21.32,4.37,-0.19,9.38,50.94,0.33,-0.81,27.45,27.39,0.31,15.62,43.83,0.65,-0.44 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.72,4.68,4.69,4.43,4.8,4.74,4.69,4.96,4.79,0.0,0.0,0.0,0.02,42.82,4.55,-1.17,-1.26,0.0,0.0,0.0,41.98,0.0,0.0,0.0,1.14,0.64,0.0,0.0,0.0,0.0,40.82,1.19,1.55,0.0,-1.3,0.0,0.0,36.58,-0.0,0.0,0.0,0.74,0.0,0.0,0.0,0.0,0.0,0.0,37.66,0.87,1.95,0.0,0.0,0.0,0.0,43.74,0.32,1.73,0.0,0.0,2.48,0.01,44.8,-0.69,0.37 +"gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,1.24,1.14,1.28,1.08,1.12,1.29,1.3,1.52,1.21,66.8,61.62,66.84,56.85,66.21,95.21,97.19,78.74,77.48,78.7,62.55,63.6,74.45,56.31,44.67,70.84,86.3,74.45,77.59,71.35,56.56,66.08,71.32,84.09,86.37,49.59,43.03,37.26,69.61,89.09,72.03,86.51,38.22,35.09,58.65,68.94,60.47,51.26,30.04,63.62,73.8,83.93,66.44,14.22,72.3,57.81,67.13,70.04,88.29,81.79,67.55,51.21,66.6,68.8,81.71,89.91 +meta-llama/Llama-3.1-405B-Instruct-FP8 
(few-shot),405869,128,131072,True,False,799,1.39,1.13,1.47,1.08,1.69,1.65,1.26,1.63,1.19,71.94,61.26,64.94,56.0,67.44,89.91,96.37,81.66,82.44,82.17,40.55,66.76,63.91,45.93,45.33,73.55,81.37,76.27,80.7,68.85,56.41,67.18,75.85,81.49,66.4,53.84,26.22,26.49,69.88,77.37,59.56,81.95,53.25,14.29,60.41,72.87,60.79,50.25,28.34,66.17,74.98,83.48,69.12,11.23,68.74,55.25,70.51,74.89,80.93,82.86,70.6,53.8,62.69,69.33,80.39,88.02 +"gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.44,1.2,1.25,1.09,1.2,2.46,1.37,1.64,1.31,71.15,49.42,64.59,57.35,66.03,96.41,97.68,85.96,79.07,81.56,66.66,63.25,64.53,54.7,43.51,73.81,89.91,76.66,77.16,68.99,57.96,66.0,70.7,86.3,81.19,49.86,51.1,29.64,68.25,91.27,70.85,81.86,27.3,-0.97,56.45,69.99,54.82,43.66,30.06,63.8,74.13,88.18,76.75,10.8,56.26,55.55,66.86,73.11,92.69,83.48,62.74,46.56,65.41,67.64,78.55,91.34 +meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.49,1.29,1.59,1.23,1.81,1.69,1.33,1.71,1.27,68.57,60.52,57.57,54.33,68.01,87.22,92.93,70.23,80.05,80.67,40.65,66.56,56.42,38.21,49.22,65.19,69.47,72.16,81.69,63.97,57.99,66.85,69.59,70.4,69.95,51.27,18.75,30.79,69.53,75.02,48.44,79.04,52.33,15.72,59.08,72.11,59.87,46.82,30.64,66.2,68.8,73.49,68.82,11.41,61.66,55.43,69.51,68.61,69.72,83.16,69.96,50.83,60.82,69.21,77.51,83.85 +meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.58,1.34,1.65,1.25,1.92,1.81,1.5,1.82,1.35,67.07,59.89,56.56,49.02,67.0,87.31,94.13,76.43,79.84,79.93,41.11,65.38,57.84,43.52,40.92,66.69,75.66,71.98,81.15,64.46,51.22,66.63,70.79,75.48,69.04,50.79,20.36,24.56,69.13,77.24,46.46,78.46,51.6,17.25,50.98,70.47,60.55,50.13,20.42,65.2,69.32,78.33,70.37,10.87,62.87,44.3,69.36,68.82,74.72,82.35,71.07,51.27,50.23,68.77,76.73,86.49 +Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.59,1.41,1.71,1.33,2.07,1.65,1.37,1.83,1.32,63.81,54.5,57.19,55.77,64.62,82.81,87.32,77.57,72.21,70.24,39.85,62.86,63.14,43.24,43.41,70.2,76.79,62.12,79.89,61.71,54.99,65.38,70.61,76.53,68.54,47.72,22.98,26.72,66.93,67.74,36.29,76.56,46.61,23.45,55.28,68.94,58.78,52.66,27.62,63.32,70.49,81.41,67.16,9.84,66.06,50.91,64.83,73.09,79.0,75.84,68.66,56.46,58.39,68.32,78.29,86.39 +"gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.6,1.39,1.68,1.24,1.82,1.87,1.6,1.82,1.39,64.8,53.07,64.18,49.02,64.78,90.74,95.22,78.1,77.72,71.7,36.27,62.76,71.7,58.79,40.95,69.22,76.4,75.06,74.85,65.23,53.02,65.8,71.87,69.98,72.85,53.43,49.26,27.36,67.01,78.98,9.83,68.0,27.3,28.09,58.59,67.18,50.12,44.98,27.01,63.1,74.22,69.31,69.12,12.36,58.88,45.88,64.37,70.81,84.34,81.23,63.46,46.45,57.64,68.71,80.09,86.91 +Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,1.65,1.52,1.83,1.39,2.24,1.97,1.24,1.81,1.2,64.66,53.42,53.93,55.55,64.54,72.75,78.49,74.11,75.68,75.89,38.41,62.79,56.42,39.34,44.35,61.53,67.36,69.73,78.76,57.57,56.43,65.53,64.02,67.11,65.08,48.71,20.51,28.29,65.67,52.92,28.01,77.19,45.55,16.92,47.51,70.66,59.33,54.53,32.66,65.0,67.84,79.0,71.32,9.12,63.96,58.36,66.59,67.82,78.26,76.84,68.94,57.74,71.22,68.96,75.15,87.36 +ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.71,1.41,1.76,1.5,2.17,1.82,1.56,2.03,1.44,66.5,58.93,57.27,55.02,66.59,81.37,83.16,67.83,76.25,77.91,40.54,64.53,59.75,47.82,40.99,56.11,67.72,62.91,79.51,60.28,55.44,65.24,56.4,65.89,67.23,50.38,20.01,21.18,67.61,66.74,33.03,81.3,60.99,12.49,48.47,73.31,58.02,45.12,24.67,64.4,58.7,68.9,68.17,10.56,56.89,53.05,64.01,58.74,65.18,81.06,68.92,49.06,61.27,68.0,67.84,78.26 
+"gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,1.81,1.69,1.99,1.51,1.89,2.22,1.53,1.95,1.67,35.79,53.69,62.98,51.96,64.56,87.34,98.11,67.56,60.16,48.74,39.62,62.75,71.38,42.94,36.04,68.89,59.02,51.31,73.54,66.39,52.22,65.32,69.31,63.92,30.39,47.47,43.3,29.82,63.89,86.79,54.76,57.52,13.18,28.03,59.06,57.47,58.67,54.55,27.02,62.91,69.23,71.17,55.72,11.13,67.28,54.2,64.09,67.45,72.71,42.4,65.24,44.59,62.94,68.33,78.8,82.6 +"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.81,1.71,1.8,1.87,1.83,2.19,1.71,1.83,1.53,59.96,56.91,67.13,17.52,65.32,91.7,88.38,71.08,72.74,69.17,67.45,63.0,74.27,54.83,3.67,59.93,76.01,62.45,77.69,68.93,12.11,66.04,55.8,63.61,64.69,56.18,45.52,15.8,67.06,82.1,42.82,73.8,39.45,34.78,0.87,65.51,55.16,44.6,21.87,63.5,60.83,67.98,68.71,20.33,49.52,34.06,66.13,62.12,65.74,77.38,66.75,52.43,41.03,68.73,66.63,79.95 +"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,1.89,1.63,2.07,1.77,2.22,2.08,1.73,2.01,1.58,57.52,49.73,57.56,51.79,64.66,79.84,83.21,63.33,60.43,55.59,39.82,62.77,54.84,33.8,36.55,56.96,68.7,52.47,73.55,52.27,48.95,65.59,54.3,54.13,31.11,50.02,35.85,26.93,66.48,72.8,28.63,53.92,35.05,23.12,54.99,59.19,52.66,46.66,26.02,63.3,58.21,65.19,64.15,12.67,62.44,45.65,64.26,56.45,62.09,75.8,61.65,47.74,56.98,68.6,63.18,77.31 +CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.0,1.89,2.31,1.87,2.68,2.0,1.75,1.96,1.54,63.63,50.82,35.58,54.33,65.71,58.67,72.01,33.61,73.2,72.26,35.8,63.37,36.86,23.4,40.32,38.62,40.5,65.01,77.68,34.06,56.78,66.08,39.39,37.81,62.68,31.96,11.81,30.49,66.24,21.1,9.34,73.26,47.71,10.41,57.08,68.62,56.2,43.36,28.5,64.52,50.16,51.13,68.58,14.41,55.01,58.63,65.69,50.48,49.08,78.35,67.62,46.5,63.2,70.22,60.4,60.49 +nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.07,2.01,2.21,1.69,2.67,1.99,2.01,2.25,1.72,58.34,59.14,56.46,39.77,47.41,85.69,91.37,74.3,69.21,70.45,39.87,47.52,57.8,40.31,40.97,63.81,73.01,58.65,81.81,63.69,42.29,50.67,68.28,72.36,54.15,51.96,19.63,21.22,43.26,73.61,45.29,64.88,52.56,14.28,50.11,61.98,57.07,45.61,24.89,43.28,66.68,75.48,55.08,10.8,61.31,49.8,52.72,68.15,72.5,73.66,68.56,51.33,56.87,54.04,75.5,84.61 +"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.11,1.64,2.18,1.87,2.35,2.02,2.26,2.52,2.04,65.88,63.61,71.03,46.24,64.6,84.02,85.34,23.71,74.23,70.5,50.92,62.57,76.1,72.03,40.57,24.04,-0.8,70.22,77.7,74.34,49.32,65.36,28.34,13.68,61.7,51.24,52.43,22.92,66.02,15.97,5.31,72.52,8.17,32.38,45.34,61.83,61.59,46.4,23.77,62.69,17.15,9.7,62.41,12.64,74.06,35.77,64.25,27.77,14.21,82.11,67.01,51.09,52.41,67.73,26.14,13.56 +mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.17,2.04,2.11,1.92,2.09,2.54,2.08,2.64,1.95,48.24,39.52,62.92,36.92,56.79,67.68,84.85,68.82,56.41,55.6,25.18,59.28,62.56,53.09,42.57,45.67,73.86,48.92,62.08,68.93,36.4,61.39,48.18,56.87,37.98,47.34,50.14,16.27,63.4,79.94,44.61,55.31,51.62,15.07,18.22,50.17,43.41,40.59,19.75,60.34,52.07,74.99,47.6,10.62,61.64,24.02,54.51,50.32,64.62,51.92,67.01,45.98,30.12,65.82,60.71,77.14 +meta-llama/Meta-Llama-3-8B-Instruct 
(few-shot),8030,128,8192,True,False,1483,2.22,2.14,2.43,2.08,2.73,2.26,1.94,2.33,1.82,57.74,48.43,27.12,46.76,66.36,57.87,50.42,29.17,66.56,68.29,34.47,63.8,28.22,18.21,47.34,30.78,31.49,69.67,59.93,27.63,49.84,66.6,33.54,30.32,60.2,38.09,9.14,28.66,67.5,17.93,5.59,70.61,45.78,4.58,50.67,68.18,58.33,29.12,28.68,65.23,38.44,37.69,68.72,14.67,32.91,45.36,67.62,36.18,33.91,75.02,67.64,32.29,54.84,69.28,53.77,57.64 +mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.22,1.99,2.45,2.05,2.78,2.41,1.95,2.28,1.82,52.62,50.07,37.37,54.87,66.46,69.0,79.86,24.68,63.92,62.15,46.68,63.04,33.38,19.99,31.87,38.91,20.29,60.92,79.78,34.88,50.35,62.82,43.74,19.86,55.09,39.6,8.23,30.78,63.01,18.12,8.15,67.97,45.19,5.21,43.26,59.1,55.3,37.84,31.71,64.23,47.89,28.53,64.81,12.99,39.38,49.08,66.41,45.6,24.71,65.31,68.87,43.07,63.97,68.68,56.64,33.42 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.25,1.98,2.39,2.04,2.79,2.42,1.99,2.66,1.73,54.7,54.81,32.11,48.87,66.79,56.14,63.54,39.48,64.55,66.44,35.17,64.48,27.41,15.6,43.11,35.02,37.61,55.8,79.23,32.67,46.88,66.43,36.35,37.89,46.48,39.91,11.72,25.91,67.67,25.83,6.4,67.67,48.54,3.89,47.07,62.03,58.15,30.18,26.48,64.68,39.36,47.21,61.68,8.97,36.57,33.88,64.8,37.45,42.45,76.95,68.12,34.34,47.88,69.57,56.62,69.03 +mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.28,2.18,2.47,2.13,3.0,2.71,1.96,2.15,1.63,55.49,49.18,7.4,57.72,65.1,45.83,59.45,35.46,67.24,66.08,31.41,62.89,28.72,20.55,40.6,30.39,38.46,54.76,73.32,16.17,57.94,65.95,30.08,39.51,47.1,24.43,7.19,28.73,66.62,21.98,0.1,65.55,29.49,2.05,47.72,60.5,50.39,30.86,30.53,64.03,39.61,60.04,63.3,11.82,32.2,59.45,68.87,40.81,60.68,72.4,63.46,35.86,68.42,69.38,53.42,78.36 +mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.29,2.06,2.51,2.04,3.14,2.58,1.92,2.32,1.72,51.2,50.95,33.44,46.85,64.66,67.12,76.78,39.93,55.02,57.37,36.76,61.59,30.73,18.96,41.01,39.07,40.48,46.15,80.33,32.89,46.51,66.04,42.98,35.33,35.0,23.93,9.69,21.38,64.69,17.73,9.06,63.43,43.99,9.38,25.35,58.07,52.18,41.45,26.87,64.1,47.93,51.72,58.8,12.5,45.22,47.03,65.95,43.41,43.33,70.02,69.48,44.59,55.7,68.79,55.61,57.52 +nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.31,2.19,2.48,2.16,2.78,2.5,2.1,2.5,1.76,49.85,49.52,32.35,52.54,65.13,52.71,63.47,16.3,64.15,62.16,55.29,60.17,32.3,22.82,32.62,35.37,15.39,58.75,79.59,33.09,47.28,62.78,36.58,18.78,53.28,41.8,11.07,29.74,64.08,20.14,3.62,66.18,39.48,5.78,44.81,59.82,50.22,30.43,30.22,62.56,43.09,33.4,66.29,12.71,31.39,48.33,64.1,37.55,29.69,67.52,69.03,40.51,58.12,67.86,55.93,64.11 +CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.33,2.17,2.39,2.08,3.32,2.74,2.0,2.2,1.72,51.32,52.0,18.48,52.43,66.18,41.32,52.24,37.67,66.55,63.63,38.61,64.48,15.8,12.3,43.26,36.48,35.85,57.38,78.43,14.52,53.14,65.69,37.32,38.28,28.98,19.83,4.93,24.72,63.45,10.97,4.23,64.72,28.57,5.12,38.83,59.95,55.39,30.59,26.94,66.11,38.63,48.53,53.02,13.68,29.97,53.4,73.93,38.81,40.95,67.33,68.67,31.18,68.33,72.23,49.41,58.93 +NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.37,2.26,2.27,1.95,3.12,2.68,2.24,2.6,1.8,49.18,49.76,41.28,12.83,67.44,77.55,74.67,36.42,58.05,59.65,57.94,65.07,51.36,42.84,14.72,43.18,37.58,57.66,80.04,45.21,52.73,59.91,42.25,28.88,41.84,38.12,7.47,20.87,65.24,13.41,3.94,58.62,40.74,5.58,28.04,56.14,53.33,29.49,18.49,64.84,44.31,30.72,62.81,11.28,28.57,38.75,65.35,39.53,26.71,63.77,69.23,38.49,57.03,70.47,54.58,47.93 
+meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.37,2.2,2.62,2.17,2.81,2.8,2.22,2.31,1.85,50.92,47.86,29.19,48.38,64.89,51.88,62.31,27.37,65.17,60.22,34.02,62.16,32.48,18.38,33.06,32.4,24.89,62.19,80.31,30.29,42.78,64.14,35.1,23.18,52.97,41.29,5.95,31.99,65.19,21.56,4.44,65.65,24.3,0.61,45.01,59.45,53.39,23.87,27.14,64.01,36.56,30.05,64.79,11.95,32.97,63.89,66.29,38.44,30.88,69.86,66.76,30.96,71.39,67.93,52.47,43.95 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.39,2.22,2.67,2.17,2.87,2.84,2.0,2.46,1.9,49.46,51.16,23.01,49.75,65.26,51.64,59.28,24.3,62.89,56.18,33.07,61.95,30.73,20.57,30.77,30.95,21.98,59.92,80.91,26.39,47.69,63.94,33.39,20.21,50.45,34.68,8.69,31.94,65.22,19.93,4.52,61.11,19.4,2.02,50.34,56.0,56.4,22.01,35.39,68.92,38.12,31.37,62.26,10.45,30.3,62.99,65.17,36.38,28.33,66.31,64.3,28.18,70.38,67.9,52.54,41.19 +Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.4,2.27,2.09,2.13,3.33,3.07,2.19,2.45,1.63,51.94,51.97,29.99,38.99,64.49,35.95,59.76,34.63,66.22,64.14,55.48,65.32,26.13,17.32,49.75,29.72,46.78,56.28,77.51,23.25,47.09,65.58,31.52,39.95,42.23,27.93,6.38,19.39,61.92,4.22,0.31,62.2,26.68,7.07,11.97,61.37,51.38,35.58,19.92,63.64,33.87,48.46,62.86,15.11,39.11,36.48,64.42,32.99,42.56,72.77,70.12,44.68,57.17,69.32,47.17,77.25 +senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.41,2.13,2.34,2.18,3.23,3.31,2.01,2.25,1.86,52.61,49.81,19.64,48.03,66.67,57.65,51.99,44.44,64.37,62.77,50.6,65.09,18.09,12.25,38.34,27.33,41.59,58.9,67.74,16.52,49.41,66.09,31.76,45.84,56.71,7.92,3.44,21.55,65.39,9.11,3.3,67.42,20.01,7.02,0.65,64.38,54.44,26.03,25.68,68.16,33.84,50.99,64.25,13.66,28.59,49.64,68.66,35.37,47.5,70.62,67.78,30.99,49.56,70.76,44.11,69.2 +utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.41,1.98,2.42,2.08,3.44,2.88,2.04,2.35,2.09,34.0,53.97,32.21,57.1,67.55,80.75,74.77,36.86,40.91,42.91,52.62,65.77,9.7,11.98,47.36,36.97,37.64,40.59,76.02,33.98,56.98,66.33,40.09,36.27,24.92,9.76,0.15,28.18,64.67,11.1,0.89,54.79,35.84,0.0,41.94,40.85,56.53,24.74,38.2,69.44,39.43,37.67,43.06,11.95,40.85,63.42,69.33,41.9,36.69,44.81,62.54,28.1,71.71,69.27,45.95,43.97 +CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.43,2.23,2.66,2.29,3.17,2.64,2.14,2.48,1.86,53.44,49.17,20.55,51.7,65.47,37.41,62.46,26.25,61.54,60.94,35.73,63.77,21.33,13.2,32.36,27.87,30.99,47.15,80.24,11.35,49.93,65.3,29.64,31.96,42.29,38.87,0.28,18.74,66.07,10.93,4.44,64.53,44.01,2.39,34.84,62.43,53.9,29.68,19.35,64.8,41.22,46.05,61.21,9.03,38.15,44.91,67.58,41.03,44.57,71.96,67.26,40.81,49.79,68.97,50.59,58.04 +ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.49,2.39,2.54,2.3,3.44,2.85,2.11,2.52,1.75,44.92,49.31,10.14,57.34,66.02,33.71,43.55,21.34,53.79,56.13,51.36,62.4,6.83,8.09,48.01,24.55,26.71,44.94,76.78,16.96,56.83,65.09,26.57,24.62,42.67,9.95,1.11,22.25,63.81,5.12,0.89,59.96,28.33,2.24,39.52,54.68,55.48,26.89,31.27,66.33,33.98,34.59,53.62,13.37,23.47,61.2,62.34,34.69,31.36,66.17,68.03,39.76,71.21,69.3,49.99,52.45 +mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.49,2.35,2.62,2.24,3.27,2.78,2.1,2.67,1.9,45.42,43.16,8.79,59.43,66.47,53.26,58.26,18.53,52.0,55.12,47.25,63.49,8.66,6.8,46.86,27.78,10.88,53.34,80.0,4.61,58.99,64.87,35.52,19.67,50.69,24.38,1.46,27.11,56.76,8.71,3.21,62.63,25.57,2.84,44.06,55.37,54.27,23.12,31.89,68.24,35.63,26.4,58.15,7.94,25.41,62.56,64.24,35.49,19.88,63.4,68.17,30.92,73.45,69.11,47.74,34.96 
+mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.52,2.27,2.67,2.25,3.33,2.81,2.13,2.78,1.93,43.6,45.92,15.43,59.13,66.33,53.81,61.06,20.64,50.56,52.65,44.61,63.13,12.1,9.3,45.15,28.31,13.59,49.18,79.08,11.06,58.98,64.79,34.51,20.84,46.73,26.28,1.5,25.17,55.71,7.21,2.09,61.32,26.73,1.3,44.98,55.41,52.58,24.1,31.52,68.96,35.06,28.85,56.52,7.02,23.41,61.9,64.37,34.93,23.73,61.02,67.29,30.1,73.59,69.04,47.63,35.63 +NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.53,2.05,2.39,2.22,3.29,2.79,2.36,2.73,2.41,51.95,52.11,44.47,43.32,64.64,77.33,79.05,23.81,58.53,60.26,59.48,54.54,51.85,41.89,25.62,43.35,26.91,57.01,80.12,43.04,30.44,63.55,41.92,19.13,36.73,34.34,6.57,22.59,58.69,12.07,2.87,62.6,31.36,5.21,34.26,57.26,54.57,26.52,19.96,61.1,43.3,23.82,62.76,13.83,24.44,26.17,63.34,38.5,20.26,63.12,66.47,38.82,29.16,57.65,54.57,30.13 +ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4097,True,False,2515,2.54,2.29,2.52,2.26,3.55,2.85,2.24,2.64,1.97,44.58,47.16,19.2,58.41,65.64,32.05,47.42,28.73,49.94,52.17,53.27,63.01,17.22,12.01,45.04,24.31,30.34,44.8,75.92,24.84,56.71,63.65,26.71,30.43,37.82,-0.29,-0.12,21.59,62.35,6.54,1.83,61.47,24.35,1.44,41.54,50.43,57.84,22.58,27.96,65.08,35.21,26.48,52.32,8.46,42.42,53.12,66.0,34.34,26.46,55.76,66.89,36.6,67.55,66.51,46.22,52.23 +CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.58,2.34,2.7,2.28,3.6,3.05,2.47,2.27,1.94,47.08,47.16,8.41,58.83,65.03,36.64,39.24,27.29,60.94,59.61,35.73,62.45,6.18,4.0,46.52,20.14,27.5,60.04,76.21,7.54,58.6,63.0,20.97,28.96,47.16,0.33,3.84,21.75,59.16,3.7,-3.24,62.22,17.34,0.01,38.7,51.2,47.79,18.04,29.46,62.58,24.21,30.55,60.81,7.9,31.12,63.0,72.9,32.37,53.32,56.16,68.27,23.82,74.23,72.14,33.04,55.1 +mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.63,2.33,2.74,2.39,3.6,2.84,2.35,2.67,2.13,44.89,48.09,19.06,51.56,66.84,51.6,35.85,22.21,53.42,54.34,38.79,64.43,17.06,11.0,35.74,20.37,21.16,47.92,62.9,19.95,52.51,66.11,25.6,21.75,34.8,17.64,5.46,12.66,61.79,2.88,-1.11,61.28,32.07,1.68,39.0,55.1,47.69,24.14,23.93,67.51,26.06,31.09,55.56,12.37,21.5,50.77,67.99,22.86,24.8,62.11,59.91,30.66,58.27,69.75,34.93,44.91 +occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.67,2.5,2.77,2.4,3.38,3.11,2.21,2.8,2.18,40.19,42.31,1.14,57.89,66.68,44.3,48.76,15.44,45.5,45.96,44.46,63.95,0.0,0.0,52.19,20.61,16.18,47.67,71.73,7.9,57.78,65.07,25.52,14.06,40.71,14.7,0.71,20.66,65.25,5.35,0.35,60.37,8.21,0.0,43.69,52.63,43.16,27.09,34.01,69.43,32.56,23.61,53.78,7.78,16.23,63.09,66.46,28.37,15.25,56.9,62.1,20.17,75.29,69.63,38.48,27.67 +timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.67,2.33,2.66,2.15,3.47,3.22,2.37,3.11,2.06,42.43,47.82,16.51,56.95,65.43,50.76,50.82,14.47,48.97,51.52,49.05,63.32,14.37,9.96,44.07,25.07,15.56,44.14,80.14,34.23,57.07,65.15,33.24,25.5,36.47,1.84,2.54,18.66,63.68,5.12,8.3,58.96,8.97,0.0,39.2,50.66,54.79,20.17,27.86,65.53,27.04,17.47,54.56,8.43,10.99,55.91,57.88,25.12,10.65,59.1,68.41,25.43,71.89,67.99,44.09,32.29 +meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,2.68,2.46,2.94,2.41,3.53,2.8,2.49,2.8,2.03,41.12,42.77,11.52,51.14,65.19,49.78,45.88,21.89,49.66,51.98,44.13,60.5,0.67,1.11,28.62,26.82,20.98,43.74,76.98,16.01,48.38,64.98,29.44,22.42,27.57,10.07,-1.39,22.98,62.0,13.33,0.74,58.24,32.79,1.77,45.13,55.52,50.52,9.87,20.2,66.4,33.58,28.97,43.66,12.87,17.94,47.77,66.74,33.8,21.02,68.44,66.0,32.04,49.54,69.23,45.5,46.5 
+nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.74,2.67,2.84,2.56,3.45,2.98,2.38,2.95,2.12,49.01,47.95,32.89,0.0,62.7,51.21,62.3,33.79,63.7,62.53,34.35,61.42,31.53,22.71,0.06,35.46,36.47,48.51,78.68,29.18,0.0,65.35,40.16,39.81,43.65,10.7,10.77,0.29,65.26,23.45,8.35,67.72,42.98,6.4,0.07,60.69,53.77,38.53,0.0,62.73,45.91,47.37,60.11,11.12,32.68,0.0,64.01,41.0,39.2,72.63,65.74,43.43,0.0,66.76,60.0,69.36 +occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.74,2.54,2.85,2.42,3.45,3.19,2.26,2.97,2.26,37.93,44.62,0.28,58.05,66.05,38.54,45.89,12.38,45.28,46.0,44.95,63.26,0.0,0.0,43.88,20.87,13.1,49.02,76.56,2.18,58.98,64.42,23.68,14.05,40.08,16.23,1.59,15.98,62.55,5.98,-0.51,58.67,10.39,0.0,40.95,51.39,47.3,21.83,31.55,69.31,32.49,22.25,51.31,7.41,13.04,59.28,64.66,27.12,13.99,55.37,63.32,18.92,72.38,68.61,37.04,23.54 +mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,2.75,2.51,2.81,2.52,3.5,3.14,2.48,2.94,2.07,37.93,44.49,14.09,51.38,65.8,45.07,35.36,14.85,50.08,51.27,43.65,62.39,14.09,8.28,37.23,20.44,15.87,45.01,73.33,11.59,52.12,63.1,24.03,15.37,36.04,12.93,-0.36,18.06,62.8,5.44,6.35,55.42,15.85,1.11,33.54,47.19,47.26,22.32,24.36,67.75,26.79,20.33,52.72,7.91,18.14,52.75,64.77,26.06,14.26,57.58,61.44,34.92,65.38,69.62,38.4,35.72 +ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.76,2.67,2.85,2.55,3.51,3.02,2.45,2.81,2.21,48.44,39.07,9.72,51.18,63.93,33.11,18.96,9.03,68.4,65.15,42.0,61.27,5.2,3.32,37.51,12.42,8.32,59.77,74.45,3.97,50.18,62.61,14.34,7.4,50.89,9.52,0.5,17.43,59.94,5.52,1.73,66.82,21.19,-0.36,36.47,59.07,49.75,14.71,29.45,67.78,16.39,10.68,63.29,13.81,8.16,56.64,63.08,16.86,6.24,72.76,62.35,21.57,69.8,67.73,25.63,16.44 +NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.78,2.68,2.73,2.53,3.2,3.07,2.63,2.87,2.51,50.83,53.23,23.02,0.0,57.74,77.42,82.47,30.39,65.75,70.12,41.9,53.29,47.88,35.66,0.03,46.91,33.14,69.54,79.55,28.27,0.02,55.51,48.02,24.46,62.28,38.08,2.85,0.86,57.6,48.56,16.74,65.06,48.44,3.3,0.0,67.63,55.79,24.45,1.17,55.51,44.2,31.71,69.37,14.35,29.13,0.34,52.69,50.9,34.78,76.84,67.91,30.61,0.1,58.37,48.18,34.99 +meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.82,2.58,2.94,2.61,3.52,3.25,2.57,2.9,2.22,35.44,44.88,9.74,55.04,66.15,32.17,35.74,11.32,44.99,49.09,41.56,63.59,3.04,4.03,33.77,14.81,12.69,39.72,66.18,6.74,54.05,65.92,17.73,12.85,41.1,13.59,-1.07,16.13,62.3,3.16,1.84,59.77,13.24,-0.54,31.87,50.09,46.52,15.23,25.54,67.62,20.12,13.98,50.23,10.07,14.73,53.42,67.59,20.19,11.42,62.53,62.23,22.71,64.45,69.95,30.47,30.18 +"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.85,2.28,3.12,2.71,3.36,3.26,2.73,2.83,2.48,59.48,56.46,20.57,38.23,64.38,53.54,69.09,21.03,69.39,62.76,3.97,62.53,31.65,5.86,36.65,16.62,2.63,57.06,59.89,9.3,39.97,65.29,16.34,0.0,34.99,31.19,-10.68,23.65,66.12,9.0,0.0,51.06,-3.58,4.1,45.29,55.59,43.73,23.74,21.36,62.97,16.53,-1.78,61.15,12.71,35.26,41.27,64.93,8.55,10.69,74.35,31.19,21.76,45.7,67.96,28.8,42.52 +ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.89,2.75,2.94,2.62,3.82,3.14,2.63,2.95,2.29,45.9,37.11,11.7,50.11,63.86,32.44,7.46,5.62,66.91,62.82,40.71,60.59,9.5,6.74,32.83,11.35,6.21,52.85,73.93,8.27,48.49,60.98,13.69,5.68,43.2,2.54,0.0,14.28,49.66,3.07,2.79,60.46,21.59,0.51,33.54,54.45,43.62,15.24,26.0,66.68,15.81,9.6,60.72,12.38,10.96,51.2,63.23,15.26,4.03,72.59,61.61,18.37,66.68,68.41,24.14,14.42 
+ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.01,2.96,3.22,2.76,3.84,3.54,2.6,2.91,2.23,37.37,31.44,5.27,48.41,63.82,18.25,19.54,2.64,44.89,48.08,32.29,59.77,7.49,4.65,26.37,11.54,3.42,40.68,68.96,4.77,49.73,60.93,13.55,5.27,23.14,5.07,0.18,14.15,60.8,2.86,-1.31,45.56,7.44,0.92,20.82,47.31,48.28,14.08,28.37,61.97,22.99,20.06,49.25,9.45,11.87,54.2,64.77,21.52,22.32,52.79,65.92,16.74,64.92,65.5,33.84,49.84 +MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.17,3.19,3.39,2.76,3.55,3.32,3.19,3.43,2.57,28.18,29.32,2.9,56.48,53.81,27.86,34.62,4.73,36.96,39.38,32.67,51.44,2.18,5.33,45.23,9.35,4.85,41.49,75.64,0.66,57.48,55.94,10.56,5.03,31.81,18.33,3.63,16.72,58.72,12.62,3.43,40.18,14.19,0.31,41.6,32.33,26.39,1.44,28.15,58.62,14.94,8.7,36.61,8.77,3.52,59.51,54.5,14.9,7.26,47.2,64.82,7.57,73.88,67.34,16.59,11.97 +ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.17,3.22,3.32,2.91,4.01,3.81,2.8,3.01,2.3,31.8,6.85,0.97,49.83,63.43,15.97,17.19,3.07,40.08,43.96,31.9,59.98,-0.07,1.27,23.32,11.78,5.48,36.01,57.18,1.52,51.04,58.57,13.42,7.33,18.07,0.65,-0.72,12.27,56.49,0.32,1.0,41.27,5.4,-0.2,19.69,40.61,31.86,5.36,25.99,66.77,22.17,22.61,42.52,9.91,0.69,56.95,63.71,20.93,24.42,49.44,66.65,12.56,63.29,66.38,32.06,58.21 +openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.22,2.92,3.35,2.84,3.79,3.51,3.01,3.36,3.0,34.66,21.93,1.5,52.36,64.84,24.39,33.37,13.98,37.36,42.83,16.02,61.93,-0.08,2.29,31.6,8.67,9.8,35.02,51.8,6.15,50.85,65.68,10.11,8.76,28.74,4.3,0.06,17.41,60.79,0.18,-1.64,46.64,16.72,-1.54,18.69,39.39,23.6,7.68,25.3,63.84,14.99,14.99,39.24,4.25,11.48,54.18,65.55,13.37,9.13,50.73,27.52,2.96,63.42,68.74,16.65,10.69 +ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.26,3.09,3.32,3.05,4.04,3.63,3.06,3.33,2.59,37.21,31.54,6.3,44.86,61.56,17.92,10.79,1.7,53.78,55.14,26.21,57.11,3.9,2.42,24.86,10.36,5.85,50.1,65.67,4.55,42.83,45.16,7.58,3.79,33.57,0.6,0.0,11.27,49.32,1.37,-4.04,56.88,3.8,-0.21,13.72,49.16,35.17,9.79,22.48,60.81,6.89,4.79,48.53,10.15,4.88,45.38,59.56,7.38,2.69,58.3,59.01,10.33,65.04,67.46,14.1,10.67 +ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.31,2.93,3.7,3.0,3.83,3.62,3.38,3.32,2.72,38.62,35.47,5.07,45.21,62.5,13.46,15.31,6.0,53.93,54.04,23.83,50.59,3.91,1.55,2.37,8.68,6.19,51.76,70.61,6.24,44.67,41.31,7.41,5.42,38.52,4.29,0.0,12.94,58.58,2.11,-4.75,60.88,0.16,-0.35,18.54,49.38,41.72,7.67,13.7,45.88,8.73,6.18,50.88,12.39,3.31,48.44,52.5,9.22,4.45,60.64,61.2,7.63,69.83,56.62,16.29,10.37 +openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.33,2.95,3.38,3.08,3.79,3.58,3.24,3.51,3.07,29.49,13.77,0.0,51.53,66.31,24.59,39.09,15.6,34.78,39.0,10.69,62.73,6.17,5.9,31.25,7.97,10.39,37.17,20.2,6.13,46.66,65.28,10.89,9.2,26.58,-0.79,0.63,15.14,60.84,1.17,-0.12,39.78,16.03,-0.48,20.04,38.81,10.59,0.91,22.54,64.28,15.69,14.18,42.35,0.78,-0.02,47.61,65.32,14.59,11.78,44.48,23.69,8.52,56.97,68.81,15.31,9.35 +HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.4,3.21,3.77,3.01,3.76,3.94,3.31,3.79,2.42,29.44,18.49,1.73,44.39,61.76,22.03,12.61,2.06,37.6,38.38,24.05,48.55,3.56,2.61,13.58,9.52,3.62,37.37,64.46,4.49,43.92,54.5,8.61,4.51,26.23,6.86,2.69,10.84,60.43,1.93,3.27,40.28,3.94,-0.26,10.68,32.54,27.03,8.95,18.38,59.91,12.3,6.28,31.84,1.56,5.05,40.55,60.35,13.39,4.57,47.58,66.78,20.53,58.07,62.45,32.9,25.32 +utter-project/EuroLLM-1.7B-Instruct 
(few-shot),1657,128,4096,True,False,15009,3.4,3.09,3.4,2.97,4.02,4.07,3.15,3.57,2.93,19.61,37.92,2.81,50.05,63.91,2.54,17.85,0.52,31.43,36.92,30.63,59.45,0.98,1.67,33.24,1.85,-1.18,27.41,72.24,0.13,49.77,63.23,3.39,0.54,13.29,6.83,0.92,7.49,56.76,0.59,1.87,33.53,4.25,-2.32,15.41,28.49,43.18,2.92,23.26,64.64,2.6,1.46,32.45,7.03,5.58,51.18,61.96,2.53,1.04,37.47,58.61,5.3,63.26,67.24,3.93,1.26 +HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.48,3.4,3.7,2.97,3.91,4.08,3.31,3.82,2.66,24.47,9.93,1.22,42.09,61.62,19.65,19.01,1.34,26.7,28.23,23.25,56.31,-0.47,0.26,13.4,11.12,2.53,35.96,68.31,3.61,43.26,57.04,10.86,2.53,20.5,10.09,0.83,10.84,57.52,3.16,-1.83,27.91,0.77,-0.48,16.56,28.67,19.69,5.07,18.43,64.25,15.15,6.23,22.84,4.6,2.55,40.33,58.31,14.32,3.87,41.57,62.32,8.04,56.01,65.06,34.02,22.81 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,3.73,3.71,3.97,3.58,4.14,4.26,3.4,4.04,2.73,20.03,15.96,0.86,28.98,56.75,2.15,2.51,0.88,29.25,25.45,11.28,55.58,1.52,0.52,8.47,2.62,-0.06,26.41,25.99,1.64,21.39,58.09,3.51,1.05,9.39,6.44,-0.72,3.34,59.37,-1.22,4.2,14.18,2.36,2.29,6.35,32.54,22.27,7.18,16.72,58.08,6.57,5.16,28.25,3.73,0.76,19.08,57.2,5.45,2.34,43.0,54.47,17.44,53.15,67.33,20.49,17.5 +PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.82,3.45,3.93,3.6,4.03,4.04,3.84,4.0,3.69,28.3,28.95,0.2,36.39,56.6,-0.19,11.52,0.06,38.96,40.42,19.42,49.86,-0.13,0.77,4.7,-0.21,-0.56,36.29,39.68,0.96,32.64,28.38,1.35,-0.19,22.56,0.53,-0.26,11.77,53.36,0.36,0.24,38.91,-1.72,0.66,4.82,34.68,21.76,0.85,14.3,43.98,0.36,1.32,38.22,4.99,1.85,27.77,49.31,-1.45,0.83,40.45,47.89,0.28,26.77,50.31,1.03,-0.3 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.84,3.63,3.91,3.39,4.06,4.28,3.75,3.84,3.84,15.93,13.01,0.05,36.85,58.6,-0.09,2.94,-0.35,28.82,27.81,18.74,53.53,-0.46,-0.84,12.66,-1.29,1.29,21.42,45.75,-0.25,32.71,57.21,1.57,0.36,18.86,-0.67,-0.76,8.09,60.61,-0.78,-0.84,22.75,-0.03,-0.78,7.75,23.08,7.41,0.89,17.32,56.57,-0.63,-0.17,31.13,7.24,1.23,32.13,56.85,1.79,-0.63,27.37,36.35,-0.37,7.42,60.89,2.21,-0.9 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,3.97,3.86,4.25,3.88,4.19,4.27,3.84,4.12,3.34,12.68,3.61,1.79,28.12,56.85,-0.03,6.03,0.2,26.6,23.7,6.21,48.59,-0.39,0.21,4.65,-1.13,-0.51,18.22,11.52,1.72,27.27,45.57,0.69,0.68,13.43,3.82,1.14,3.71,51.93,0.95,2.9,28.14,-0.56,-0.06,2.43,19.94,19.64,0.0,8.78,57.06,0.32,-0.65,20.95,6.84,-1.5,22.67,53.89,-0.45,-0.31,31.14,43.97,3.49,47.91,62.2,0.12,0.13 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.97,3.52,4.06,3.85,4.12,4.34,4.1,3.96,3.8,16.17,29.12,-0.47,34.8,59.04,-0.22,6.94,0.52,27.47,23.82,22.22,49.84,-2.06,-0.77,2.48,0.39,-1.07,14.09,23.71,1.74,32.0,28.3,0.94,-0.48,9.9,2.13,0.02,10.64,54.83,-0.56,1.21,20.76,-1.78,0.87,3.58,24.32,15.58,1.25,6.82,44.9,-0.19,1.18,23.58,7.9,1.79,26.11,53.77,-0.41,-0.45,21.6,45.04,-0.46,33.46,49.12,2.37,-0.33 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.05,3.99,4.26,3.97,4.29,4.2,4.07,4.25,3.34,8.97,2.66,1.65,24.92,55.39,0.28,3.55,-0.41,20.37,21.27,7.6,49.27,1.31,0.51,4.8,-0.9,-1.0,13.64,9.34,2.2,26.06,37.49,-0.0,0.78,13.6,3.12,0.28,4.09,50.0,-0.11,2.51,26.85,3.07,-0.12,1.39,18.77,12.59,1.64,9.27,46.31,0.84,0.01,15.68,6.73,0.63,19.73,50.53,1.3,-0.36,30.73,59.51,1.55,49.03,57.73,0.11,-0.06 +HuggingFaceTB/SmolLM2-135M 
(few-shot),135,49,8192,True,False,26346,4.19,4.2,4.09,3.93,4.23,4.31,4.22,4.51,4.03,13.72,3.79,-0.45,14.69,47.44,-0.58,10.99,-0.51,24.37,24.69,8.84,53.61,-1.2,-0.5,0.16,-0.81,-0.71,19.15,-3.03,0.06,14.18,51.51,0.02,0.04,14.74,3.13,-0.25,1.35,52.66,1.21,1.69,25.51,-0.24,0.46,0.15,16.89,2.74,-0.34,0.28,54.79,-0.32,0.18,17.49,2.01,-0.02,0.53,52.46,0.33,-0.1,31.26,26.69,1.78,13.88,52.05,1.51,-0.76 +HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.22,4.02,4.21,4.12,4.32,4.31,4.31,4.54,3.97,12.11,2.61,0.25,14.02,56.53,1.05,10.81,-0.5,20.89,19.62,2.78,53.93,-0.98,0.93,0.15,-0.48,-0.53,17.09,7.41,0.47,11.73,38.3,0.3,0.06,13.7,3.01,-0.83,0.94,50.3,1.1,-0.07,23.22,3.78,0.41,0.54,15.54,2.51,0.36,1.77,49.41,-1.46,-0.18,15.82,-0.62,1.16,3.25,54.82,1.36,0.34,29.96,18.64,1.85,26.9,52.96,1.34,0.1 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.22,4.13,4.3,4.12,4.23,4.24,4.15,4.52,4.08,13.84,9.47,-0.36,22.1,45.28,-0.27,2.54,-0.66,26.59,26.78,7.91,45.83,0.28,0.04,0.65,0.57,-0.66,22.09,14.15,-0.04,21.6,27.08,-0.65,0.48,17.73,2.38,-0.18,1.59,52.81,0.89,-1.11,31.99,0.0,0.48,0.29,25.3,13.9,-0.25,6.12,41.9,-1.81,0.0,24.47,3.57,-2.03,10.18,44.43,-0.11,-0.01,31.79,19.13,-0.03,12.35,46.13,-1.2,0.47 +PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.26,3.99,4.42,4.19,4.47,4.38,4.16,4.31,4.2,10.59,13.31,0.52,16.61,53.52,0.89,4.3,-0.01,25.02,21.59,8.05,43.81,-0.15,-0.97,0.37,-0.31,-0.61,16.28,17.38,-0.45,17.78,27.12,-1.38,0.94,13.8,2.17,-0.63,1.29,43.4,1.16,1.21,22.55,0.67,0.87,0.27,21.03,10.99,0.13,3.61,43.73,-0.77,0.11,21.32,4.37,-0.19,9.38,50.94,0.33,-0.81,27.45,27.39,0.31,15.62,43.83,0.65,-0.44 +ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.71,4.67,4.67,4.42,4.8,4.7,4.7,4.95,4.81,0.0,0.0,0.0,0.02,42.82,4.55,-1.17,-1.26,0.0,0.0,0.0,41.98,0.0,0.0,0.0,1.14,0.64,0.0,0.0,0.0,0.0,40.82,1.19,1.55,0.0,-1.3,0.0,0.0,36.58,-0.0,0.0,0.0,0.74,0.0,0.0,0.0,0.0,0.0,0.0,37.66,0.87,1.95,0.0,0.0,0.0,0.0,43.74,0.32,1.73,0.0,0.0,2.48,0.01,44.8,-0.69,0.37 diff --git a/germanic-nlg.md b/germanic-nlg.md index 1486f9a6..dc4ca773 100644 --- a/germanic-nlg.md +++ b/germanic-nlg.md @@ -3,7 +3,7 @@ layout: leaderboard title: Germanic NLG 🇪🇺 --- -
Last updated: 10/01/2025 12:30:56 CET
+Last updated: 11/01/2025 11:04:06 CET
@@ -154,14 +154,14 @@ title: Germanic NLG 🇪🇺 128000 True 576 ± 221 / 81 ± 28 - 1.25 + 1.24 1.14 - 1.29 + 1.28 1.08 1.12 - 1.30 + 1.29 1.30 - 1.53 + 1.52 1.21 66.80 ± 3.01 / 45.69 ± 2.85 61.62 ± 2.17 / 73.99 ± 1.48 @@ -285,12 +285,12 @@ title: Germanic NLG 🇪🇺 799 ± 246 / 112 ± 38 1.39 1.13 - 1.48 - 1.09 + 1.47 + 1.08 1.69 1.65 - 1.27 - 1.64 + 1.26 + 1.63 1.19 71.94 ± 1.37 / 61.59 ± 1.44 61.26 ± 1.20 / 73.27 ± 0.95 @@ -414,12 +414,12 @@ title: Germanic NLG 🇪🇺 916 ± 329 / 114 ± 38 1.44 1.20 - 1.26 + 1.25 1.09 1.20 2.46 1.37 - 1.65 + 1.64 1.31 71.15 ± 2.89 / 52.24 ± 3.76 49.42 ± 3.29 / 61.74 ± 2.59 @@ -547,8 +547,8 @@ title: Germanic NLG 🇪🇺 1.23 1.81 1.69 - 1.34 - 1.72 + 1.33 + 1.71 1.27 68.57 ± 0.95 / 50.90 ± 2.30 60.52 ± 1.22 / 72.91 ± 1.05 @@ -672,12 +672,12 @@ title: Germanic NLG 🇪🇺 1,353 ± 443 / 180 ± 61 1.58 1.34 - 1.66 + 1.65 1.25 1.92 1.81 1.50 - 1.83 + 1.82 1.35 67.07 ± 1.22 / 48.82 ± 2.09 59.89 ± 0.97 / 72.51 ± 0.84 @@ -801,12 +801,12 @@ title: Germanic NLG 🇪🇺 1,219 ± 412 / 158 ± 53 1.59 1.41 - 1.72 - 1.34 + 1.71 + 1.33 2.07 - 1.66 + 1.65 1.37 - 1.84 + 1.83 1.32 63.81 ± 1.27 / 42.18 ± 2.59 54.50 ± 1.55 / 69.89 ± 1.02 @@ -928,14 +928,14 @@ title: Germanic NLG 🇪🇺 8191 True 637 ± 306 / 92 ± 31 - 1.61 - 1.40 - 1.69 + 1.60 + 1.39 + 1.68 1.24 1.82 - 1.88 + 1.87 1.60 - 1.83 + 1.82 1.39 64.80 ± 1.56 / 45.57 ± 1.47 53.07 ± 1.45 / 68.64 ± 1.04 @@ -1059,12 +1059,12 @@ title: Germanic NLG 🇪🇺 2,258 ± 1,221 / 198 ± 67 1.65 1.52 - 1.85 + 1.83 1.39 2.24 1.97 1.24 - 1.82 + 1.81 1.20 64.66 ± 1.67 / 48.66 ± 3.44 53.42 ± 1.21 / 66.49 ± 1.21 @@ -1188,12 +1188,12 @@ title: Germanic NLG 🇪🇺 3,633 ± 1,236 / 777 ± 220 1.71 1.41 - 1.77 + 1.76 1.50 - 2.18 + 2.17 1.82 1.56 - 2.04 + 2.03 1.44 66.50 ± 2.00 / 45.33 ± 2.92 58.93 ± 1.72 / 72.70 ± 1.38 @@ -1317,12 +1317,12 @@ title: Germanic NLG 🇪🇺 436 ± 152 / 57 ± 21 1.81 1.69 - 2.00 + 1.99 1.51 - 1.90 - 2.23 + 1.89 + 2.22 1.53 - 1.96 + 1.95 1.67 35.79 ± 2.45 / 25.86 ± 1.74 53.69 ± 2.27 / 67.91 ± 1.55 @@ -1449,9 +1449,9 @@ title: Germanic NLG 🇪🇺 1.80 1.87 1.83 - 2.20 - 1.73 - 1.85 + 2.19 + 1.71 + 1.83 1.53 59.96 ± 1.64 / 41.55 ± 2.90 56.91 ± 2.34 / 71.25 ± 1.60 @@ -1575,12 +1575,12 @@ title: Germanic NLG 🇪🇺 908 ± 303 / 96 ± 36 1.89 1.63 - 2.09 + 2.07 1.77 2.22 - 2.09 + 2.08 1.73 - 2.02 + 2.01 1.58 57.52 ± 2.21 / 35.79 ± 1.78 49.73 ± 3.09 / 65.95 ± 2.22 @@ -1704,12 +1704,12 @@ title: Germanic NLG 🇪🇺 1,909 ± 646 / 248 ± 84 2.00 1.89 - 2.32 + 2.31 1.87 2.68 2.00 1.75 - 1.97 + 1.96 1.54 63.63 ± 2.58 / 40.18 ± 3.60 50.82 ± 1.22 / 64.57 ± 1.50 @@ -1832,11 +1832,11 @@ title: Germanic NLG 🇪🇺 True 1,208 ± 412 / 156 ± 53 2.07 - 2.02 - 2.22 + 2.01 + 2.21 1.69 2.67 - 2.00 + 1.99 2.01 2.25 1.72 @@ -1962,13 +1962,13 @@ title: Germanic NLG 🇪🇺 193 ± 87 / 55 ± 19 2.11 1.64 - 2.20 + 2.18 1.87 2.35 - 2.03 + 2.02 2.26 - 2.53 - 2.03 + 2.52 + 2.04 65.88 ± 2.11 / 55.11 ± 1.59 63.61 ± 1.57 / 76.00 ± 1.07 71.03 ± 2.52 / 85.46 ± 1.24 @@ -2089,15 +2089,15 @@ title: Germanic NLG 🇪🇺 4096 True 1,904 ± 475 / 361 ± 121 - 2.18 + 2.17 2.04 - 2.13 + 2.11 1.92 - 2.10 - 2.56 + 2.09 + 2.54 2.08 - 2.65 - 1.94 + 2.64 + 1.95 48.24 ± 9.94 / 31.90 ± 6.18 39.52 ± 7.04 / 57.63 ± 7.05 62.92 ± 7.59 / 79.60 ± 5.59 @@ -2211,148 +2211,19 @@ title: Germanic NLG 🇪🇺 14.0.4 14.0.4 - - meta-llama/Llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131072 - True - 1,005 ± 330 / 196 ± 74 - 2.20 - 1.98 - 2.36 - 2.04 - 2.79 - 2.42 - 1.85 - 2.46 - 1.73 - 54.70 ± 1.69 / 38.11 ± 2.31 - 54.81 ± 1.51 / 67.88 ± 1.39 - 32.11 ± 1.93 / 63.11 ± 1.61 - 48.87 ± 1.18 / 59.47 ± 0.67 - 66.79 ± 0.20 / 19.88 ± 0.44 - 56.14 ± 2.09 / 67.01 ± 1.58 
- 63.54 ± 2.14 / 75.61 ± 1.50 - 39.48 ± 2.00 / 54.53 ± 1.52 - 64.55 ± 1.69 / 56.81 ± 2.50 - 66.44 ± 1.38 / 60.02 ± 3.36 - 35.17 ± 0.32 / 38.11 ± 0.29 - 64.48 ± 0.14 / 15.14 ± 0.38 - 27.41 ± 1.97 / 54.94 ± 2.06 - 15.60 ± 2.05 / 46.51 ± 2.41 - 43.11 ± 2.22 / 69.74 ± 1.60 - 38.10 ± 0.57 / 52.64 ± 0.47 - 39.30 ± 1.01 / 54.03 ± 0.82 - 55.80 ± 2.68 / 34.65 ± 1.98 - 79.23 ± 0.48 / 76.86 ± 0.80 - 32.67 ± 2.18 / 63.89 ± 1.49 - 46.88 ± 1.47 / 58.66 ± 0.84 - 66.43 ± 0.07 / 18.43 ± 0.14 - 36.35 ± 1.43 / 51.97 ± 1.14 - 37.89 ± 1.45 / 53.38 ± 1.09 - 46.48 ± 1.98 / 24.57 ± 1.73 - 39.91 ± 2.35 / 57.39 ± 1.64 - 11.72 ± 1.81 / 51.67 ± 1.45 - 25.91 ± 0.99 / 53.39 ± 1.83 - 67.67 ± 0.21 / 18.97 ± 0.43 - 25.83 ± 1.36 / 44.48 ± 1.05 - 6.40 ± 3.07 / 50.66 ± 1.69 - 67.67 ± 1.31 / 59.86 ± 2.12 - 48.54 ± 2.43 / 49.17 ± 1.48 - 3.89 ± 2.71 / 36.33 ± 1.50 - 47.07 ± 2.63 / 69.50 ± 1.68 - 67.61 ± 1.23 / 60.39 ± 1.02 - 58.07 ± 2.32 / 70.76 ± 1.84 - 28.25 ± 3.57 / 59.54 ± 3.88 - 28.79 ± 2.02 / 55.82 ± 3.28 - 66.87 ± 0.54 / 21.31 ± 1.47 - 40.00 ± 0.97 / 54.88 ± 0.70 - 45.93 ± 1.38 / 58.95 ± 1.06 - 69.76 ± 1.36 / 57.66 ± 1.36 - 9.09 ± 1.42 / 20.14 ± 0.84 - 37.58 ± 3.42 / 66.98 ± 2.22 - 41.26 ± 2.09 / 65.63 ± 0.90 - 68.84 ± 0.62 / 21.41 ± 1.20 - 40.98 ± 0.71 / 55.51 ± 0.59 - 39.98 ± 1.59 / 54.32 ± 1.27 - 76.95 ± 0.95 / 72.47 ± 0.82 - 68.12 ± 0.92 / 72.48 ± 0.53 - 34.34 ± 3.37 / 65.84 ± 1.59 - 47.88 ± 3.37 / 76.21 ± 1.69 - 69.57 ± 0.25 / 26.30 ± 0.35 - 56.62 ± 0.49 / 67.29 ± 0.39 - 69.03 ± 1.19 / 76.69 ± 0.89 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 13.0.0 - 13.0.0 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.1 - 14.0.1 - 14.0.1 - 14.0.1 - 14.0.1 - 14.0.2 - 14.0.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - meta-llama/Meta-Llama-3-8B-Instruct (few-shot) 8030 128 8192 True - 1,007 ± 316 / 162 ± 45 - 2.21 + 1,483 ± 377 / 287 ± 97 + 2.22 2.14 - 2.40 + 2.43 2.08 2.73 - 2.27 + 2.26 1.94 2.33 1.82 @@ -2364,15 +2235,15 @@ title: Germanic NLG 🇪🇺 57.87 ± 1.67 / 67.43 ± 1.34 50.42 ± 3.32 / 65.43 ± 2.41 29.17 ± 2.24 / 44.59 ± 2.00 - 74.47 ± 1.47 / 65.57 ± 2.39 - 72.93 ± 1.00 / 65.44 ± 2.55 - 34.44 ± 0.42 / 37.94 ± 0.39 - 63.98 ± 0.50 / 14.75 ± 0.79 - 27.77 ± 1.63 / 61.75 ± 1.77 - 20.35 ± 1.92 / 57.74 ± 2.28 - 42.90 ± 3.57 / 69.90 ± 3.17 - 33.44 ± 0.67 / 48.76 ± 0.58 - 30.91 ± 1.88 / 45.85 ± 1.93 + 66.56 ± 1.70 / 58.47 ± 2.40 + 68.29 ± 1.47 / 61.22 ± 2.25 + 34.47 ± 0.44 / 37.96 ± 0.40 + 63.80 ± 0.09 / 13.37 ± 0.15 + 28.22 ± 1.37 / 59.51 ± 1.44 + 18.21 ± 2.21 / 52.84 ± 2.30 + 47.34 ± 1.99 / 73.23 ± 1.29 + 30.78 ± 0.81 / 47.33 ± 0.75 + 31.49 ± 1.29 / 48.15 ± 0.99 69.67 ± 1.30 / 52.94 ± 4.01 59.93 ± 4.70 / 67.54 ± 3.04 27.63 ± 3.19 / 60.85 ± 3.29 @@ -2420,15 +2291,15 @@ title: Germanic NLG 🇪🇺 12.6.1 12.6.1 12.6.1 - 12.6.1 - 12.6.1 - 14.0.4 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 12.6.1 12.6.1 12.6.1 @@ -2478,10 +2349,10 @@ title: Germanic NLG 🇪🇺 2,363 ± 794 / 311 ± 105 2.22 1.99 - 2.46 + 2.45 2.05 2.78 - 2.42 + 2.41 1.95 2.28 1.82 @@ -2598,6 +2469,135 @@ title: Germanic NLG 🇪🇺 14.0.4 14.0.4 + + meta-llama/Llama-3.1-8B-Instruct (few-shot) + 8030 + 128 + 131072 + True + 1,473 ± 377 / 283 ± 96 + 2.25 + 1.98 + 2.39 + 2.04 + 2.79 + 2.42 + 1.99 + 2.66 + 
1.73 + 54.70 ± 1.69 / 38.11 ± 2.31 + 54.81 ± 1.51 / 67.88 ± 1.39 + 32.11 ± 1.93 / 63.11 ± 1.61 + 48.87 ± 1.18 / 59.47 ± 0.67 + 66.79 ± 0.20 / 19.88 ± 0.44 + 56.14 ± 2.09 / 67.01 ± 1.58 + 63.54 ± 2.14 / 75.61 ± 1.50 + 39.48 ± 2.00 / 54.53 ± 1.52 + 64.55 ± 1.69 / 56.81 ± 2.50 + 66.44 ± 1.38 / 60.02 ± 3.36 + 35.17 ± 0.32 / 38.11 ± 0.29 + 64.48 ± 0.14 / 15.14 ± 0.38 + 27.41 ± 1.97 / 54.94 ± 2.06 + 15.60 ± 2.05 / 46.51 ± 2.41 + 43.11 ± 2.22 / 69.74 ± 1.60 + 35.02 ± 1.24 / 51.13 ± 0.94 + 37.61 ± 0.88 / 53.12 ± 0.65 + 55.80 ± 2.68 / 34.65 ± 1.98 + 79.23 ± 0.48 / 76.86 ± 0.80 + 32.67 ± 2.18 / 63.89 ± 1.49 + 46.88 ± 1.47 / 58.66 ± 0.84 + 66.43 ± 0.07 / 18.43 ± 0.14 + 36.35 ± 1.43 / 51.97 ± 1.14 + 37.89 ± 1.45 / 53.38 ± 1.09 + 46.48 ± 1.98 / 24.57 ± 1.73 + 39.91 ± 2.35 / 57.39 ± 1.64 + 11.72 ± 1.81 / 51.67 ± 1.45 + 25.91 ± 0.99 / 53.39 ± 1.83 + 67.67 ± 0.21 / 18.97 ± 0.43 + 25.83 ± 1.36 / 44.48 ± 1.05 + 6.40 ± 3.07 / 50.66 ± 1.69 + 67.67 ± 1.31 / 59.86 ± 2.12 + 48.54 ± 2.43 / 49.17 ± 1.48 + 3.89 ± 2.71 / 36.33 ± 1.50 + 47.07 ± 2.63 / 69.50 ± 1.68 + 62.03 ± 1.17 / 45.31 ± 1.89 + 58.15 ± 2.40 / 70.81 ± 1.86 + 30.18 ± 1.92 / 63.41 ± 1.19 + 26.48 ± 0.98 / 52.41 ± 1.48 + 64.68 ± 0.10 / 15.03 ± 0.29 + 39.36 ± 0.79 / 54.48 ± 0.57 + 47.21 ± 1.02 / 60.39 ± 0.77 + 61.68 ± 1.94 / 42.64 ± 1.85 + 8.97 ± 1.44 / 20.07 ± 0.82 + 36.57 ± 1.77 / 65.25 ± 1.94 + 33.88 ± 1.83 / 62.17 ± 0.91 + 64.80 ± 0.38 / 16.87 ± 0.29 + 37.45 ± 0.61 / 53.03 ± 0.48 + 42.45 ± 1.72 / 56.78 ± 1.31 + 76.95 ± 0.95 / 72.47 ± 0.82 + 68.12 ± 0.92 / 72.48 ± 0.53 + 34.34 ± 3.37 / 65.84 ± 1.59 + 47.88 ± 3.37 / 76.21 ± 1.69 + 69.57 ± 0.25 / 26.30 ± 0.35 + 56.62 ± 0.49 / 67.29 ± 0.39 + 69.03 ± 1.19 / 76.69 ± 0.89 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.1 + 14.0.1 + 14.0.1 + 14.0.1 + 14.0.1 + 14.0.2 + 14.0.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + mistralai/Ministral-8B-Instruct-2410 (few-shot) 8020 @@ -2607,12 +2607,12 @@ title: Germanic NLG 🇪🇺 1,302 ± 323 / 253 ± 86 2.28 2.18 - 2.48 - 2.14 + 2.47 + 2.13 3.00 - 2.72 + 2.71 1.96 - 2.12 + 2.15 1.63 55.49 ± 2.05 / 34.11 ± 1.79 49.18 ± 1.89 / 65.27 ± 1.69 @@ -2656,10 +2656,10 @@ title: Germanic NLG 🇪🇺 64.03 ± 0.05 / 12.90 ± 0.19 39.61 ± 0.90 / 54.41 ± 0.67 60.04 ± 1.94 / 69.43 ± 1.54 - 66.51 ± 1.38 / 52.40 ± 2.62 - 11.91 ± 1.03 / 34.21 ± 1.08 - 34.46 ± 2.79 / 65.61 ± 2.58 - 59.23 ± 1.16 / 72.56 ± 0.80 + 63.30 ± 2.36 / 39.20 ± 2.16 + 11.82 ± 1.07 / 34.18 ± 1.11 + 32.20 ± 0.77 / 65.67 ± 0.69 + 59.45 ± 0.89 / 71.13 ± 0.60 68.87 ± 0.72 / 21.06 ± 1.13 40.81 ± 1.05 / 55.44 ± 0.80 60.68 ± 1.04 / 70.24 ± 0.85 @@ -2712,10 +2712,10 @@ title: Germanic NLG 🇪🇺 14.1.2 14.1.2 14.1.2 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 13.0.0 13.0.0 13.0.0 @@ -2736,12 +2736,12 @@ title: Germanic NLG 🇪🇺 5,535 ± 1,837 / 760 ± 256 2.29 2.06 - 2.52 - 2.05 + 2.51 + 2.04 3.14 - 2.59 + 2.58 1.92 - 2.33 + 2.32 1.72 51.20 ± 3.31 / 30.53 ± 2.46 50.95 ± 1.15 / 66.13 ± 0.87 @@ -2856,135 +2856,6 @@ title: Germanic NLG 🇪🇺 14.0.4 14.0.4 - - CohereForAI/aya-expanse-8b (few-shot) - 8028 - 256 - 8192 - False - 2,686 ± 685 / 491 ± 164 - 2.31 - 2.17 - 2.40 - 2.08 - 3.32 - 2.75 - 2.01 - 2.01 - 1.72 - 51.32 ± 3.82 / 25.54 ± 2.10 - 52.00 ± 1.67 / 66.25 ± 1.77 
- 18.48 ± 2.44 / 52.18 ± 4.28 - 52.43 ± 1.19 / 62.08 ± 0.60 - 66.18 ± 0.84 / 21.10 ± 0.69 - 41.32 ± 1.11 / 55.43 ± 0.86 - 52.24 ± 1.55 / 68.01 ± 1.05 - 37.67 ± 0.80 / 53.26 ± 0.60 - 66.55 ± 2.12 / 39.28 ± 3.45 - 63.63 ± 1.62 / 37.25 ± 3.49 - 38.61 ± 2.28 / 51.46 ± 2.62 - 64.48 ± 0.61 / 17.46 ± 0.76 - 15.80 ± 2.22 / 51.42 ± 3.79 - 12.30 ± 2.38 / 51.96 ± 3.31 - 43.26 ± 2.53 / 71.49 ± 2.01 - 36.48 ± 0.85 / 52.32 ± 0.62 - 35.85 ± 1.48 / 51.83 ± 1.11 - 57.38 ± 1.93 / 29.69 ± 4.23 - 78.43 ± 0.93 / 74.54 ± 2.40 - 14.52 ± 2.43 / 45.18 ± 4.21 - 53.14 ± 1.81 / 63.00 ± 0.50 - 65.69 ± 0.22 / 19.95 ± 0.16 - 37.32 ± 0.70 / 52.95 ± 0.50 - 38.28 ± 1.31 / 53.70 ± 0.97 - 28.98 ± 2.63 / 21.75 ± 1.89 - 19.83 ± 4.76 / 41.64 ± 3.64 - 4.93 ± 1.06 / 49.69 ± 2.65 - 24.72 ± 2.22 / 54.41 ± 1.43 - 63.45 ± 1.92 / 16.81 ± 2.23 - 10.97 ± 1.13 / 33.51 ± 0.81 - 4.23 ± 1.80 / 49.31 ± 0.84 - 64.72 ± 1.73 / 47.25 ± 4.90 - 28.57 ± 4.08 / 50.22 ± 2.92 - 5.12 ± 1.16 / 51.09 ± 0.89 - 38.83 ± 3.81 / 56.89 ± 3.51 - 59.95 ± 1.43 / 39.14 ± 2.24 - 55.39 ± 1.97 / 69.86 ± 1.34 - 30.59 ± 1.76 / 64.21 ± 1.65 - 26.94 ± 1.06 / 57.13 ± 1.35 - 66.11 ± 0.37 / 18.21 ± 0.62 - 38.63 ± 1.03 / 54.04 ± 0.73 - 48.53 ± 1.17 / 61.25 ± 0.85 - 62.07 ± 1.67 / 37.68 ± 1.28 - 13.70 ± 1.36 / 34.90 ± 0.68 - 35.14 ± 2.33 / 66.66 ± 1.50 - 49.15 ± 1.48 / 68.82 ± 0.68 - 74.40 ± 0.20 / 31.66 ± 0.46 - 46.76 ± 0.71 / 60.01 ± 0.54 - 60.36 ± 0.88 / 70.23 ± 0.64 - 67.33 ± 1.57 / 53.00 ± 0.88 - 68.67 ± 0.74 / 66.23 ± 0.49 - 31.18 ± 1.63 / 65.23 ± 0.69 - 68.33 ± 2.04 / 84.26 ± 1.04 - 72.23 ± 0.14 / 30.67 ± 0.16 - 49.41 ± 1.09 / 62.03 ± 0.82 - 58.93 ± 1.50 / 68.81 ± 1.21 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.3.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - nvidia/mistral-nemo-minitron-8b-base (few-shot) 8414 @@ -2999,7 +2870,7 @@ title: Germanic NLG 🇪🇺 2.78 2.50 2.10 - 2.52 + 2.50 1.76 49.85 ± 2.30 / 33.79 ± 3.10 49.52 ± 1.96 / 66.14 ± 1.58 @@ -3115,133 +2986,133 @@ title: Germanic NLG 🇪🇺 14.1.1 - meta-llama/Llama-3.1-8B (few-shot) - 8030 - 128 - 131072 - True - 2,986 ± 823 / 276 ± 94 - 2.35 - 2.20 - 2.62 - 2.17 - 2.81 - 2.80 - 2.08 - 2.30 - 1.85 - 50.92 ± 1.88 / 34.24 ± 2.85 - 47.86 ± 1.66 / 62.47 ± 1.97 - 29.19 ± 2.02 / 59.61 ± 3.20 - 48.38 ± 5.07 / 55.27 ± 4.71 - 64.89 ± 0.69 / 18.31 ± 1.03 - 51.88 ± 2.75 / 63.27 ± 2.28 - 62.31 ± 1.65 / 74.08 ± 1.29 - 27.37 ± 2.18 / 44.83 ± 1.77 - 65.17 ± 2.02 / 52.91 ± 2.25 - 60.22 ± 2.29 / 50.51 ± 3.15 - 34.02 ± 0.93 / 37.19 ± 0.86 - 62.16 ± 0.46 / 14.17 ± 0.62 - 32.48 ± 3.31 / 63.48 ± 2.97 - 18.38 ± 4.77 / 49.87 ± 5.55 - 33.06 ± 4.39 / 58.56 ± 4.32 - 32.40 ± 1.19 / 48.21 ± 0.92 - 24.89 ± 2.57 / 43.29 ± 1.79 - 62.19 ± 2.34 / 43.29 ± 4.62 - 80.31 ± 0.45 / 76.50 ± 1.19 - 30.29 ± 3.08 / 56.52 ± 4.71 - 42.78 ± 7.55 / 49.56 ± 7.91 - 64.14 ± 0.58 / 18.03 ± 0.51 - 35.10 ± 1.17 / 50.10 ± 0.98 - 23.18 ± 2.01 / 41.44 ± 1.95 - 52.97 ± 1.54 / 38.17 ± 4.65 - 41.29 ± 2.30 / 59.30 ± 1.75 - 5.95 ± 2.81 / 40.86 ± 3.68 - 31.99 ± 2.60 / 60.26 ± 1.51 - 65.19 ± 0.73 / 17.47 ± 0.61 - 21.56 ± 1.35 / 40.99 ± 1.01 - 4.44 ± 1.77 / 54.52 ± 1.51 - 65.65 ± 3.31 / 63.81 ± 2.72 - 24.30 ± 7.03 / 35.85 ± 7.35 - 0.61 ± 1.75 / 35.51 ± 2.23 - 45.01 ± 1.79 / 
65.74 ± 1.54 - 59.45 ± 1.64 / 46.60 ± 2.02 - 53.39 ± 5.35 / 65.74 ± 5.74 - 23.87 ± 5.74 / 57.17 ± 6.01 - 27.14 ± 2.19 / 54.63 ± 2.74 - 69.58 ± 1.37 / 28.95 ± 2.96 - 38.77 ± 0.95 / 53.99 ± 0.67 - 34.29 ± 1.05 / 49.79 ± 0.88 - 64.79 ± 1.96 / 45.48 ± 2.24 - 11.95 ± 2.83 / 37.12 ± 2.19 - 32.97 ± 2.68 / 58.52 ± 2.92 - 63.89 ± 1.06 / 74.73 ± 1.02 - 66.29 ± 1.29 / 20.14 ± 1.64 - 38.44 ± 1.33 / 53.68 ± 1.01 - 30.88 ± 2.27 / 47.18 ± 1.81 - 69.86 ± 2.10 / 62.68 ± 2.21 - 66.76 ± 0.72 / 68.58 ± 0.72 - 30.96 ± 2.46 / 61.29 ± 3.61 - 71.39 ± 2.20 / 84.24 ± 1.55 - 67.93 ± 0.44 / 22.00 ± 0.51 - 52.47 ± 0.85 / 64.25 ± 0.66 - 43.95 ± 3.26 / 57.04 ± 2.74 - 14.0.3 - 14.1.2 - 14.1.2 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.0 - 14.1.2 - 14.1.2 - 14.0.0 - 14.0.0 - 14.0.0 - 14.0.0 + CohereForAI/aya-expanse-8b (few-shot) + 8028 + 256 + 8192 + False + 2,686 ± 685 / 491 ± 164 + 2.33 + 2.17 + 2.39 + 2.08 + 3.32 + 2.74 + 2.00 + 2.20 + 1.72 + 51.32 ± 3.82 / 25.54 ± 2.10 + 52.00 ± 1.67 / 66.25 ± 1.77 + 18.48 ± 2.44 / 52.18 ± 4.28 + 52.43 ± 1.19 / 62.08 ± 0.60 + 66.18 ± 0.84 / 21.10 ± 0.69 + 41.32 ± 1.11 / 55.43 ± 0.86 + 52.24 ± 1.55 / 68.01 ± 1.05 + 37.67 ± 0.80 / 53.26 ± 0.60 + 66.55 ± 2.12 / 39.28 ± 3.45 + 63.63 ± 1.62 / 37.25 ± 3.49 + 38.61 ± 2.28 / 51.46 ± 2.62 + 64.48 ± 0.61 / 17.46 ± 0.76 + 15.80 ± 2.22 / 51.42 ± 3.79 + 12.30 ± 2.38 / 51.96 ± 3.31 + 43.26 ± 2.53 / 71.49 ± 2.01 + 36.48 ± 0.85 / 52.32 ± 0.62 + 35.85 ± 1.48 / 51.83 ± 1.11 + 57.38 ± 1.93 / 29.69 ± 4.23 + 78.43 ± 0.93 / 74.54 ± 2.40 + 14.52 ± 2.43 / 45.18 ± 4.21 + 53.14 ± 1.81 / 63.00 ± 0.50 + 65.69 ± 0.22 / 19.95 ± 0.16 + 37.32 ± 0.70 / 52.95 ± 0.50 + 38.28 ± 1.31 / 53.70 ± 0.97 + 28.98 ± 2.63 / 21.75 ± 1.89 + 19.83 ± 4.76 / 41.64 ± 3.64 + 4.93 ± 1.06 / 49.69 ± 2.65 + 24.72 ± 2.22 / 54.41 ± 1.43 + 63.45 ± 1.92 / 16.81 ± 2.23 + 10.97 ± 1.13 / 33.51 ± 0.81 + 4.23 ± 1.80 / 49.31 ± 0.84 + 64.72 ± 1.73 / 47.25 ± 4.90 + 28.57 ± 4.08 / 50.22 ± 2.92 + 5.12 ± 1.16 / 51.09 ± 0.89 + 38.83 ± 3.81 / 56.89 ± 3.51 + 59.95 ± 1.43 / 39.14 ± 2.24 + 55.39 ± 1.97 / 69.86 ± 1.34 + 30.59 ± 1.76 / 64.21 ± 1.65 + 26.94 ± 1.06 / 57.13 ± 1.35 + 66.11 ± 0.37 / 18.21 ± 0.62 + 38.63 ± 1.03 / 54.04 ± 0.73 + 48.53 ± 1.17 / 61.25 ± 0.85 + 53.02 ± 1.86 / 30.09 ± 1.16 + 13.68 ± 1.32 / 34.87 ± 0.67 + 29.97 ± 2.13 / 64.01 ± 1.12 + 53.40 ± 1.34 / 69.31 ± 0.65 + 73.93 ± 0.30 / 30.81 ± 0.43 + 38.81 ± 1.13 / 53.87 ± 0.90 + 40.95 ± 1.72 / 55.25 ± 1.38 + 67.33 ± 1.57 / 53.00 ± 0.88 + 68.67 ± 0.74 / 66.23 ± 0.49 + 31.18 ± 1.63 / 65.23 ± 0.69 + 68.33 ± 2.04 / 84.26 ± 1.04 + 72.23 ± 0.14 / 30.67 ± 0.16 + 49.41 ± 1.09 / 62.03 ± 0.82 + 58.93 ± 1.50 / 68.81 ± 1.21 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.3.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 14.0.4 - 14.1.2 - 14.1.2 + 14.0.4 + 14.0.4 14.0.4 14.1.2 14.1.2 14.1.2 14.1.2 - 13.0.0 - 13.0.0 - 13.0.0 - 12.11.0 - 12.11.0 - 12.11.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 12.11.0 - 12.11.0 - 12.11.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) @@ -3252,12 +3123,12 @@ title: Germanic NLG 🇪🇺 9,015 ± 2,966 / 1,121 ± 510 2.37 
2.26 - 2.28 + 2.27 1.95 3.12 - 2.69 + 2.68 2.24 - 2.61 + 2.60 1.80 49.18 ± 2.27 / 32.04 ± 1.45 49.76 ± 1.76 / 64.54 ± 1.57 @@ -3347,30 +3218,159 @@ title: Germanic NLG 🇪🇺 14.0.4 14.0.4 14.0.4 - 14.0.4 - 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + + + meta-llama/Llama-3.1-8B (few-shot) + 8030 + 128 + 131072 + True + 2,986 ± 823 / 276 ± 94 + 2.37 + 2.20 + 2.62 + 2.17 + 2.81 + 2.80 + 2.22 + 2.31 + 1.85 + 50.92 ± 1.88 / 34.24 ± 2.85 + 47.86 ± 1.66 / 62.47 ± 1.97 + 29.19 ± 2.02 / 59.61 ± 3.20 + 48.38 ± 5.07 / 55.27 ± 4.71 + 64.89 ± 0.69 / 18.31 ± 1.03 + 51.88 ± 2.75 / 63.27 ± 2.28 + 62.31 ± 1.65 / 74.08 ± 1.29 + 27.37 ± 2.18 / 44.83 ± 1.77 + 65.17 ± 2.02 / 52.91 ± 2.25 + 60.22 ± 2.29 / 50.51 ± 3.15 + 34.02 ± 0.93 / 37.19 ± 0.86 + 62.16 ± 0.46 / 14.17 ± 0.62 + 32.48 ± 3.31 / 63.48 ± 2.97 + 18.38 ± 4.77 / 49.87 ± 5.55 + 33.06 ± 4.39 / 58.56 ± 4.32 + 32.40 ± 1.19 / 48.21 ± 0.92 + 24.89 ± 2.57 / 43.29 ± 1.79 + 62.19 ± 2.34 / 43.29 ± 4.62 + 80.31 ± 0.45 / 76.50 ± 1.19 + 30.29 ± 3.08 / 56.52 ± 4.71 + 42.78 ± 7.55 / 49.56 ± 7.91 + 64.14 ± 0.58 / 18.03 ± 0.51 + 35.10 ± 1.17 / 50.10 ± 0.98 + 23.18 ± 2.01 / 41.44 ± 1.95 + 52.97 ± 1.54 / 38.17 ± 4.65 + 41.29 ± 2.30 / 59.30 ± 1.75 + 5.95 ± 2.81 / 40.86 ± 3.68 + 31.99 ± 2.60 / 60.26 ± 1.51 + 65.19 ± 0.73 / 17.47 ± 0.61 + 21.56 ± 1.35 / 40.99 ± 1.01 + 4.44 ± 1.77 / 54.52 ± 1.51 + 65.65 ± 3.31 / 63.81 ± 2.72 + 24.30 ± 7.03 / 35.85 ± 7.35 + 0.61 ± 1.75 / 35.51 ± 2.23 + 45.01 ± 1.79 / 65.74 ± 1.54 + 59.45 ± 1.64 / 46.60 ± 2.02 + 53.39 ± 5.35 / 65.74 ± 5.74 + 23.87 ± 5.74 / 57.17 ± 6.01 + 27.14 ± 2.19 / 54.63 ± 2.74 + 64.01 ± 1.18 / 17.85 ± 1.99 + 36.56 ± 0.73 / 52.11 ± 0.56 + 30.05 ± 2.57 / 46.82 ± 2.11 + 64.79 ± 1.96 / 45.48 ± 2.24 + 11.95 ± 2.83 / 37.12 ± 2.19 + 32.97 ± 2.68 / 58.52 ± 2.92 + 63.89 ± 1.06 / 74.73 ± 1.02 + 66.29 ± 1.29 / 20.14 ± 1.64 + 38.44 ± 1.33 / 53.68 ± 1.01 + 30.88 ± 2.27 / 47.18 ± 1.81 + 69.86 ± 2.10 / 62.68 ± 2.21 + 66.76 ± 0.72 / 68.58 ± 0.72 + 30.96 ± 2.46 / 61.29 ± 3.61 + 71.39 ± 2.20 / 84.24 ± 1.55 + 67.93 ± 0.44 / 22.00 ± 0.51 + 52.47 ± 0.85 / 64.25 ± 0.66 + 43.95 ± 3.26 / 57.04 ± 2.74 + 14.0.3 + 14.1.2 + 14.1.2 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.0 + 14.1.2 + 14.1.2 + 14.0.0 + 14.0.0 + 14.0.0 + 14.0.0 + 14.0.4 + 14.1.2 + 14.1.2 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 12.11.0 + 12.11.0 + 12.11.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 12.11.0 + 12.11.0 + 12.11.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 meta-llama/Meta-Llama-3-8B (few-shot) @@ -3378,16 +3378,16 @@ title: Germanic NLG 🇪🇺 128 8192 True - 1,335 ± 338 / 260 ± 88 - 2.37 - 2.23 - 2.48 - 2.18 + 1,477 ± 376 / 285 ± 97 + 2.39 + 2.22 + 2.67 + 2.17 2.87 - 2.85 + 2.84 2.00 - 2.48 - 1.89 + 2.46 + 1.90 49.46 ± 1.88 / 32.11 ± 2.41 51.16 ± 2.15 / 67.00 ± 1.51 23.01 ± 3.93 / 49.99 ± 4.63 @@ -3396,15 +3396,15 @@ title: Germanic NLG 🇪🇺 51.64 ± 2.09 / 63.34 ± 1.71 59.28 ± 1.85 / 72.62 ± 1.29 24.30 ± 3.14 / 42.50 ± 2.67 - 61.48 ± 1.83 / 47.65 ± 2.94 - 61.58 ± 2.21 / 50.10 ± 2.68 - 32.94 ± 0.86 / 37.52 ± 0.43 - 63.38 ± 1.15 / 15.74 ± 1.68 - 
21.20 ± 6.57 / 52.29 ± 7.43 - 19.65 ± 4.32 / 56.66 ± 4.40 - 53.35 ± 4.33 / 74.98 ± 3.70 - 33.02 ± 1.35 / 49.25 ± 1.04 - 24.93 ± 3.13 / 42.47 ± 2.74 + 62.89 ± 2.01 / 50.34 ± 1.92 + 56.18 ± 3.26 / 47.30 ± 2.62 + 33.07 ± 0.73 / 37.60 ± 0.49 + 61.95 ± 0.69 / 14.09 ± 0.61 + 30.73 ± 3.19 / 64.06 ± 2.41 + 20.57 ± 3.62 / 54.22 ± 5.15 + 30.77 ± 4.33 / 54.66 ± 4.41 + 30.95 ± 1.33 / 47.50 ± 0.99 + 21.98 ± 2.12 / 41.07 ± 1.52 59.92 ± 2.46 / 40.98 ± 4.90 80.91 ± 0.41 / 78.09 ± 1.22 26.39 ± 3.47 / 52.38 ± 4.49 @@ -3452,15 +3452,15 @@ title: Germanic NLG 🇪🇺 14.0.3 14.0.3 14.0.3 - 12.6.1 - 12.6.1 + 14.1.2 + 14.1.2 14.1.2 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 14.0.4 14.1.2 14.1.2 @@ -3510,12 +3510,12 @@ title: Germanic NLG 🇪🇺 4,136 ± 1,282 / 668 ± 326 2.40 2.27 - 2.10 - 2.14 + 2.09 + 2.13 3.33 - 3.08 - 2.18 - 2.46 + 3.07 + 2.19 + 2.45 1.63 51.94 ± 2.00 / 27.59 ± 1.79 51.97 ± 1.36 / 68.62 ± 0.93 @@ -3639,12 +3639,12 @@ title: Germanic NLG 🇪🇺 5,993 ± 1,028 / 1,742 ± 561 2.41 2.13 - 2.35 + 2.34 2.18 3.23 - 3.32 - 1.98 - 2.26 + 3.31 + 2.01 + 2.25 1.86 52.61 ± 1.77 / 33.64 ± 2.67 49.81 ± 1.43 / 66.32 ± 1.25 @@ -3768,12 +3768,12 @@ title: Germanic NLG 🇪🇺 1,483 ± 321 / 379 ± 158 2.41 1.98 - 2.43 - 2.09 + 2.42 + 2.08 3.44 - 2.89 - 2.03 - 2.36 + 2.88 + 2.04 + 2.35 2.09 34.00 ± 2.69 / 25.49 ± 2.06 53.97 ± 1.21 / 68.40 ± 1.28 @@ -3895,15 +3895,15 @@ title: Germanic NLG 🇪🇺 8192 False 1,919 ± 645 / 248 ± 83 - 2.44 + 2.43 2.23 2.66 - 2.30 + 2.29 3.17 - 2.65 + 2.64 2.14 - 2.50 - 1.85 + 2.48 + 1.86 53.44 ± 1.53 / 33.98 ± 1.94 49.17 ± 1.04 / 61.82 ± 1.64 20.55 ± 1.56 / 51.55 ± 2.37 @@ -4025,12 +4025,12 @@ title: Germanic NLG 🇪🇺 True 1,118 ± 302 / 184 ± 63 2.49 - 2.40 - 2.55 - 2.31 - 3.43 - 2.86 - 2.12 + 2.39 + 2.54 + 2.30 + 3.44 + 2.85 + 2.11 2.52 1.75 44.92 ± 3.05 / 32.65 ± 2.50 @@ -4154,14 +4154,14 @@ title: Germanic NLG 🇪🇺 True 1,446 ± 354 / 295 ± 100 2.49 - 2.36 - 2.63 + 2.35 + 2.62 2.24 3.27 - 2.79 - 2.09 - 2.68 - 1.89 + 2.78 + 2.10 + 2.67 + 1.90 45.42 ± 2.88 / 32.66 ± 2.49 43.16 ± 1.69 / 54.53 ± 2.83 8.79 ± 3.23 / 38.38 ± 4.22 @@ -4275,135 +4275,6 @@ title: Germanic NLG 🇪🇺 9.2.0 9.2.0 - - NorwAI/NorwAI-Mixtral-8x7B (few-shot) - 46998 - 68 - 32768 - True - 2,368 ± 793 / 317 ± 108 - 2.52 - 2.05 - 2.36 - 2.22 - 3.25 - 2.80 - 2.36 - 2.74 - 2.41 - 51.95 ± 1.51 / 31.96 ± 1.74 - 52.11 ± 1.49 / 67.70 ± 1.08 - 44.47 ± 2.01 / 69.57 ± 2.71 - 43.32 ± 8.90 / 50.67 ± 8.15 - 64.64 ± 0.79 / 21.13 ± 0.55 - 77.33 ± 0.74 / 82.90 ± 0.58 - 79.05 ± 1.54 / 86.04 ± 1.07 - 23.81 ± 4.55 / 40.25 ± 3.94 - 58.53 ± 2.06 / 39.86 ± 1.77 - 60.26 ± 1.71 / 41.24 ± 1.50 - 59.48 ± 2.27 / 73.61 ± 2.04 - 54.54 ± 7.15 / 13.03 ± 2.14 - 51.85 ± 2.78 / 73.40 ± 2.33 - 41.89 ± 3.17 / 69.00 ± 2.68 - 25.62 ± 3.95 / 43.70 ± 5.67 - 43.35 ± 0.76 / 56.97 ± 0.60 - 26.91 ± 5.35 / 42.50 ± 4.87 - 57.01 ± 1.32 / 33.14 ± 2.45 - 80.12 ± 0.88 / 81.23 ± 0.76 - 43.04 ± 1.96 / 67.69 ± 2.83 - 30.44 ± 7.83 / 39.20 ± 7.83 - 63.55 ± 0.52 / 17.61 ± 0.71 - 41.92 ± 0.69 / 56.24 ± 0.52 - 19.13 ± 2.25 / 37.62 ± 1.27 - 36.73 ± 4.50 / 27.29 ± 1.83 - 34.34 ± 3.99 / 53.29 ± 3.80 - 6.57 ± 2.18 / 46.35 ± 4.24 - 22.59 ± 4.98 / 48.68 ± 4.19 - 58.69 ± 2.45 / 14.60 ± 1.15 - 12.07 ± 1.85 / 33.86 ± 1.36 - 2.87 ± 1.72 / 52.79 ± 1.13 - 62.60 ± 3.09 / 54.91 ± 2.40 - 31.36 ± 5.40 / 50.13 ± 5.67 - 5.21 ± 3.15 / 46.32 ± 4.42 - 34.26 ± 2.20 / 51.27 ± 2.30 - 57.26 ± 1.41 / 42.54 ± 2.77 - 54.57 ± 4.98 / 68.40 ± 4.08 - 26.52 ± 4.63 / 59.83 ± 5.12 - 19.96 ± 1.50 / 43.10 ± 2.18 - 61.10 ± 0.91 / 14.12 ± 1.59 - 43.30 ± 1.06 / 57.22 ± 0.87 - 
23.82 ± 3.21 / 41.16 ± 2.65 - 62.76 ± 3.54 / 40.29 ± 1.82 - 13.83 ± 1.32 / 37.70 ± 1.94 - 24.44 ± 2.86 / 58.02 ± 2.28 - 26.17 ± 2.88 / 37.61 ± 2.20 - 63.34 ± 0.59 / 18.36 ± 0.69 - 38.50 ± 0.76 / 52.93 ± 0.49 - 20.26 ± 3.39 / 38.63 ± 3.03 - 63.12 ± 2.34 / 56.38 ± 1.23 - 66.47 ± 1.61 / 69.35 ± 1.31 - 38.82 ± 3.65 / 67.96 ± 2.36 - 29.16 ± 3.45 / 46.82 ± 4.35 - 57.65 ± 5.62 / 18.79 ± 1.79 - 54.57 ± 1.21 / 65.11 ± 0.83 - 30.13 ± 5.52 / 42.55 ± 4.99 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - mistralai/Mistral-7B-v0.3 (few-shot) 7248 @@ -4413,12 +4284,12 @@ title: Germanic NLG 🇪🇺 1,364 ± 343 / 266 ± 90 2.52 2.27 - 2.68 + 2.67 2.25 3.33 - 2.82 + 2.81 2.13 - 2.79 + 2.78 1.93 43.60 ± 2.94 / 32.17 ± 2.26 45.92 ± 1.50 / 61.91 ± 1.50 @@ -4533,6 +4404,135 @@ title: Germanic NLG 🇪🇺 12.10.4 12.10.4 + + NorwAI/NorwAI-Mixtral-8x7B (few-shot) + 46998 + 68 + 32768 + True + 2,368 ± 793 / 317 ± 108 + 2.53 + 2.05 + 2.39 + 2.22 + 3.29 + 2.79 + 2.36 + 2.73 + 2.41 + 51.95 ± 1.51 / 31.96 ± 1.74 + 52.11 ± 1.49 / 67.70 ± 1.08 + 44.47 ± 2.01 / 69.57 ± 2.71 + 43.32 ± 8.90 / 50.67 ± 8.15 + 64.64 ± 0.79 / 21.13 ± 0.55 + 77.33 ± 0.74 / 82.90 ± 0.58 + 79.05 ± 1.54 / 86.04 ± 1.07 + 23.81 ± 4.55 / 40.25 ± 3.94 + 58.53 ± 2.06 / 39.86 ± 1.77 + 60.26 ± 1.71 / 41.24 ± 1.50 + 59.48 ± 2.27 / 73.61 ± 2.04 + 54.54 ± 7.15 / 13.03 ± 2.14 + 51.85 ± 2.78 / 73.40 ± 2.33 + 41.89 ± 3.17 / 69.00 ± 2.68 + 25.62 ± 3.95 / 43.70 ± 5.67 + 43.35 ± 0.76 / 56.97 ± 0.60 + 26.91 ± 5.35 / 42.50 ± 4.87 + 57.01 ± 1.32 / 33.14 ± 2.45 + 80.12 ± 0.88 / 81.23 ± 0.76 + 43.04 ± 1.96 / 67.69 ± 2.83 + 30.44 ± 7.83 / 39.20 ± 7.83 + 63.55 ± 0.52 / 17.61 ± 0.71 + 41.92 ± 0.69 / 56.24 ± 0.52 + 19.13 ± 2.25 / 37.62 ± 1.27 + 36.73 ± 4.50 / 27.29 ± 1.83 + 34.34 ± 3.99 / 53.29 ± 3.80 + 6.57 ± 2.18 / 46.35 ± 4.24 + 22.59 ± 4.98 / 48.68 ± 4.19 + 58.69 ± 2.45 / 14.60 ± 1.15 + 12.07 ± 1.85 / 33.86 ± 1.36 + 2.87 ± 1.72 / 52.79 ± 1.13 + 62.60 ± 3.09 / 54.91 ± 2.40 + 31.36 ± 5.40 / 50.13 ± 5.67 + 5.21 ± 3.15 / 46.32 ± 4.42 + 34.26 ± 2.20 / 51.27 ± 2.30 + 57.26 ± 1.41 / 42.54 ± 2.77 + 54.57 ± 4.98 / 68.40 ± 4.08 + 26.52 ± 4.63 / 59.83 ± 5.12 + 19.96 ± 1.50 / 43.10 ± 2.18 + 61.10 ± 0.91 / 14.12 ± 1.59 + 43.30 ± 1.06 / 57.22 ± 0.87 + 23.82 ± 3.21 / 41.16 ± 2.65 + 62.76 ± 3.54 / 40.29 ± 1.82 + 13.83 ± 1.32 / 37.70 ± 1.94 + 24.44 ± 2.86 / 58.02 ± 2.28 + 26.17 ± 2.88 / 37.61 ± 2.20 + 63.34 ± 0.59 / 18.36 ± 0.69 + 38.50 ± 0.76 / 52.93 ± 0.49 + 20.26 ± 3.39 / 38.63 ± 3.03 + 63.12 ± 2.34 / 56.38 ± 1.23 + 66.47 ± 1.61 / 69.35 ± 1.31 + 38.82 ± 3.65 / 67.96 ± 2.36 + 29.16 ± 3.45 / 46.82 ± 4.35 + 57.65 ± 5.62 / 18.79 ± 1.79 + 54.57 ± 1.21 / 65.11 ± 0.83 + 30.13 ± 5.52 / 42.55 ± 4.99 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 
14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + ibm-granite/granite-3.0-8b-base (few-shot) 8171 @@ -4542,12 +4542,12 @@ title: Germanic NLG 🇪🇺 2,515 ± 625 / 476 ± 159 2.54 2.29 - 2.53 - 2.27 + 2.52 + 2.26 3.55 - 2.86 + 2.85 2.24 - 2.62 + 2.64 1.97 44.58 ± 2.62 / 33.50 ± 2.75 47.16 ± 1.36 / 64.63 ± 1.18 @@ -4591,13 +4591,13 @@ title: Germanic NLG 🇪🇺 65.08 ± 1.62 / 20.35 ± 3.02 35.21 ± 1.13 / 51.21 ± 0.96 26.48 ± 1.04 / 43.71 ± 0.74 - 52.26 ± 1.87 / 42.18 ± 1.90 + 52.32 ± 1.98 / 41.98 ± 1.88 8.46 ± 1.09 / 21.30 ± 0.67 42.42 ± 3.42 / 68.81 ± 2.66 - 53.11 ± 1.79 / 63.80 ± 1.61 - 66.01 ± 0.59 / 18.49 ± 0.79 - 34.35 ± 0.90 / 50.52 ± 0.73 - 31.89 ± 1.58 / 48.04 ± 1.12 + 53.12 ± 1.81 / 63.79 ± 1.63 + 66.00 ± 0.61 / 18.47 ± 0.79 + 34.34 ± 0.87 / 50.51 ± 0.72 + 26.46 ± 3.88 / 43.33 ± 3.00 55.76 ± 2.15 / 52.69 ± 1.24 66.89 ± 1.11 / 69.52 ± 0.94 36.60 ± 2.37 / 67.85 ± 1.19 @@ -4647,13 +4647,13 @@ title: Germanic NLG 🇪🇺 14.1.2 14.1.2 14.1.2 - 14.0.4 + 14.1.2 14.1.2 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 14.0.4 14.1.2 14.1.2 @@ -4670,14 +4670,14 @@ title: Germanic NLG 🇪🇺 False 2,707 ± 688 / 497 ± 166 2.58 - 2.35 - 2.71 + 2.34 + 2.70 2.28 - 3.56 - 3.06 + 3.60 + 3.05 2.47 - 2.29 - 1.93 + 2.27 + 1.94 47.08 ± 3.39 / 32.34 ± 2.97 47.16 ± 1.21 / 63.47 ± 1.57 8.41 ± 2.40 / 37.31 ± 1.66 @@ -4798,14 +4798,14 @@ title: Germanic NLG 🇪🇺 32768 False 2,370 ± 416 / 711 ± 242 - 2.64 - 2.34 - 2.76 - 2.40 + 2.63 + 2.33 + 2.74 + 2.39 3.60 - 2.85 - 2.36 - 2.68 + 2.84 + 2.35 + 2.67 2.13 44.89 ± 2.46 / 29.13 ± 1.92 48.09 ± 1.00 / 65.40 ± 0.75 @@ -4928,13 +4928,13 @@ title: Germanic NLG 🇪🇺 False 2,088 ± 352 / 706 ± 214 2.67 - 2.51 + 2.50 2.77 2.40 3.38 - 3.12 + 3.11 2.21 - 2.81 + 2.80 2.18 40.19 ± 2.55 / 29.73 ± 1.44 42.31 ± 1.55 / 59.29 ± 2.00 @@ -5058,10 +5058,10 @@ title: Germanic NLG 🇪🇺 5,054 ± 1,200 / 1,056 ± 339 2.67 2.33 - 2.67 + 2.66 2.15 3.47 - 3.23 + 3.22 2.37 3.11 2.06 @@ -5185,15 +5185,15 @@ title: Germanic NLG 🇪🇺 131073 False 10,424 ± 2,641 / 2,081 ± 666 - 2.69 - 2.47 - 2.96 + 2.68 + 2.46 + 2.94 2.41 3.53 - 2.81 - 2.50 - 2.81 - 2.02 + 2.80 + 2.49 + 2.80 + 2.03 41.12 ± 3.39 / 32.50 ± 2.74 42.77 ± 2.76 / 54.70 ± 4.19 11.52 ± 3.01 / 49.37 ± 4.12 @@ -5269,172 +5269,43 @@ title: Germanic NLG 🇪🇺 13.0.0 13.0.0 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.3.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 14.1.2 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - - - mistralai/Mistral-7B-Instruct-v0.1 (few-shot) - 7242 - 32 - 32768 - False - 634 ± 179 / 110 ± 35 - 2.75 - 2.51 - 2.82 - 2.52 - 3.50 - 3.15 - 2.49 - 2.95 - 2.07 - 37.93 ± 2.71 / 23.54 ± 1.99 - 44.49 ± 2.56 / 60.64 ± 3.00 - 14.09 ± 2.94 / 42.43 ± 3.30 - 51.38 ± 2.31 / 58.78 ± 1.27 - 65.80 ± 0.93 / 19.91 ± 1.41 - 45.07 ± 1.34 / 58.18 ± 1.03 - 35.36 ± 2.19 / 55.82 ± 1.48 - 14.85 ± 1.19 / 35.26 ± 1.24 - 50.08 ± 1.54 / 34.52 ± 1.17 - 51.27 ± 1.52 / 33.37 ± 2.37 - 43.65 ± 1.98 / 60.88 ± 1.36 - 62.39 ± 0.76 / 14.24 ± 0.81 - 14.09 ± 2.85 / 44.91 ± 3.95 - 8.28 ± 1.82 / 47.22 ± 3.72 - 37.23 ± 3.15 / 63.67 ± 2.98 - 20.44 ± 1.03 / 39.51 ± 0.72 - 15.87 ± 1.29 / 35.89 ± 1.06 - 45.01 ± 2.11 / 27.59 ± 3.35 - 73.33 ± 1.98 / 76.19 ± 1.59 - 11.59 ± 3.45 / 40.89 ± 4.15 - 52.12 ± 1.42 / 59.29 ± 1.17 - 63.10 ± 0.60 / 18.05 ± 0.36 - 24.03 ± 1.09 / 42.32 ± 0.70 - 15.37 ± 0.71 / 35.78 ± 0.69 - 36.04 ± 2.59 / 24.74 ± 2.79 - 12.93 ± 5.42 / 30.40 ± 3.15 - -0.36 ± 
1.36 / 33.94 ± 0.32 - 18.06 ± 3.16 / 42.57 ± 2.89 - 62.80 ± 1.69 / 15.23 ± 1.01 - 5.44 ± 1.14 / 28.13 ± 1.06 - 6.35 ± 2.71 / 50.49 ± 1.57 - 55.42 ± 2.12 / 46.41 ± 2.50 - 15.85 ± 6.84 / 36.28 ± 7.13 - 1.11 ± 2.41 / 36.79 ± 4.00 - 33.54 ± 1.29 / 50.80 ± 1.31 - 47.19 ± 3.74 / 33.02 ± 2.09 - 47.26 ± 3.14 / 63.48 ± 2.94 - 22.32 ± 1.78 / 56.73 ± 4.00 - 24.36 ± 3.78 / 54.61 ± 4.44 - 67.75 ± 1.10 / 25.91 ± 2.95 - 26.79 ± 1.01 / 44.58 ± 0.85 - 20.33 ± 1.63 / 39.63 ± 1.09 - 52.72 ± 2.58 / 33.51 ± 1.22 - 7.91 ± 2.16 / 27.82 ± 1.97 - 18.14 ± 2.10 / 55.42 ± 3.05 - 52.75 ± 0.88 / 67.15 ± 1.08 - 64.77 ± 0.97 / 16.55 ± 0.81 - 26.06 ± 0.77 / 44.08 ± 0.51 - 14.26 ± 1.48 / 35.14 ± 1.18 - 57.58 ± 2.30 / 47.94 ± 2.89 - 61.44 ± 2.02 / 69.47 ± 0.98 - 34.92 ± 2.40 / 66.67 ± 1.41 - 65.38 ± 1.76 / 81.90 ± 0.57 - 69.62 ± 0.31 / 24.65 ± 0.44 - 38.40 ± 0.98 / 53.43 ± 0.76 - 35.72 ± 1.56 / 49.69 ± 1.42 - 9.3.1 - 9.3.1 - 9.3.1 - 12.4.0 - 12.6.1 - 10.0.1 - 10.0.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 12.4.0 - 9.3.1 - 9.3.1 - 12.4.0 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 12.4.0 - 12.4.0 - 9.3.1 - 9.3.1 - 9.3.1 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 13.3.0 - 9.3.1 - 12.4.0 - 12.4.0 - 12.10.8 - 12.1.0 - 9.3.1 - 13.3.0 - 9.3.1 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 14.1.2 + 13.0.0 13.0.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 9.3.1 - 9.3.1 - 9.3.1 - 12.4.0 - 12.4.0 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 12.4.0 - 12.4.0 - 9.3.1 - 9.3.1 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 nvidia/mistral-nemo-minitron-8b-instruct (few-shot) @@ -5443,13 +5314,13 @@ title: Germanic NLG 🇪🇺 8192 True 3,161 ± 676 / 1,247 ± 481 - 2.75 + 2.74 2.67 - 2.86 + 2.84 2.56 3.45 - 2.99 - 2.37 + 2.98 + 2.38 2.95 2.12 49.01 ± 1.94 / 29.13 ± 2.09 @@ -5572,15 +5443,15 @@ title: Germanic NLG 🇪🇺 32768 True 2,219 ± 427 / 717 ± 224 - 2.75 + 2.74 2.54 - 2.86 + 2.85 2.42 3.45 - 3.20 + 3.19 2.26 - 2.98 - 2.25 + 2.97 + 2.26 37.93 ± 3.09 / 29.50 ± 2.18 44.62 ± 1.98 / 62.62 ± 1.54 0.28 ± 0.54 / 33.48 ± 0.24 @@ -5694,6 +5565,135 @@ title: Germanic NLG 🇪🇺 12.2.0 12.2.0 + + mistralai/Mistral-7B-Instruct-v0.1 (few-shot) + 7242 + 32 + 32768 + False + 634 ± 179 / 110 ± 35 + 2.75 + 2.51 + 2.81 + 2.52 + 3.50 + 3.14 + 2.48 + 2.94 + 2.07 + 37.93 ± 2.71 / 23.54 ± 1.99 + 44.49 ± 2.56 / 60.64 ± 3.00 + 14.09 ± 2.94 / 42.43 ± 3.30 + 51.38 ± 2.31 / 58.78 ± 1.27 + 65.80 ± 0.93 / 19.91 ± 1.41 + 45.07 ± 1.34 / 58.18 ± 1.03 + 35.36 ± 2.19 / 55.82 ± 1.48 + 14.85 ± 1.19 / 35.26 ± 1.24 + 50.08 ± 1.54 / 34.52 ± 1.17 + 51.27 ± 1.52 / 33.37 ± 2.37 + 43.65 ± 1.98 / 60.88 ± 1.36 + 62.39 ± 0.76 / 14.24 ± 0.81 + 14.09 ± 2.85 / 44.91 ± 3.95 + 8.28 ± 1.82 / 47.22 ± 3.72 + 37.23 ± 3.15 / 63.67 ± 2.98 + 20.44 ± 1.03 / 39.51 ± 0.72 + 15.87 ± 1.29 / 35.89 ± 1.06 + 45.01 ± 2.11 / 27.59 ± 3.35 + 73.33 ± 1.98 / 76.19 ± 1.59 + 11.59 ± 3.45 / 40.89 ± 4.15 + 52.12 ± 1.42 / 59.29 ± 1.17 + 63.10 ± 0.60 / 18.05 ± 0.36 + 24.03 ± 1.09 / 42.32 ± 0.70 + 15.37 ± 0.71 / 35.78 ± 0.69 + 36.04 ± 2.59 / 24.74 ± 2.79 + 12.93 ± 5.42 / 30.40 ± 3.15 + -0.36 ± 1.36 / 33.94 ± 0.32 + 18.06 ± 3.16 / 42.57 ± 2.89 + 62.80 ± 1.69 / 15.23 ± 1.01 + 5.44 ± 1.14 / 28.13 ± 1.06 + 6.35 ± 2.71 / 50.49 ± 1.57 + 55.42 ± 2.12 / 46.41 ± 2.50 + 15.85 ± 6.84 / 36.28 ± 7.13 + 1.11 ± 2.41 / 36.79 ± 4.00 + 33.54 ± 1.29 / 50.80 ± 1.31 + 47.19 ± 3.74 / 33.02 ± 2.09 + 47.26 ± 3.14 / 63.48 ± 2.94 + 22.32 ± 1.78 / 56.73 ± 4.00 + 24.36 ± 3.78 / 54.61 
± 4.44 + 67.75 ± 1.10 / 25.91 ± 2.95 + 26.79 ± 1.01 / 44.58 ± 0.85 + 20.33 ± 1.63 / 39.63 ± 1.09 + 52.72 ± 2.58 / 33.51 ± 1.22 + 7.91 ± 2.16 / 27.82 ± 1.97 + 18.14 ± 2.10 / 55.42 ± 3.05 + 52.75 ± 0.88 / 67.15 ± 1.08 + 64.77 ± 0.97 / 16.55 ± 0.81 + 26.06 ± 0.77 / 44.08 ± 0.51 + 14.26 ± 1.48 / 35.14 ± 1.18 + 57.58 ± 2.30 / 47.94 ± 2.89 + 61.44 ± 2.02 / 69.47 ± 0.98 + 34.92 ± 2.40 / 66.67 ± 1.41 + 65.38 ± 1.76 / 81.90 ± 0.57 + 69.62 ± 0.31 / 24.65 ± 0.44 + 38.40 ± 0.98 / 53.43 ± 0.76 + 35.72 ± 1.56 / 49.69 ± 1.42 + 9.3.1 + 9.3.1 + 9.3.1 + 12.4.0 + 12.6.1 + 10.0.1 + 10.0.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 12.4.0 + 9.3.1 + 9.3.1 + 12.4.0 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 12.4.0 + 12.4.0 + 9.3.1 + 9.3.1 + 9.3.1 + 13.3.0 + 9.3.1 + 12.4.0 + 12.4.0 + 12.10.8 + 12.1.0 + 9.3.1 + 13.3.0 + 9.3.1 + 13.0.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 9.3.1 + 9.3.1 + 9.3.1 + 12.4.0 + 12.4.0 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 12.4.0 + 12.4.0 + 9.3.1 + 9.3.1 + ibm-granite/granite-8b-code-base-4k (few-shot) 8055 @@ -5702,13 +5702,13 @@ title: Germanic NLG 🇪🇺 True 2,313 ± 423 / 682 ± 210 2.76 - 2.68 - 2.86 + 2.67 + 2.85 2.55 3.51 - 3.03 - 2.43 - 2.82 + 3.02 + 2.45 + 2.81 2.21 48.44 ± 1.69 / 36.99 ± 1.77 39.07 ± 1.03 / 56.85 ± 1.91 @@ -5834,11 +5834,11 @@ title: Germanic NLG 🇪🇺 2.68 2.73 2.53 - 3.21 + 3.20 3.07 - 2.60 - 2.90 - 2.50 + 2.63 + 2.87 + 2.51 50.83 ± 1.31 / 41.56 ± 1.86 53.23 ± 2.63 / 64.70 ± 3.82 23.02 ± 6.31 / 53.19 ± 7.11 @@ -5959,14 +5959,14 @@ title: Germanic NLG 🇪🇺 4096 False 2,643 ± 455 / 800 ± 247 - 2.83 - 2.59 - 2.95 + 2.82 + 2.58 + 2.94 2.61 3.52 - 3.26 - 2.58 - 2.91 + 3.25 + 2.57 + 2.90 2.22 35.44 ± 3.00 / 24.63 ± 1.65 44.88 ± 1.45 / 62.35 ± 1.33 @@ -6090,13 +6090,13 @@ title: Germanic NLG 🇪🇺 277 ± 77 / 70 ± 25 2.85 2.28 - 3.14 + 3.12 2.71 3.36 - 3.27 + 3.26 2.73 - 2.84 - 2.47 + 2.83 + 2.48 59.48 ± 1.97 / 42.21 ± 2.53 56.46 ± 2.39 / 71.07 ± 1.48 20.57 ± 3.78 / 49.85 ± 2.21 @@ -6218,14 +6218,14 @@ title: Germanic NLG 🇪🇺 True 5,617 ± 995 / 1,623 ± 540 2.89 - 2.76 - 2.95 + 2.75 + 2.94 2.62 - 3.81 - 3.15 - 2.61 - 2.96 - 2.28 + 3.82 + 3.14 + 2.63 + 2.95 + 2.29 45.90 ± 2.53 / 33.00 ± 1.93 37.11 ± 1.88 / 56.47 ± 1.59 11.70 ± 2.16 / 50.31 ± 3.91 @@ -6346,14 +6346,14 @@ title: Germanic NLG 🇪🇺 4096 True 10,246 ± 3,021 / 1,629 ± 550 - 3.02 + 3.01 2.96 - 3.23 - 2.77 - 3.86 - 3.56 + 3.22 + 2.76 + 3.84 + 3.54 2.60 - 2.92 + 2.91 2.23 37.37 ± 2.46 / 26.81 ± 2.24 31.44 ± 1.82 / 48.96 ± 2.35 @@ -6475,15 +6475,15 @@ title: Germanic NLG 🇪🇺 4096 True 6,275 ± 1,193 / 1,755 ± 578 - 3.18 - 3.20 - 3.41 + 3.17 + 3.19 + 3.39 2.76 - 3.52 - 3.33 + 3.55 + 3.32 3.19 - 3.44 - 2.56 + 3.43 + 2.57 28.18 ± 3.39 / 24.25 ± 3.30 29.32 ± 7.19 / 41.08 ± 8.29 2.90 ± 2.18 / 37.93 ± 4.98 @@ -6604,14 +6604,14 @@ title: Germanic NLG 🇪🇺 4096 True 10,504 ± 3,028 / 1,678 ± 559 - 3.18 - 3.26 - 3.33 - 2.92 + 3.17 + 3.22 + 3.32 + 2.91 4.01 - 3.83 - 2.79 - 3.02 + 3.81 + 2.80 + 3.01 2.30 31.80 ± 2.87 / 23.06 ± 2.09 6.85 ± 2.25 / 19.42 ± 0.91 @@ -6733,15 +6733,15 @@ title: Germanic NLG 🇪🇺 4096 False 1,254 ± 328 / 243 ± 83 - 3.23 + 3.22 2.92 - 3.37 - 2.85 - 3.78 - 3.53 + 3.35 + 2.84 + 3.79 + 3.51 3.01 - 3.37 - 2.99 + 3.36 + 3.00 34.66 ± 1.19 / 21.37 ± 1.52 21.93 ± 3.72 / 31.67 ± 4.68 1.50 ± 1.04 / 33.84 ± 0.24 @@ -6862,14 +6862,14 @@ title: Germanic NLG 🇪🇺 2048 True 9,059 ± 1,947 / 2,201 ± 728 - 3.27 - 3.10 - 3.33 + 3.26 + 3.09 + 3.32 3.05 4.04 - 3.64 - 3.07 - 3.34 + 3.63 + 3.06 + 3.33 2.59 37.21 ± 2.75 / 27.74 ± 2.59 31.54 ± 2.39 / 50.61 ± 2.88 @@ -6992,14 +6992,14 @@ title: Germanic NLG 🇪🇺 True 2,732 ± 868 
/ 662 ± 238 3.31 - 2.94 - 3.71 - 3.01 - 3.79 - 3.63 + 2.93 + 3.70 + 3.00 + 3.83 + 3.62 3.38 - 3.33 - 2.71 + 3.32 + 2.72 38.62 ± 3.40 / 27.71 ± 3.01 35.47 ± 1.35 / 52.70 ± 2.05 5.07 ± 1.76 / 43.91 ± 4.86 @@ -7121,13 +7121,13 @@ title: Germanic NLG 🇪🇺 True 1,438 ± 410 / 233 ± 79 3.33 - 2.98 - 3.40 - 3.07 + 2.95 + 3.38 + 3.08 3.79 - 3.60 + 3.58 3.24 - 3.52 + 3.51 3.07 29.49 ± 2.73 / 21.57 ± 2.07 13.77 ± 3.72 / 23.78 ± 3.10 @@ -7249,15 +7249,15 @@ title: Germanic NLG 🇪🇺 8192 True 15,971 ± 3,654 / 3,609 ± 1,197 - 3.41 - 3.22 - 3.79 - 3.02 + 3.40 + 3.21 + 3.77 + 3.01 3.76 - 3.95 + 3.94 3.31 - 3.80 - 2.41 + 3.79 + 2.42 29.44 ± 1.81 / 20.31 ± 1.68 18.49 ± 2.47 / 35.29 ± 2.83 1.73 ± 1.63 / 38.18 ± 4.15 @@ -7378,14 +7378,14 @@ title: Germanic NLG 🇪🇺 4096 True 15,009 ± 4,072 / 2,702 ± 878 - 3.41 - 3.10 - 3.42 - 2.98 + 3.40 + 3.09 + 3.40 + 2.97 4.02 - 4.09 - 3.14 - 3.58 + 4.07 + 3.15 + 3.57 2.93 19.61 ± 2.68 / 17.44 ± 2.64 37.92 ± 1.74 / 46.23 ± 1.91 @@ -7507,15 +7507,15 @@ title: Germanic NLG 🇪🇺 8192 True 16,249 ± 3,690 / 3,689 ± 1,226 - 3.49 - 3.38 - 3.71 - 2.98 - 3.92 - 4.10 + 3.48 + 3.40 + 3.70 + 2.97 + 3.91 + 4.08 3.31 - 3.83 - 2.65 + 3.82 + 2.66 24.47 ± 3.42 / 18.70 ± 2.18 9.93 ± 2.70 / 23.57 ± 3.00 1.22 ± 1.81 / 35.31 ± 1.93 @@ -7629,6 +7629,135 @@ title: Germanic NLG 🇪🇺 13.1.0 13.1.0 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 3.73 + 3.71 + 3.97 + 3.58 + 4.14 + 4.26 + 3.40 + 4.04 + 2.73 + 20.03 ± 1.60 / 17.52 ± 1.50 + 15.96 ± 2.77 / 38.74 ± 2.88 + 0.86 ± 1.76 / 42.48 ± 3.82 + 28.98 ± 1.93 / 35.38 ± 2.10 + 56.75 ± 0.67 / 10.37 ± 0.34 + 2.15 ± 1.69 / 25.19 ± 0.96 + 2.51 ± 2.17 / 35.61 ± 1.10 + 0.88 ± 0.95 / 25.68 ± 0.52 + 29.25 ± 2.15 / 28.34 ± 2.25 + 25.45 ± 1.96 / 24.62 ± 1.86 + 11.28 ± 0.71 / 22.46 ± 2.54 + 55.58 ± 1.17 / 8.04 ± 0.65 + 1.52 ± 1.26 / 40.82 ± 4.08 + 0.52 ± 1.60 / 41.23 ± 4.21 + 8.47 ± 0.94 / 20.43 ± 2.10 + 2.62 ± 1.32 / 26.23 ± 0.92 + -0.06 ± 0.97 / 24.96 ± 0.73 + 26.41 ± 2.74 / 25.18 ± 2.46 + 25.99 ± 3.51 / 35.36 ± 4.53 + 1.64 ± 1.90 / 37.52 ± 2.69 + 21.39 ± 1.66 / 26.70 ± 1.83 + 58.09 ± 0.53 / 13.29 ± 0.62 + 3.51 ± 1.35 / 27.14 ± 1.02 + 1.05 ± 1.05 / 25.62 ± 0.84 + 9.39 ± 3.31 / 9.28 ± 3.38 + 6.44 ± 2.00 / 22.24 ± 3.62 + -0.72 ± 1.67 / 43.21 ± 3.74 + 3.34 ± 0.98 / 21.41 ± 2.93 + 59.37 ± 0.99 / 11.96 ± 0.51 + -1.22 ± 0.96 / 23.83 ± 1.00 + 4.20 ± 2.70 / 53.16 ± 2.28 + 14.18 ± 3.48 / 13.89 ± 3.46 + 2.36 ± 2.76 / 17.70 ± 4.11 + 2.29 ± 2.66 / 48.34 ± 3.37 + 6.35 ± 1.25 / 12.91 ± 1.66 + 32.54 ± 1.23 / 30.63 ± 1.12 + 22.27 ± 4.97 / 36.09 ± 3.66 + 7.18 ± 1.13 / 51.77 ± 1.56 + 16.72 ± 0.88 / 38.07 ± 0.76 + 58.08 ± 0.37 / 10.60 ± 0.37 + 6.57 ± 1.03 / 29.10 ± 0.93 + 5.16 ± 1.64 / 28.12 ± 1.34 + 28.25 ± 3.03 / 25.24 ± 2.38 + 3.73 ± 1.83 / 15.20 ± 2.26 + 0.76 ± 1.10 / 33.57 ± 0.34 + 19.08 ± 2.27 / 28.16 ± 2.64 + 57.20 ± 1.38 / 10.23 ± 0.61 + 5.45 ± 0.96 / 28.87 ± 0.86 + 2.34 ± 1.16 / 26.19 ± 0.80 + 43.00 ± 1.94 / 39.96 ± 1.87 + 54.47 ± 1.37 / 64.39 ± 0.56 + 17.44 ± 1.67 / 50.79 ± 2.44 + 53.15 ± 1.38 / 70.06 ± 0.79 + 67.33 ± 0.15 / 22.37 ± 0.27 + 20.49 ± 1.00 / 38.88 ± 0.87 + 17.50 ± 0.84 / 36.85 ± 0.65 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 
14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + PleIAs/Pleias-1.2b-Preview (few-shot) 1195 @@ -7636,15 +7765,15 @@ title: Germanic NLG 🇪🇺 2048 True 10,756 ± 3,589 / 1,157 ± 670 - 3.83 - 3.44 - 3.95 + 3.82 + 3.45 + 3.93 3.60 4.03 - 4.06 + 4.04 3.84 - 4.01 - 3.68 + 4.00 + 3.69 28.30 ± 2.45 / 22.93 ± 1.82 28.95 ± 4.05 / 48.32 ± 5.01 0.20 ± 0.52 / 34.12 ± 1.62 @@ -7765,15 +7894,15 @@ title: Germanic NLG 🇪🇺 4096 True 6,513 ± 1,241 / 1,282 ± 644 - 3.85 + 3.84 3.63 - 3.92 - 3.40 - 4.07 - 4.31 - 3.78 - 3.85 - 3.83 + 3.91 + 3.39 + 4.06 + 4.28 + 3.75 + 3.84 + 3.84 15.93 ± 3.91 / 14.68 ± 2.81 13.01 ± 2.33 / 28.28 ± 4.63 0.05 ± 1.37 / 40.73 ± 3.78 @@ -7895,13 +8024,13 @@ title: Germanic NLG 🇪🇺 True 22,023 ± 6,203 / 3,675 ± 1,231 3.97 - 3.87 - 4.24 + 3.86 + 4.25 3.88 4.19 - 4.29 - 3.81 - 4.13 + 4.27 + 3.84 + 4.12 3.34 12.68 ± 1.39 / 12.32 ± 1.19 3.61 ± 2.69 / 19.01 ± 3.95 @@ -8024,14 +8153,14 @@ title: Germanic NLG 🇪🇺 True 2,519 ± 841 / 323 ± 104 3.97 - 3.51 + 3.52 4.06 - 3.86 - 4.11 - 4.36 - 4.09 - 3.97 - 3.79 + 3.85 + 4.12 + 4.34 + 4.10 + 3.96 + 3.80 16.17 ± 3.44 / 14.33 ± 1.92 29.12 ± 4.09 / 49.93 ± 4.45 -0.47 ± 0.62 / 33.18 ± 0.28 @@ -8153,14 +8282,14 @@ title: Germanic NLG 🇪🇺 True 21,777 ± 6,115 / 3,617 ± 1,211 4.05 - 3.96 + 3.99 4.26 3.97 4.29 - 4.23 + 4.20 4.07 - 4.26 - 3.33 + 4.25 + 3.34 8.97 ± 3.18 / 8.62 ± 2.72 2.66 ± 2.70 / 16.29 ± 2.34 1.65 ± 1.38 / 44.50 ± 3.21 @@ -8281,15 +8410,15 @@ title: Germanic NLG 🇪🇺 8192 True 26,346 ± 7,812 / 4,082 ± 1,372 - 4.20 - 4.21 - 4.15 - 3.94 + 4.19 + 4.20 + 4.09 + 3.93 4.23 - 4.34 - 4.20 - 4.52 - 4.01 + 4.31 + 4.22 + 4.51 + 4.03 13.72 ± 1.83 / 13.41 ± 1.52 3.79 ± 3.11 / 21.06 ± 4.74 -0.45 ± 0.70 / 39.69 ± 4.95 @@ -8403,135 +8532,6 @@ title: Germanic NLG 🇪🇺 13.1.0 13.1.0 - - PleIAs/Pleias-350m-Preview (few-shot) - 353 - 66 - 2048 - True - 10,242 ± 3,432 / 1,335 ± 484 - 4.23 - 4.13 - 4.28 - 4.16 - 4.23 - 4.26 - 4.15 - 4.53 - 4.07 - 13.84 ± 1.95 / 13.12 ± 1.60 - 9.47 ± 3.30 / 25.66 ± 3.36 - -0.36 ± 1.60 / 39.52 ± 3.19 - 22.10 ± 2.03 / 26.85 ± 2.16 - 45.28 ± 4.20 / 6.64 ± 0.74 - -0.27 ± 1.66 / 25.68 ± 1.51 - 2.54 ± 3.58 / 36.82 ± 2.34 - -0.66 ± 0.98 / 24.57 ± 0.57 - 26.59 ± 2.14 / 26.61 ± 2.49 - 26.78 ± 1.46 / 26.94 ± 1.92 - 7.91 ± 2.20 / 17.44 ± 3.42 - 45.83 ± 1.92 / 5.61 ± 0.51 - 0.28 ± 1.08 / 36.00 ± 3.05 - 0.04 ± 1.65 / 41.17 ± 5.30 - 0.65 ± 0.40 / 2.88 ± 1.01 - 0.57 ± 1.71 / 24.14 ± 1.57 - -0.66 ± 0.78 / 24.58 ± 0.58 - 22.09 ± 2.91 / 21.86 ± 2.85 - 14.15 ± 7.21 / 25.90 ± 4.05 - -0.04 ± 1.25 / 36.18 ± 3.31 - 21.60 ± 1.52 / 26.25 ± 1.48 - 27.08 ± 10.78 / 5.28 ± 2.27 - -0.65 ± 0.97 / 22.89 ± 0.69 - 0.48 ± 0.88 / 25.21 ± 0.51 - 17.73 ± 1.58 / 18.22 ± 1.53 - 2.38 ± 2.00 / 21.79 ± 2.03 - -0.18 ± 1.21 / 37.94 ± 4.17 - 1.59 ± 0.83 / 12.57 ± 3.10 - 52.81 ± 2.78 / 8.12 ± 0.57 - 0.89 ± 1.75 / 25.64 ± 1.59 - -1.11 ± 3.80 / 55.19 ± 1.68 - 31.99 ± 3.53 / 32.31 ± 3.75 - 0.00 ± 0.00 / 13.57 ± 0.43 - 0.48 ± 1.60 / 37.69 ± 4.37 - 0.29 ± 0.29 / 0.89 ± 0.74 - 25.30 ± 2.32 / 24.21 ± 2.13 - 13.90 ± 5.43 / 30.42 ± 6.17 - -0.25 ± 1.90 / 39.25 ± 4.50 - 6.12 ± 0.63 / 15.65 ± 1.66 - 41.90 ± 7.85 / 7.52 ± 1.26 - -1.81 ± 1.16 / 22.39 ± 0.62 - 0.00 ± 0.84 / 24.71 ± 0.61 - 24.47 ± 2.03 / 26.64 ± 2.70 - 3.57 ± 2.03 / 16.42 ± 3.18 - -2.03 ± 1.35 / 39.46 ± 4.09 - 10.18 ± 1.78 / 17.17 ± 2.17 - 44.43 ± 2.69 / 7.67 ± 0.47 - -0.11 ± 1.20 / 24.70 ± 0.92 - -0.01 ± 1.24 / 24.56 ± 0.75 - 31.79 ± 3.88 / 31.32 ± 2.81 - 19.13 ± 9.92 / 33.51 ± 6.97 - -0.03 ± 1.07 / 36.37 ± 2.34 - 12.35 ± 1.80 / 21.93 ± 1.63 - 46.13 ± 6.88 / 11.20 ± 1.59 - -1.20 ± 0.68 / 23.43 ± 0.94 - 0.47 ± 
1.47 / 25.42 ± 1.15 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.1.2 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - HuggingFaceTB/SmolLM2-135M-Instruct (few-shot) 135 @@ -8539,15 +8539,15 @@ title: Germanic NLG 🇪🇺 8192 True 25,602 ± 7,583 / 3,953 ± 1,325 - 4.24 - 4.03 - 4.27 + 4.22 + 4.02 + 4.21 4.12 4.32 - 4.34 + 4.31 4.31 - 4.55 - 3.95 + 4.54 + 3.97 12.11 ± 1.07 / 11.48 ± 1.07 2.61 ± 3.22 / 18.95 ± 3.93 0.25 ± 1.87 / 39.65 ± 4.00 @@ -8661,6 +8661,135 @@ title: Germanic NLG 🇪🇺 13.1.0 13.1.0 + + PleIAs/Pleias-350m-Preview (few-shot) + 353 + 66 + 2048 + True + 10,242 ± 3,432 / 1,335 ± 484 + 4.22 + 4.13 + 4.30 + 4.12 + 4.23 + 4.24 + 4.15 + 4.52 + 4.08 + 13.84 ± 1.95 / 13.12 ± 1.60 + 9.47 ± 3.30 / 25.66 ± 3.36 + -0.36 ± 1.60 / 39.52 ± 3.19 + 22.10 ± 2.03 / 26.85 ± 2.16 + 45.28 ± 4.20 / 6.64 ± 0.74 + -0.27 ± 1.66 / 25.68 ± 1.51 + 2.54 ± 3.58 / 36.82 ± 2.34 + -0.66 ± 0.98 / 24.57 ± 0.57 + 26.59 ± 2.14 / 26.61 ± 2.49 + 26.78 ± 1.46 / 26.94 ± 1.92 + 7.91 ± 2.20 / 17.44 ± 3.42 + 45.83 ± 1.92 / 5.61 ± 0.51 + 0.28 ± 1.08 / 36.00 ± 3.05 + 0.04 ± 1.65 / 41.17 ± 5.30 + 0.65 ± 0.40 / 2.88 ± 1.01 + 0.57 ± 1.71 / 24.14 ± 1.57 + -0.66 ± 0.78 / 24.58 ± 0.58 + 22.09 ± 2.91 / 21.86 ± 2.85 + 14.15 ± 7.21 / 25.90 ± 4.05 + -0.04 ± 1.25 / 36.18 ± 3.31 + 21.60 ± 1.52 / 26.25 ± 1.48 + 27.08 ± 10.78 / 5.28 ± 2.27 + -0.65 ± 0.97 / 22.89 ± 0.69 + 0.48 ± 0.88 / 25.21 ± 0.51 + 17.73 ± 1.58 / 18.22 ± 1.53 + 2.38 ± 2.00 / 21.79 ± 2.03 + -0.18 ± 1.21 / 37.94 ± 4.17 + 1.59 ± 0.83 / 12.57 ± 3.10 + 52.81 ± 2.78 / 8.12 ± 0.57 + 0.89 ± 1.75 / 25.64 ± 1.59 + -1.11 ± 3.80 / 55.19 ± 1.68 + 31.99 ± 3.53 / 32.31 ± 3.75 + 0.00 ± 0.00 / 13.57 ± 0.43 + 0.48 ± 1.60 / 37.69 ± 4.37 + 0.29 ± 0.29 / 0.89 ± 0.74 + 25.30 ± 2.32 / 24.21 ± 2.13 + 13.90 ± 5.43 / 30.42 ± 6.17 + -0.25 ± 1.90 / 39.25 ± 4.50 + 6.12 ± 0.63 / 15.65 ± 1.66 + 41.90 ± 7.85 / 7.52 ± 1.26 + -1.81 ± 1.16 / 22.39 ± 0.62 + 0.00 ± 0.84 / 24.71 ± 0.61 + 24.47 ± 2.03 / 26.64 ± 2.70 + 3.57 ± 2.03 / 16.42 ± 3.18 + -2.03 ± 1.35 / 39.46 ± 4.09 + 10.18 ± 1.78 / 17.17 ± 2.17 + 44.43 ± 2.69 / 7.67 ± 0.47 + -0.11 ± 1.20 / 24.70 ± 0.92 + -0.01 ± 1.24 / 24.56 ± 0.75 + 31.79 ± 3.88 / 31.32 ± 2.81 + 19.13 ± 9.92 / 33.51 ± 6.97 + -0.03 ± 1.07 / 36.37 ± 2.34 + 12.35 ± 1.80 / 21.93 ± 1.63 + 46.13 ± 6.88 / 11.20 ± 1.59 + -1.20 ± 0.68 / 23.43 ± 0.94 + 0.47 ± 1.47 / 25.42 ± 1.15 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.1.2 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + PleIAs/Pleias-Pico (few-shot) 353 @@ -8668,15 +8797,15 @@ title: Germanic NLG 🇪🇺 2048 True 2,331 ± 787 / 301 ± 97 - 4.27 - 4.03 - 4.41 - 4.18 + 4.26 + 3.99 + 4.42 + 4.19 4.47 - 4.41 + 4.38 4.16 - 4.32 - 4.19 + 4.31 + 4.20 10.59 ± 2.24 / 10.29 ± 1.37 13.31 ± 3.23 / 34.38 ± 3.13 0.52 ± 0.78 / 33.76 ± 0.37 @@ -8797,15 +8926,15 
@@ title: Germanic NLG 🇪🇺 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 4.72 - 4.68 - 4.69 - 4.43 + 4.71 + 4.67 + 4.67 + 4.42 4.80 - 4.74 - 4.69 - 4.96 - 4.79 + 4.70 + 4.70 + 4.95 + 4.81 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 18.12 ± 0.19 0.00 ± 0.00 / 33.25 ± 0.23 diff --git a/germanic-nlu.csv b/germanic-nlu.csv index 6dcbb47b..b262b7ee 100644 --- a/germanic-nlu.csv +++ b/germanic-nlu.csv @@ -1,96 +1,97 @@ model_id,num_model_parameters,vocabulary_size,max_sequence_length,commercially_licensed,merge,speed,rank,da_rank,no_rank,sv_rank,is_rank,fo_rank,de_rank,nl_rank,en_rank,dansk,angry_tweets,scala_da,scandiqa_da,norne_nb,norne_nn,norec,scala_nb,scala_nn,norquad,suc3,swerec,scala_sv,scandiqa_sv,mim_gold_ner,hotter_and_colder_sentiment,scala_is,nqii,fone,fosent,scala_fo,foqa,germeval,sb10k,scala_de,germanquad,conll_nl,dutch_social,scala_nl,squad_nl,conll_en,sst5,scala_en,squad -"gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,1.46,1.19,1.46,1.19,1.26,1.65,1.52,2.06,1.39,66.8,61.62,66.84,56.85,77.48,78.7,62.55,74.45,56.31,44.67,74.45,77.59,71.35,56.56,86.37,49.59,43.03,37.26,86.51,38.22,35.09,58.65,68.94,60.47,51.26,30.04,66.44,14.22,72.3,57.81,81.79,67.55,51.21,66.6 -meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,1.67,1.21,1.91,1.21,1.9,1.99,1.56,2.27,1.34,71.94,61.26,64.94,56.0,82.44,82.17,40.55,63.91,45.93,45.33,76.27,80.7,68.85,56.41,66.4,53.84,26.22,26.49,81.95,53.25,14.29,60.41,72.87,60.79,50.25,28.34,69.12,11.23,68.74,55.25,82.86,70.6,53.8,62.69 -microsoft/mdeberta-v3-base,279,251,512,True,False,20637,1.67,1.63,1.45,1.38,1.32,2.26,1.66,2.28,1.38,72.9,43.38,67.05,42.15,91.9,86.81,53.69,70.55,61.21,48.82,78.84,75.24,72.3,44.74,81.12,45.3,54.11,30.93,88.6,6.7,46.81,20.96,77.42,50.9,59.38,20.28,84.47,5.16,71.23,46.43,91.83,53.75,62.11,62.1 -"gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.69,1.36,1.46,1.21,1.26,2.84,1.71,2.23,1.46,71.15,49.42,64.59,57.35,79.07,81.56,66.66,64.53,54.7,43.51,76.66,77.16,68.99,57.96,81.19,49.86,51.1,29.64,81.86,27.3,-0.97,56.45,69.99,54.82,43.66,30.06,76.75,10.8,56.26,55.55,83.48,62.74,46.56,65.41 +"gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,1.46,1.19,1.45,1.19,1.26,1.65,1.53,2.06,1.39,66.8,61.62,66.84,56.85,77.48,78.7,62.55,74.45,56.31,44.67,74.45,77.59,71.35,56.56,86.37,49.59,43.03,37.26,86.51,38.22,35.09,58.65,68.94,60.47,51.26,30.04,66.44,14.22,72.3,57.81,81.79,67.55,51.21,66.6 +meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,1.67,1.21,1.9,1.21,1.9,1.99,1.56,2.26,1.34,71.94,61.26,64.94,56.0,82.44,82.17,40.55,63.91,45.93,45.33,76.27,80.7,68.85,56.41,66.4,53.84,26.22,26.49,81.95,53.25,14.29,60.41,72.87,60.79,50.25,28.34,69.12,11.23,68.74,55.25,82.86,70.6,53.8,62.69 +microsoft/mdeberta-v3-base,279,251,512,True,False,20637,1.67,1.63,1.44,1.38,1.31,2.26,1.66,2.28,1.38,72.9,43.38,67.05,42.15,91.9,86.81,53.69,70.55,61.21,48.82,78.84,75.24,72.3,44.74,81.12,45.3,54.11,30.93,88.6,6.7,46.81,20.96,77.42,50.9,59.38,20.28,84.47,5.16,71.23,46.43,91.83,53.75,62.11,62.1 +"gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.7,1.36,1.5,1.21,1.26,2.84,1.71,2.23,1.46,71.15,49.42,64.59,57.35,79.07,81.56,66.66,64.53,54.7,43.51,76.66,77.16,68.99,57.96,81.19,49.86,51.1,29.64,81.86,27.3,-0.97,56.45,69.99,54.82,43.66,30.06,76.75,10.8,56.26,55.55,83.48,62.74,46.56,65.41 
google/rembert,576,250,256,True,False,11736,1.71,1.54,1.36,1.32,1.57,2.96,1.18,2.35,1.38,70.19,50.19,69.72,39.85,88.7,86.11,54.19,69.83,54.84,58.18,78.23,75.99,72.17,46.0,78.05,36.87,48.29,29.38,87.35,0.04,14.65,36.1,77.62,60.65,62.6,33.62,75.49,4.79,66.47,55.7,90.17,51.74,55.55,69.02 -meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.71,1.35,1.92,1.25,1.95,2.03,1.53,2.28,1.38,68.57,60.52,57.57,54.33,80.05,80.67,40.65,56.42,38.21,49.22,72.16,81.69,63.97,57.99,69.95,51.27,18.75,30.79,79.04,52.33,15.72,59.08,72.11,59.87,46.82,30.64,68.82,11.41,61.66,55.43,83.16,69.96,50.83,60.82 -intfloat/multilingual-e5-large,560,250,512,True,False,6732,1.76,1.53,1.48,1.36,2.5,3.07,1.38,1.39,1.34,69.5,55.07,57.67,46.71,89.86,84.32,61.52,62.34,34.88,53.01,80.36,79.65,63.15,46.99,78.43,48.52,10.78,13.79,88.39,18.28,2.85,31.03,79.73,64.78,47.24,28.11,82.31,32.64,58.51,45.32,91.69,64.37,53.58,60.47 -setu4993/LaBSE,471,501,512,True,False,25418,1.76,1.74,1.61,1.56,2.05,2.55,1.52,1.44,1.61,71.24,46.5,52.92,40.08,90.58,85.21,54.26,59.44,49.3,46.42,77.78,73.58,60.36,41.71,80.45,46.4,36.92,11.75,89.16,21.57,22.76,30.55,79.44,58.65,52.19,23.66,82.02,33.99,60.77,41.55,90.33,52.93,50.7,53.77 -"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,1.78,1.28,1.7,1.38,1.71,2.38,1.78,2.47,1.55,65.88,63.61,71.03,46.24,74.23,70.5,50.92,76.1,72.03,40.57,70.22,77.7,74.34,49.32,61.7,51.24,52.43,22.92,72.52,8.17,32.38,45.34,61.83,61.59,46.4,23.77,62.41,12.64,74.06,35.77,82.11,67.01,51.09,52.41 -"gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.79,1.51,2.0,1.33,1.38,2.23,1.87,2.38,1.6,64.8,53.07,64.18,49.02,77.72,71.7,36.27,71.7,58.79,40.95,75.06,74.85,65.23,53.02,72.85,53.43,49.26,27.36,68.0,27.3,28.09,58.59,67.18,50.12,44.98,27.01,69.12,12.36,58.88,45.88,81.23,63.46,46.45,57.64 -Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,1.81,1.54,2.12,1.43,2.08,2.31,1.39,2.33,1.29,64.66,53.42,53.93,55.55,75.68,75.89,38.41,56.42,39.34,44.35,69.73,78.76,57.57,56.43,65.08,48.71,20.51,28.29,77.19,45.55,16.92,47.51,70.66,59.33,54.53,32.66,71.32,9.12,63.96,58.36,76.84,68.94,57.74,71.22 -Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.81,1.46,2.09,1.5,2.1,2.0,1.56,2.35,1.46,63.81,54.5,57.19,55.77,72.21,70.24,39.85,63.14,43.24,43.41,62.12,79.89,61.71,54.99,68.54,47.72,22.98,26.72,76.56,46.61,23.45,55.28,68.94,58.78,52.66,27.62,67.16,9.84,66.06,50.91,75.84,68.66,56.46,58.39 -meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.85,1.46,2.01,1.34,2.14,2.15,1.75,2.42,1.52,67.07,59.89,56.56,49.02,79.84,79.93,41.11,57.84,43.52,40.92,71.98,81.15,64.46,51.22,69.04,50.79,20.36,24.56,78.46,51.6,17.25,50.98,70.47,60.55,50.13,20.42,70.37,10.87,62.87,44.3,82.35,71.07,51.27,50.23 -intfloat/multilingual-e5-large-instruct,560,250,514,True,False,5947,1.86,1.79,1.84,1.44,2.51,2.99,1.38,1.65,1.3,69.86,55.45,31.14,45.51,89.27,83.78,63.35,55.71,12.32,38.74,79.5,79.48,53.01,45.68,78.32,50.1,8.11,13.93,88.64,23.63,2.05,24.09,79.77,65.72,49.82,28.42,83.68,27.19,51.8,46.07,91.43,66.42,53.05,61.34 -ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.87,1.41,2.0,1.52,2.27,2.15,1.74,2.39,1.46,66.5,58.93,57.27,55.02,76.25,77.91,40.54,59.75,47.82,40.99,62.91,79.51,60.28,55.44,67.23,50.38,20.01,21.18,81.3,60.99,12.49,48.47,73.31,58.02,45.12,24.67,68.17,10.56,56.89,53.05,81.06,68.92,49.06,61.27 +meta-llama/Llama-3.1-70B-Instruct 
(few-shot),70554,128,131072,True,False,1409,1.71,1.35,1.96,1.25,1.95,2.03,1.53,2.27,1.38,68.57,60.52,57.57,54.33,80.05,80.67,40.65,56.42,38.21,49.22,72.16,81.69,63.97,57.99,69.95,51.27,18.75,30.79,79.04,52.33,15.72,59.08,72.11,59.87,46.82,30.64,68.82,11.41,61.66,55.43,83.16,69.96,50.83,60.82 +intfloat/multilingual-e5-large,560,250,512,True,False,6732,1.76,1.53,1.49,1.36,2.5,3.07,1.38,1.39,1.34,69.5,55.07,57.67,46.71,89.86,84.32,61.52,62.34,34.88,53.01,80.36,79.65,63.15,46.99,78.43,48.52,10.78,13.79,88.39,18.28,2.85,31.03,79.73,64.78,47.24,28.11,82.31,32.64,58.51,45.32,91.69,64.37,53.58,60.47 +setu4993/LaBSE,471,501,512,True,False,25418,1.76,1.74,1.6,1.55,2.04,2.55,1.52,1.44,1.62,71.24,46.5,52.92,40.08,90.58,85.21,54.26,59.44,49.3,46.42,77.78,73.58,60.36,41.71,80.45,46.4,36.92,11.75,89.16,21.57,22.76,30.55,79.44,58.65,52.19,23.66,82.02,33.99,60.77,41.55,90.33,52.93,50.7,53.77 +"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,1.78,1.28,1.66,1.38,1.7,2.38,1.79,2.47,1.55,65.88,63.61,71.03,46.24,74.23,70.5,50.92,76.1,72.03,40.57,70.22,77.7,74.34,49.32,61.7,51.24,52.43,22.92,72.52,8.17,32.38,45.34,61.83,61.59,46.4,23.77,62.41,12.64,74.06,35.77,82.11,67.01,51.09,52.41 +"gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.79,1.51,2.0,1.32,1.38,2.22,1.87,2.37,1.61,64.8,53.07,64.18,49.02,77.72,71.7,36.27,71.7,58.79,40.95,75.06,74.85,65.23,53.02,72.85,53.43,49.26,27.36,68.0,27.3,28.09,58.59,67.18,50.12,44.98,27.01,69.12,12.36,58.88,45.88,81.23,63.46,46.45,57.64 +Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,1.81,1.54,2.11,1.43,2.08,2.31,1.38,2.32,1.29,64.66,53.42,53.93,55.55,75.68,75.89,38.41,56.42,39.34,44.35,69.73,78.76,57.57,56.43,65.08,48.71,20.51,28.29,77.19,45.55,16.92,47.51,70.66,59.33,54.53,32.66,71.32,9.12,63.96,58.36,76.84,68.94,57.74,71.22 +Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.82,1.46,2.13,1.5,2.1,2.0,1.56,2.35,1.46,63.81,54.5,57.19,55.77,72.21,70.24,39.85,63.14,43.24,43.41,62.12,79.89,61.71,54.99,68.54,47.72,22.98,26.72,76.56,46.61,23.45,55.28,68.94,58.78,52.66,27.62,67.16,9.84,66.06,50.91,75.84,68.66,56.46,58.39 +meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.85,1.46,2.0,1.34,2.14,2.15,1.75,2.41,1.52,67.07,59.89,56.56,49.02,79.84,79.93,41.11,57.84,43.52,40.92,71.98,81.15,64.46,51.22,69.04,50.79,20.36,24.56,78.46,51.6,17.25,50.98,70.47,60.55,50.13,20.42,70.37,10.87,62.87,44.3,82.35,71.07,51.27,50.23 +intfloat/multilingual-e5-large-instruct,560,250,514,True,False,5947,1.86,1.79,1.8,1.44,2.51,3.0,1.38,1.65,1.3,69.86,55.45,31.14,45.51,89.27,83.78,63.35,55.71,12.32,38.74,79.5,79.48,53.01,45.68,78.32,50.1,8.11,13.93,88.64,23.63,2.05,24.09,79.77,65.72,49.82,28.42,83.68,27.19,51.8,46.07,91.43,66.42,53.05,61.34 +ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.87,1.41,1.99,1.52,2.27,2.16,1.74,2.39,1.46,66.5,58.93,57.27,55.02,76.25,77.91,40.54,59.75,47.82,40.99,62.91,79.51,60.28,55.44,67.23,50.38,20.01,21.18,81.3,60.99,12.49,48.47,73.31,58.02,45.12,24.67,68.17,10.56,56.89,53.05,81.06,68.92,49.06,61.27 FacebookAI/xlm-roberta-large,560,250,512,True,False,17897,1.93,1.6,1.53,1.53,2.16,3.4,1.26,2.22,1.76,72.74,48.33,57.3,43.57,91.66,86.19,50.25,55.51,43.89,57.57,80.33,76.63,49.72,46.64,82.83,48.41,22.78,15.72,87.85,5.14,1.17,27.72,80.64,63.02,54.83,29.09,83.49,8.82,64.8,50.72,89.81,41.97,35.55,68.88 -CohereForAI/c4ai-command-r-08-2024 
(few-shot),32296,256,131072,False,False,1909,2.03,1.79,2.46,1.73,2.43,2.34,1.75,2.22,1.53,63.63,50.82,35.58,54.33,73.2,72.26,35.8,36.86,23.4,40.32,65.01,77.68,34.06,56.78,62.68,31.96,11.81,30.49,73.26,47.71,10.41,57.08,68.62,56.2,43.36,28.5,68.58,14.41,55.01,58.63,78.35,67.62,46.5,63.2 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.04,1.9,2.08,2.03,1.84,2.54,1.92,2.34,1.67,59.96,56.91,67.13,17.52,72.74,69.17,67.45,74.27,54.83,3.67,62.45,77.69,68.93,12.11,64.69,56.18,45.52,15.8,73.8,39.45,34.78,0.87,65.51,55.16,44.6,21.87,68.71,20.33,49.52,34.06,77.38,66.75,52.43,41.03 -nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.05,1.75,2.19,1.69,2.42,2.32,1.86,2.57,1.58,58.34,59.14,56.46,39.77,69.21,70.45,39.87,57.8,40.31,40.97,58.65,81.81,63.69,42.29,54.15,51.96,19.63,21.22,64.88,52.56,14.28,50.11,61.98,57.07,45.61,24.89,55.08,10.8,61.31,49.8,73.66,68.56,51.33,56.87 -intfloat/multilingual-e5-base,278,250,512,True,False,14965,2.08,1.87,2.04,1.64,2.55,3.17,1.81,2.0,1.52,68.7,49.88,44.2,39.9,88.26,81.37,54.61,50.35,22.15,31.77,79.02,76.06,50.19,40.65,75.46,42.08,15.21,10.82,87.44,10.97,7.38,14.8,74.79,63.29,45.32,16.42,79.12,27.67,39.28,35.71,89.65,61.46,51.32,50.78 -"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.09,1.71,2.48,1.87,2.31,2.41,1.89,2.41,1.68,57.52,49.73,57.56,51.79,60.43,55.59,39.82,54.84,33.8,36.55,52.47,73.55,52.27,48.95,31.11,50.02,35.85,26.93,53.92,35.05,23.12,54.99,59.19,52.66,46.66,26.02,64.15,12.67,62.44,45.65,75.8,61.65,47.74,56.98 -"gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,2.1,1.85,2.39,1.7,2.07,2.57,1.73,2.43,2.03,35.79,53.69,62.98,51.96,60.16,48.74,39.62,71.38,42.94,36.04,51.31,73.54,66.39,52.22,30.39,47.47,43.3,29.82,57.52,13.18,28.03,59.06,57.47,58.67,54.55,27.02,55.72,11.13,67.28,54.2,42.4,65.24,44.59,62.94 -AI-Sweden-Models/roberta-large-1160k,355,50,512,True,False,14014,2.14,1.32,1.14,1.18,2.87,3.23,2.66,3.37,1.33,74.16,51.2,73.87,49.34,92.01,87.17,60.11,72.85,65.56,60.38,82.65,77.25,77.9,49.64,74.3,38.53,2.06,11.47,88.24,6.42,1.73,35.08,68.93,46.81,3.39,18.62,70.92,3.5,2.06,41.4,89.53,53.9,55.31,69.89 -mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.2,1.96,2.48,1.86,2.43,2.74,1.93,2.53,1.7,52.62,50.07,37.37,54.87,63.92,62.15,46.68,33.38,19.99,31.87,60.92,79.78,34.88,50.35,55.09,39.6,8.23,30.78,67.97,45.19,5.21,43.26,59.1,55.3,37.84,31.71,64.81,12.99,39.38,49.08,65.31,68.87,43.07,63.97 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.26,2.12,2.48,2.06,2.51,2.6,1.85,2.6,1.85,57.74,48.43,27.12,46.76,74.47,72.93,34.44,27.77,20.35,42.9,69.67,59.93,27.63,49.84,60.2,38.09,9.14,28.66,70.61,45.78,4.58,50.67,68.18,58.33,29.12,28.68,68.72,14.67,32.91,45.36,75.02,67.64,32.29,54.84 -nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.26,2.06,2.32,1.93,2.42,2.83,2.06,2.71,1.78,49.85,49.52,32.35,52.54,64.15,62.16,55.29,32.3,22.82,32.62,58.75,79.59,33.09,47.28,53.28,41.8,11.07,29.74,66.18,39.48,5.78,44.81,59.82,50.22,30.43,30.22,66.29,12.71,31.39,48.33,67.52,69.03,40.51,58.12 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.31,1.97,2.63,1.99,2.64,2.74,1.91,2.82,1.82,54.7,54.81,32.11,48.87,64.55,66.44,35.17,27.41,15.6,43.11,55.8,79.23,32.67,46.88,46.48,39.91,11.72,25.91,67.67,48.54,3.89,47.07,67.61,58.07,28.25,28.79,69.76,9.09,37.58,41.26,76.95,68.12,34.34,47.88 
-ZurichNLP/unsup-simcse-xlm-roberta-base,278,250,512,True,False,34520,2.35,2.33,2.21,1.93,2.85,3.39,2.15,1.92,2.01,65.1,45.07,26.83,29.92,86.56,80.57,49.62,38.45,11.38,31.5,75.49,71.12,36.69,33.55,75.27,46.33,3.47,7.67,84.14,21.2,1.33,14.2,74.5,58.23,34.74,11.19,78.45,22.67,54.92,31.82,85.88,51.46,35.83,43.26 -meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.37,2.17,2.74,2.0,2.49,3.12,2.17,2.52,1.75,50.92,47.86,29.19,48.38,65.17,60.22,34.02,32.48,18.38,33.06,62.19,80.31,30.29,42.78,52.97,41.29,5.95,31.99,65.65,24.3,0.61,45.01,59.45,53.39,23.87,27.14,64.79,11.95,32.97,63.89,69.86,66.76,30.96,71.39 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.37,2.16,2.57,1.97,2.58,3.18,2.02,2.65,1.82,49.46,51.16,23.01,49.75,61.48,61.58,32.94,21.2,19.65,53.35,59.92,80.91,26.39,47.69,50.45,34.68,8.69,31.94,61.11,19.4,2.02,50.34,56.0,56.4,22.01,35.39,62.26,10.45,30.3,62.99,66.31,64.3,28.18,70.38 -mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.39,2.06,2.46,2.05,2.15,2.85,2.38,2.95,2.19,48.24,39.52,62.92,36.92,56.41,55.6,25.18,62.56,53.09,42.57,48.92,62.08,68.93,36.4,37.98,47.34,50.14,16.27,55.31,51.62,15.07,18.22,50.17,43.41,40.59,19.75,47.6,10.62,61.64,24.02,51.92,67.01,45.98,30.12 -mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.39,2.2,2.6,2.1,2.86,3.05,2.06,2.55,1.72,55.49,49.18,7.4,57.72,67.24,66.08,31.41,28.72,20.55,40.6,54.76,73.32,16.17,57.94,47.1,24.43,7.19,28.73,65.55,29.49,2.05,47.72,60.5,50.39,30.86,30.53,66.51,11.91,34.46,59.23,72.4,63.46,35.86,68.42 -cardiffnlp/twitter-xlm-roberta-base,278,250,512,True,False,34475,2.41,2.09,2.07,1.84,2.61,3.79,2.26,2.37,2.25,70.1,45.3,51.74,22.01,87.7,81.41,48.34,55.3,37.46,24.49,72.49,70.69,56.6,31.89,72.69,35.62,28.72,8.46,83.96,5.16,1.05,0.0,74.89,63.01,36.6,0.65,77.15,18.78,56.72,14.61,87.09,55.4,39.78,6.2 -mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.41,2.08,2.71,2.11,3.11,2.87,2.04,2.63,1.7,51.2,50.95,33.44,46.85,55.02,57.37,36.76,30.73,18.96,41.01,46.15,80.33,32.89,46.51,35.0,23.93,9.69,21.38,63.43,43.99,9.38,25.35,58.07,52.18,41.45,26.87,58.8,12.5,45.22,47.03,70.02,69.48,44.59,55.7 -Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.44,2.24,2.14,2.1,3.09,3.4,2.2,2.72,1.64,51.94,51.97,29.99,38.99,66.22,64.14,55.48,26.13,17.32,49.75,56.28,77.51,23.25,47.09,42.23,27.93,6.38,19.39,62.2,26.68,7.07,11.97,61.37,51.38,35.58,19.92,62.86,15.11,39.11,36.48,72.77,70.12,44.68,57.17 -CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.46,2.15,2.63,2.06,3.22,3.08,2.11,2.64,1.78,51.32,52.0,18.48,52.43,66.55,63.63,38.61,15.8,12.3,43.26,57.38,78.43,14.52,53.14,28.98,19.83,4.93,24.72,64.72,28.57,5.12,38.83,59.95,55.39,30.59,26.94,62.07,13.7,35.14,49.15,67.33,68.67,31.18,68.33 -NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.46,2.51,2.42,1.73,2.94,3.03,2.34,2.89,1.81,49.18,49.76,41.28,12.83,58.05,59.65,57.94,51.36,42.84,14.72,57.66,80.04,45.21,52.73,41.84,38.12,7.47,20.87,58.62,40.74,5.58,28.04,56.14,53.33,29.49,18.49,62.81,11.28,28.57,38.75,63.77,69.23,38.49,57.03 +CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.02,1.78,2.42,1.73,2.43,2.34,1.75,2.21,1.53,63.63,50.82,35.58,54.33,73.2,72.26,35.8,36.86,23.4,40.32,65.01,77.68,34.06,56.78,62.68,31.96,11.81,30.49,73.26,47.71,10.41,57.08,68.62,56.2,43.36,28.5,68.58,14.41,55.01,58.63,78.35,67.62,46.5,63.2 +"gpt-4o-mini-2024-07-18 (few-shot, 
val)",-1,200,8191,True,False,784,2.04,1.9,2.08,2.03,1.83,2.54,1.9,2.34,1.67,59.96,56.91,67.13,17.52,72.74,69.17,67.45,74.27,54.83,3.67,62.45,77.69,68.93,12.11,64.69,56.18,45.52,15.8,73.8,39.45,34.78,0.87,65.51,55.16,44.6,21.87,68.71,20.33,49.52,34.06,77.38,66.75,52.43,41.03 +nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.05,1.75,2.19,1.69,2.42,2.32,1.86,2.56,1.58,58.34,59.14,56.46,39.77,69.21,70.45,39.87,57.8,40.31,40.97,58.65,81.81,63.69,42.29,54.15,51.96,19.63,21.22,64.88,52.56,14.28,50.11,61.98,57.07,45.61,24.89,55.08,10.8,61.31,49.8,73.66,68.56,51.33,56.87 +intfloat/multilingual-e5-base,278,250,512,True,False,14965,2.08,1.87,2.04,1.64,2.55,3.18,1.8,2.0,1.52,68.7,49.88,44.2,39.9,88.26,81.37,54.61,50.35,22.15,31.77,79.02,76.06,50.19,40.65,75.46,42.08,15.21,10.82,87.44,10.97,7.38,14.8,74.79,63.29,45.32,16.42,79.12,27.67,39.28,35.71,89.65,61.46,51.32,50.78 +"gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,2.09,1.84,2.38,1.7,2.07,2.56,1.73,2.43,2.03,35.79,53.69,62.98,51.96,60.16,48.74,39.62,71.38,42.94,36.04,51.31,73.54,66.39,52.22,30.39,47.47,43.3,29.82,57.52,13.18,28.03,59.06,57.47,58.67,54.55,27.02,55.72,11.13,67.28,54.2,42.4,65.24,44.59,62.94 +"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.09,1.71,2.47,1.87,2.3,2.4,1.89,2.41,1.68,57.52,49.73,57.56,51.79,60.43,55.59,39.82,54.84,33.8,36.55,52.47,73.55,52.27,48.95,31.11,50.02,35.85,26.93,53.92,35.05,23.12,54.99,59.19,52.66,46.66,26.02,64.15,12.67,62.44,45.65,75.8,61.65,47.74,56.98 +AI-Sweden-Models/roberta-large-1160k,355,50,512,True,False,14014,2.14,1.32,1.14,1.18,2.87,3.23,2.66,3.37,1.34,74.16,51.2,73.87,49.34,92.01,87.17,60.11,72.85,65.56,60.38,82.65,77.25,77.9,49.64,74.3,38.53,2.06,11.47,88.24,6.42,1.73,35.08,68.93,46.81,3.39,18.62,70.92,3.5,2.06,41.4,89.53,53.9,55.31,69.89 +mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.2,1.96,2.49,1.86,2.42,2.74,1.93,2.53,1.7,52.62,50.07,37.37,54.87,63.92,62.15,46.68,33.38,19.99,31.87,60.92,79.78,34.88,50.35,55.09,39.6,8.23,30.78,67.97,45.19,5.21,43.26,59.1,55.3,37.84,31.71,64.81,12.99,39.38,49.08,65.31,68.87,43.07,63.97 +nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.26,2.06,2.33,1.93,2.42,2.83,2.06,2.69,1.78,49.85,49.52,32.35,52.54,64.15,62.16,55.29,32.3,22.82,32.62,58.75,79.59,33.09,47.28,53.28,41.8,11.07,29.74,66.18,39.48,5.78,44.81,59.82,50.22,30.43,30.22,66.29,12.71,31.39,48.33,67.52,69.03,40.51,58.12 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.27,2.12,2.54,2.05,2.51,2.6,1.85,2.62,1.85,57.74,48.43,27.12,46.76,66.56,68.29,34.47,28.22,18.21,47.34,69.67,59.93,27.63,49.84,60.2,38.09,9.14,28.66,70.61,45.78,4.58,50.67,68.18,58.33,29.12,28.68,68.72,14.67,32.91,45.36,75.02,67.64,32.29,54.84 +ZurichNLP/unsup-simcse-xlm-roberta-base,278,250,512,True,False,34520,2.35,2.33,2.21,1.92,2.85,3.39,2.15,1.92,2.01,65.1,45.07,26.83,29.92,86.56,80.57,49.62,38.45,11.38,31.5,75.49,71.12,36.69,33.55,75.27,46.33,3.47,7.67,84.14,21.2,1.33,14.2,74.5,58.23,34.74,11.19,78.45,22.67,54.92,31.82,85.88,51.46,35.83,43.26 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.35,1.97,2.62,1.98,2.63,2.74,2.04,3.0,1.82,54.7,54.81,32.11,48.87,64.55,66.44,35.17,27.41,15.6,43.11,55.8,79.23,32.67,46.88,46.48,39.91,11.72,25.91,67.67,48.54,3.89,47.07,62.03,58.15,30.18,26.48,61.68,8.97,36.57,33.88,76.95,68.12,34.34,47.88 +meta-llama/Llama-3.1-8B 
(few-shot),8030,128,131072,True,False,2986,2.37,2.17,2.74,2.0,2.49,3.12,2.17,2.54,1.75,50.92,47.86,29.19,48.38,65.17,60.22,34.02,32.48,18.38,33.06,62.19,80.31,30.29,42.78,52.97,41.29,5.95,31.99,65.65,24.3,0.61,45.01,59.45,53.39,23.87,27.14,64.79,11.95,32.97,63.89,69.86,66.76,30.96,71.39 +mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.38,2.05,2.45,2.05,2.14,2.84,2.38,2.95,2.19,48.24,39.52,62.92,36.92,56.41,55.6,25.18,62.56,53.09,42.57,48.92,62.08,68.93,36.4,37.98,47.34,50.14,16.27,55.31,51.62,15.07,18.22,50.17,43.41,40.59,19.75,47.6,10.62,61.64,24.02,51.92,67.01,45.98,30.12 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.39,2.16,2.79,1.97,2.57,3.18,2.02,2.63,1.82,49.46,51.16,23.01,49.75,62.89,56.18,33.07,30.73,20.57,30.77,59.92,80.91,26.39,47.69,50.45,34.68,8.69,31.94,61.11,19.4,2.02,50.34,56.0,56.4,22.01,35.39,62.26,10.45,30.3,62.99,66.31,64.3,28.18,70.38 +mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.4,2.2,2.6,2.1,2.85,3.04,2.06,2.59,1.72,55.49,49.18,7.4,57.72,67.24,66.08,31.41,28.72,20.55,40.6,54.76,73.32,16.17,57.94,47.1,24.43,7.19,28.73,65.55,29.49,2.05,47.72,60.5,50.39,30.86,30.53,63.3,11.82,32.2,59.45,72.4,63.46,35.86,68.42 +mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.4,2.08,2.7,2.11,3.1,2.87,2.04,2.63,1.7,51.2,50.95,33.44,46.85,55.02,57.37,36.76,30.73,18.96,41.01,46.15,80.33,32.89,46.51,35.0,23.93,9.69,21.38,63.43,43.99,9.38,25.35,58.07,52.18,41.45,26.87,58.8,12.5,45.22,47.03,70.02,69.48,44.59,55.7 +cardiffnlp/twitter-xlm-roberta-base,278,250,512,True,False,34475,2.41,2.09,2.07,1.84,2.6,3.79,2.26,2.37,2.26,70.1,45.3,51.74,22.01,87.7,81.41,48.34,55.3,37.46,24.49,72.49,70.69,56.6,31.89,72.69,35.62,28.72,8.46,83.96,5.16,1.05,0.0,74.89,63.01,36.6,0.65,77.15,18.78,56.72,14.61,87.09,55.4,39.78,6.2 +Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.44,2.24,2.19,2.09,3.09,3.39,2.2,2.72,1.64,51.94,51.97,29.99,38.99,66.22,64.14,55.48,26.13,17.32,49.75,56.28,77.51,23.25,47.09,42.23,27.93,6.38,19.39,62.2,26.68,7.07,11.97,61.37,51.38,35.58,19.92,62.86,15.11,39.11,36.48,72.77,70.12,44.68,57.17 +NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.46,2.51,2.42,1.72,2.93,3.02,2.34,2.89,1.81,49.18,49.76,41.28,12.83,58.05,59.65,57.94,51.36,42.84,14.72,57.66,80.04,45.21,52.73,41.84,38.12,7.47,20.87,58.62,40.74,5.58,28.04,56.14,53.33,29.49,18.49,62.81,11.28,28.57,38.75,63.77,69.23,38.49,57.03 +CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.47,2.15,2.64,2.05,3.21,3.08,2.11,2.73,1.78,51.32,52.0,18.48,52.43,66.55,63.63,38.61,15.8,12.3,43.26,57.38,78.43,14.52,53.14,28.98,19.83,4.93,24.72,64.72,28.57,5.12,38.83,59.95,55.39,30.59,26.94,53.02,13.68,29.97,53.4,67.33,68.67,31.18,68.33 NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.48,2.01,2.19,2.07,2.97,3.12,2.33,3.02,2.17,51.95,52.11,44.47,43.32,58.53,60.26,59.48,51.85,41.89,25.62,57.01,80.12,43.04,30.44,36.73,34.34,6.57,22.59,62.6,31.36,5.21,34.26,57.26,54.57,26.52,19.96,62.76,13.83,24.44,26.17,63.12,66.47,38.82,29.16 -mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.48,2.36,2.66,2.11,2.91,3.11,2.09,2.81,1.77,45.42,43.16,8.79,59.43,52.0,55.12,47.25,8.66,6.8,46.86,53.34,80.0,4.61,58.99,50.69,24.38,1.46,27.11,62.63,25.57,2.84,44.06,55.37,54.27,23.12,31.89,58.15,7.94,25.41,62.56,63.4,68.17,30.92,73.45 -ibm-granite/granite-3.0-8b-instruct 
(few-shot),8171,49,4096,True,False,1118,2.51,2.29,2.56,2.15,3.34,3.18,2.13,2.73,1.68,44.92,49.31,10.14,57.34,53.79,56.13,51.36,6.83,8.09,48.01,44.94,76.78,16.96,56.83,42.67,9.95,1.11,22.25,59.96,28.33,2.24,39.52,54.68,55.48,26.89,31.27,53.62,13.37,23.47,61.2,66.17,68.03,39.76,71.21 -CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.53,2.14,2.86,2.27,3.06,2.96,2.21,2.89,1.85,53.44,49.17,20.55,51.7,61.54,60.94,35.73,21.33,13.2,32.36,47.15,80.24,11.35,49.93,42.29,38.87,0.28,18.74,64.53,44.01,2.39,34.84,62.43,53.9,29.68,19.35,61.21,9.03,38.15,44.91,71.96,67.26,40.81,49.79 -mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.54,2.32,2.72,2.12,3.0,3.2,2.17,2.99,1.82,43.6,45.92,15.43,59.13,50.56,52.65,44.61,12.1,9.3,45.15,49.18,79.08,11.06,58.98,46.73,26.28,1.5,25.17,61.32,26.73,1.3,44.98,55.41,52.58,24.1,31.52,56.52,7.02,23.41,61.9,61.02,67.29,30.1,73.59 -senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.56,2.23,2.5,2.24,3.16,3.63,2.1,2.68,1.97,52.61,49.81,19.64,48.03,64.37,62.77,50.6,18.09,12.25,38.34,58.9,67.74,16.52,49.41,56.71,7.92,3.44,21.55,67.42,20.01,7.02,0.65,64.38,54.44,26.03,25.68,64.25,13.66,28.59,49.64,70.62,67.78,30.99,49.56 -microsoft/xlm-align-base,278,250,512,True,False,14744,2.57,2.32,2.03,2.14,2.77,3.75,2.13,2.77,2.66,70.36,47.83,11.87,29.87,90.07,85.65,54.46,12.16,8.99,49.24,78.6,73.67,15.41,32.41,78.01,38.76,5.92,10.47,85.97,2.54,0.02,0.72,79.38,58.58,15.34,16.58,78.85,11.8,14.56,42.08,88.62,11.09,8.46,49.64 -utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.57,2.17,2.65,2.05,3.44,3.22,2.14,2.73,2.13,34.0,53.97,32.21,57.1,40.91,42.91,52.62,9.7,11.98,47.36,40.59,76.02,33.98,56.98,24.92,9.76,0.15,28.18,54.79,35.84,0.0,41.94,40.85,56.53,24.74,38.2,43.06,11.95,40.85,63.42,44.81,62.54,28.1,71.71 -ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4097,True,False,2515,2.58,2.19,2.53,2.09,3.58,3.24,2.24,2.86,1.92,44.58,47.16,19.2,58.41,49.94,52.17,53.27,17.22,12.01,45.04,44.8,75.92,24.84,56.71,37.82,-0.29,-0.12,21.59,61.47,24.35,1.44,41.54,50.43,57.84,22.58,27.96,52.26,8.46,42.42,53.11,55.76,66.89,36.6,67.55 -CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.64,2.29,2.82,2.07,3.45,3.37,2.42,2.74,1.95,47.08,47.16,8.41,58.83,60.94,59.61,35.73,6.18,4.0,46.52,60.04,76.21,7.54,58.6,47.16,0.33,3.84,21.75,62.22,17.34,0.01,38.7,51.2,47.79,18.04,29.46,60.81,7.9,31.12,63.0,56.16,68.27,23.82,74.23 -Twitter/twhin-bert-base,279,250,512,True,False,11514,2.66,2.42,2.61,2.03,2.84,3.72,2.3,3.05,2.28,60.01,42.17,29.43,29.79,84.11,77.22,37.02,35.42,6.87,25.98,70.17,66.62,46.72,31.38,70.38,40.22,11.09,7.67,84.0,3.05,1.94,0.02,70.35,55.03,43.87,2.81,74.03,9.53,39.12,7.71,87.77,41.09,32.26,35.15 -ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.66,2.5,2.77,2.24,3.41,3.38,2.33,2.79,1.87,48.44,39.07,9.72,51.18,68.4,65.15,42.0,5.2,3.32,37.51,59.77,74.45,3.97,50.18,50.89,9.52,0.5,17.43,66.82,21.19,-0.36,36.47,59.07,49.75,14.71,29.45,63.29,13.81,8.16,56.64,72.76,62.35,21.57,69.8 -timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.68,2.31,2.65,1.96,3.51,3.58,2.33,3.13,1.96,42.43,47.82,16.51,56.95,48.97,51.52,49.05,14.37,9.96,44.07,44.14,80.14,34.23,57.07,36.47,1.84,2.54,18.66,58.96,8.97,0.0,39.2,50.66,54.79,20.17,27.86,54.56,8.43,10.99,55.91,59.1,68.41,25.43,71.89 
-sentence-transformers/paraphrase-xlm-r-multilingual-v1,278,250,512,True,False,20154,2.69,2.41,2.72,2.26,3.13,3.74,2.59,2.42,2.26,61.17,46.39,38.61,19.9,81.26,74.05,49.93,38.26,25.17,0.0,70.22,71.33,39.6,18.65,65.15,33.26,1.15,9.56,80.92,12.97,1.19,0.0,69.45,57.94,21.81,0.33,70.59,21.37,45.86,5.2,84.05,54.92,45.85,4.13 -mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.71,2.29,2.87,2.36,3.5,3.23,2.4,2.9,2.1,44.89,48.09,19.06,51.56,53.42,54.34,38.79,17.06,11.0,35.74,47.92,62.9,19.95,52.51,34.8,17.64,5.46,12.66,61.28,32.07,1.68,39.0,55.1,47.69,24.14,23.93,55.56,12.37,21.5,50.77,62.11,59.91,30.66,58.27 -sentence-transformers/stsb-xlm-r-multilingual,278,250,512,True,False,15040,2.72,2.54,2.75,2.2,3.08,3.52,2.59,2.88,2.21,58.52,42.26,34.8,19.6,80.08,74.59,52.16,36.3,14.21,0.0,68.94,72.77,40.21,20.09,66.23,37.79,0.04,10.04,82.97,18.07,2.93,0.0,67.47,52.85,29.59,0.73,66.85,20.56,35.56,5.04,82.39,57.35,47.29,4.29 -occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.73,2.57,2.85,2.27,3.32,3.48,2.28,2.99,2.1,40.19,42.31,1.14,57.89,45.5,45.96,44.46,0.0,0.0,52.19,47.67,71.73,7.9,57.78,40.71,14.7,0.71,20.66,60.37,8.21,0.0,43.69,52.63,43.16,27.09,34.01,53.78,7.78,16.23,63.09,56.9,62.1,20.17,75.29 -Twitter/twhin-bert-large,561,250,512,True,False,9707,2.75,2.5,2.97,2.28,3.06,3.77,2.28,3.1,2.05,66.39,39.36,7.06,33.88,86.26,80.1,34.17,12.11,4.28,11.74,74.26,63.35,16.07,36.77,71.48,32.07,2.2,8.19,84.73,-0.64,1.37,4.15,74.36,53.52,22.26,11.68,77.35,6.55,18.25,28.37,89.5,45.98,30.58,48.44 -Geotrend/distilbert-base-25lang-cased,109,85,512,True,False,26099,2.77,2.6,2.65,2.3,3.21,3.72,2.45,2.9,2.36,58.44,31.81,34.13,27.6,83.59,80.29,33.19,32.6,24.97,19.93,70.56,60.69,30.83,31.41,65.76,29.56,2.08,6.81,83.21,3.9,2.37,1.35,72.97,41.51,45.39,1.89,75.02,7.45,45.28,20.18,87.08,36.77,37.1,26.99 -ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.78,2.61,2.8,2.28,3.61,3.45,2.58,2.95,1.97,45.9,37.11,11.7,50.11,66.91,62.82,40.71,9.5,6.74,32.83,52.85,73.93,8.27,48.49,43.2,2.54,0.0,14.28,60.46,21.59,0.51,33.54,54.45,43.62,15.24,26.0,60.72,12.38,10.96,51.2,72.59,61.61,18.37,66.68 -occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.78,2.58,2.87,2.22,3.4,3.55,2.3,3.16,2.14,37.93,44.62,0.28,58.05,45.28,46.0,44.95,0.0,0.0,43.88,49.02,76.56,2.18,58.98,40.08,16.23,1.59,15.98,58.67,10.39,0.0,40.95,51.39,47.3,21.83,31.55,51.31,7.41,13.04,59.28,55.37,63.32,18.92,72.38 -meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,2.8,2.5,3.11,2.33,3.52,3.12,2.62,3.17,2.01,41.12,42.77,11.52,51.14,49.66,51.98,44.13,0.67,1.11,28.62,43.74,76.98,16.01,48.38,27.57,10.07,-1.39,22.98,58.24,32.79,1.77,45.13,55.52,50.52,9.87,20.2,43.66,12.87,17.94,47.77,68.44,66.0,32.04,49.54 -mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,2.8,2.49,2.86,2.36,3.58,3.53,2.51,3.11,1.97,37.93,44.49,14.09,51.38,50.08,51.27,43.65,14.09,8.28,37.23,45.01,73.33,11.59,52.12,36.04,12.93,-0.36,18.06,55.42,15.85,1.11,33.54,47.19,47.26,22.32,24.36,52.72,7.91,18.14,52.75,57.58,61.44,34.92,65.38 -"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.85,2.14,3.23,2.53,3.36,3.64,2.56,2.79,2.55,59.48,56.46,20.57,38.23,69.39,62.76,3.97,31.65,5.86,36.65,57.06,59.89,9.3,39.97,34.99,31.19,-10.68,23.65,51.06,-3.58,4.1,45.29,55.59,43.73,23.74,21.36,61.15,12.71,35.26,41.27,74.35,31.19,21.76,45.7 -NbAiLab/nb-llama-3.1-70B 
(few-shot),70554,128,131072,True,False,1220,2.86,2.82,2.81,2.5,3.29,3.36,2.57,3.16,2.37,50.83,53.23,23.02,0.0,65.75,70.12,41.9,47.88,35.66,0.03,69.54,79.55,28.27,0.02,62.28,38.08,2.85,0.86,65.06,48.44,3.3,0.0,67.63,55.79,24.45,1.17,69.37,14.35,29.13,0.34,76.84,67.91,30.61,0.1 -meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.88,2.5,3.09,2.48,3.51,3.62,2.61,3.13,2.09,35.44,44.88,9.74,55.04,44.99,49.09,41.56,3.04,4.03,33.77,39.72,66.18,6.74,54.05,41.1,13.59,-1.07,16.13,59.77,13.24,-0.54,31.87,50.09,46.52,15.23,25.54,50.23,10.07,14.73,53.42,62.53,62.23,22.71,64.45 -nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.03,2.89,3.15,2.84,3.74,3.29,2.59,3.44,2.32,49.01,47.95,32.89,0.0,63.7,62.53,34.35,31.53,22.71,0.06,48.51,78.68,29.18,0.0,43.65,10.7,10.77,0.29,67.72,42.98,6.4,0.07,60.69,53.77,38.53,0.0,60.11,11.12,32.68,0.0,72.63,65.74,43.43,0.0 -ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.06,2.89,3.26,2.55,3.94,3.94,2.57,3.16,2.18,37.37,31.44,5.27,48.41,44.89,48.08,32.29,7.49,4.65,26.37,40.68,68.96,4.77,49.73,23.14,5.07,0.18,14.15,45.56,7.44,0.92,20.82,47.31,48.28,14.08,28.37,49.25,9.45,11.87,54.2,52.79,65.92,16.74,64.92 -sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,3.06,2.99,3.11,2.64,3.25,3.71,3.12,2.72,2.91,54.48,36.6,8.84,13.97,77.81,72.22,44.59,8.98,5.72,0.0,65.5,68.36,14.81,16.11,63.36,33.1,1.02,6.48,82.91,9.77,1.67,0.0,64.12,49.66,0.58,0.05,67.89,23.25,21.36,4.5,81.71,50.69,2.16,4.19 -MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.07,2.93,3.17,2.38,3.48,3.62,3.26,3.4,2.32,28.18,29.32,2.9,56.48,36.96,39.38,32.67,2.18,5.33,45.23,41.49,75.64,0.66,57.48,31.81,18.33,3.63,16.72,40.18,14.19,0.31,41.6,32.33,26.39,1.44,28.15,36.61,8.77,3.52,59.51,47.2,64.82,7.57,73.88 -ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.13,2.84,3.67,2.43,3.75,3.98,3.0,3.19,2.16,38.62,35.47,5.07,45.21,53.93,54.04,23.83,3.91,1.55,2.37,51.76,70.61,6.24,44.67,38.52,4.29,0.0,12.94,60.88,0.16,-0.35,18.54,49.38,41.72,7.67,13.7,50.88,12.39,3.31,48.44,60.64,61.2,7.63,69.83 -sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,3.14,2.99,3.11,2.64,3.29,3.71,3.12,3.34,2.91,54.48,36.6,8.84,15.42,77.81,72.22,44.59,8.98,5.72,0.0,65.5,68.33,14.81,16.11,59.15,33.1,0.8,6.14,82.91,9.77,1.67,0.0,63.78,49.69,0.74,0.02,65.04,17.4,-0.95,3.94,81.71,50.69,2.16,4.16 -ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.16,2.92,3.3,2.56,3.93,3.97,2.93,3.33,2.31,37.21,31.54,6.3,44.86,53.78,55.14,26.21,3.9,2.42,24.86,50.1,65.67,4.55,42.83,33.57,0.6,0.0,11.27,56.88,3.8,-0.21,13.72,49.16,35.17,9.79,22.48,48.53,10.15,4.88,45.38,58.3,59.01,10.33,65.04 -sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,3.16,2.63,2.91,2.45,3.32,3.75,3.08,4.2,2.97,56.75,44.48,26.74,17.89,78.31,72.13,47.53,26.92,14.63,0.0,66.5,72.19,28.75,15.91,62.44,31.32,1.91,3.69,82.24,6.35,2.84,0.0,60.54,54.99,0.52,0.8,59.61,0.0,-0.04,3.28,77.5,53.1,-0.35,3.13 -dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,3.17,3.17,3.19,2.33,3.46,3.83,2.9,3.54,2.95,47.61,24.17,8.14,25.19,68.63,67.7,25.68,6.73,3.35,22.57,68.83,64.25,28.62,28.78,56.62,24.79,-1.21,6.01,80.45,0.9,2.52,0.58,65.35,37.77,16.07,5.67,56.69,9.29,3.02,22.14,77.64,12.42,13.65,33.29 -ibm-granite/granite-3.0-3b-a800m-base 
(few-shot),3374,49,4096,True,False,10504,3.31,3.41,3.44,2.77,4.09,4.13,3.03,3.31,2.34,31.8,6.85,0.97,49.83,40.08,43.96,31.9,-0.07,1.27,23.32,36.01,57.18,1.52,51.04,18.07,0.65,-0.72,12.27,41.27,5.4,-0.2,19.69,40.61,31.86,5.36,25.99,42.52,9.91,0.69,56.95,49.44,66.65,12.56,63.29 -EuropeanParliament/EUBERT,94,66,512,True,False,20070,3.33,3.1,3.65,3.06,3.56,4.05,3.07,3.24,2.93,41.09,27.33,21.58,20.68,49.92,44.37,19.81,8.64,3.11,15.89,38.36,59.0,19.4,19.23,29.71,36.28,2.71,6.5,59.5,0.94,3.25,5.56,49.95,40.29,25.88,2.59,49.54,14.86,27.9,20.65,71.43,17.53,23.93,31.01 -openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.35,3.05,3.55,2.85,3.79,3.82,3.14,3.57,3.02,34.66,21.93,1.5,52.36,37.36,42.83,16.02,-0.08,2.29,31.6,35.02,51.8,6.15,50.85,28.74,4.3,0.06,17.41,46.64,16.72,-1.54,18.69,39.39,23.6,7.68,25.3,39.24,4.25,11.48,54.18,50.73,27.52,2.96,63.42 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.4,3.05,3.43,2.75,4.08,4.36,3.19,3.66,2.65,19.61,37.92,2.81,50.05,31.43,36.92,30.63,0.98,1.67,33.24,27.41,72.24,0.13,49.77,13.29,6.83,0.92,7.49,33.53,4.25,-2.32,15.41,28.49,43.18,2.92,23.26,32.45,7.03,5.58,51.18,37.47,58.61,5.3,63.26 -HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.42,3.25,3.69,2.77,3.78,4.26,3.38,3.99,2.25,29.44,18.49,1.73,44.39,37.6,38.38,24.05,3.56,2.61,13.58,37.37,64.46,4.49,43.92,26.23,6.86,2.69,10.84,40.28,3.94,-0.26,10.68,32.54,27.03,8.95,18.38,31.84,1.56,5.05,40.55,47.58,66.78,20.53,58.07 -sentence-transformers/distiluse-base-multilingual-cased-v1,135,120,512,True,False,34042,3.43,3.33,3.62,3.06,3.54,3.72,3.56,3.29,3.31,46.78,27.78,3.04,15.52,60.76,59.62,25.98,2.65,3.47,0.2,49.86,60.06,3.18,16.08,45.47,35.18,1.14,1.63,75.63,9.82,3.65,0.0,28.29,51.7,2.12,0.03,58.67,17.82,9.27,2.17,71.33,36.75,0.24,1.3 -sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.47,3.66,3.51,3.08,3.5,3.72,3.39,3.61,3.3,26.96,30.13,2.01,8.25,63.79,60.96,32.83,1.09,0.18,0.0,51.67,63.04,2.32,8.93,47.62,33.35,2.64,1.27,77.16,10.41,4.09,0.0,40.2,48.71,5.53,0.06,56.98,9.66,19.37,3.11,71.9,36.22,0.47,2.44 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.53,3.28,3.63,3.21,3.83,3.92,3.55,3.79,3.03,29.49,13.77,0.0,51.53,34.78,39.0,10.69,6.17,5.9,31.25,37.17,20.2,6.13,46.66,26.58,-0.79,0.63,15.14,39.78,16.03,-0.48,20.04,38.81,10.59,0.91,22.54,42.35,0.78,-0.02,47.61,44.48,23.69,8.52,56.97 -sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.53,3.66,3.51,3.08,3.46,3.72,3.39,4.1,3.3,26.96,30.13,2.01,8.22,63.79,60.96,32.83,1.09,0.18,0.0,51.67,62.71,2.32,8.76,48.62,34.19,2.64,1.22,77.0,9.59,4.09,0.0,41.82,49.38,4.77,0.05,49.82,2.7,6.6,2.13,71.9,36.22,0.47,2.4 -HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.57,3.52,3.87,2.71,3.86,4.41,3.61,4.02,2.55,24.47,9.93,1.22,42.09,26.7,28.23,23.25,-0.47,0.26,13.4,35.96,68.31,3.61,43.26,20.5,10.09,0.83,10.84,27.91,0.77,-0.48,16.56,28.67,19.69,5.07,18.43,22.84,4.6,2.55,40.33,41.57,62.32,8.04,56.01 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.71,3.27,3.94,3.22,4.0,4.4,3.68,3.88,3.25,28.3,28.95,0.2,36.39,38.96,40.42,19.42,-0.13,0.77,4.7,36.29,39.68,0.96,32.64,22.56,0.53,-0.26,11.77,38.91,-1.72,0.66,4.82,34.68,21.76,0.85,14.3,38.22,4.99,1.85,27.77,40.45,47.89,0.28,26.77 
-3ebdola/Dialectal-Arabic-XLM-R-Base,278,250,512,True,False,12783,3.76,3.69,3.91,3.47,3.49,4.0,3.74,4.02,3.75,36.51,22.07,1.63,3.09,55.55,53.53,12.69,2.79,1.66,0.0,42.78,44.95,1.43,8.71,46.95,20.94,1.4,10.48,73.34,1.49,0.97,0.0,30.18,32.66,2.1,0.46,44.46,8.39,2.07,4.3,68.25,1.92,1.08,2.79 -dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,3.8,3.82,3.99,3.61,3.8,3.98,3.75,4.02,3.44,33.62,20.71,1.19,4.19,46.11,35.18,19.19,2.76,0.42,0.0,26.87,57.41,-1.06,5.54,43.93,9.46,0.04,6.13,72.08,-1.81,2.65,0.0,33.18,33.61,1.83,0.0,41.38,8.45,1.55,4.4,65.85,25.85,1.21,4.0 -PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.94,3.72,4.0,3.37,4.24,4.6,3.9,3.94,3.78,15.93,13.01,0.05,36.85,28.82,27.81,18.74,-0.46,-0.84,12.66,21.42,45.75,-0.25,32.71,18.86,-0.67,-0.76,8.09,22.75,-0.03,-0.78,7.75,23.08,7.41,0.89,17.32,31.13,7.24,1.23,32.13,27.37,36.35,-0.37,7.42 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.95,3.43,4.11,3.75,4.14,4.63,4.06,4.01,3.49,16.17,29.12,-0.47,34.8,27.47,23.82,22.22,-2.06,-0.77,2.48,14.09,23.71,1.74,32.0,9.9,2.13,0.02,10.64,20.76,-1.78,0.87,3.58,24.32,15.58,1.25,6.82,23.58,7.9,1.79,26.11,21.6,45.04,-0.46,33.46 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.07,4.13,4.37,3.9,4.33,4.54,4.02,4.29,2.98,8.97,2.66,1.65,24.92,20.37,21.27,7.6,1.31,0.51,4.8,13.64,9.34,2.2,26.06,13.6,3.12,0.28,4.09,26.85,3.07,-0.12,1.39,18.77,12.59,1.64,9.27,15.68,6.73,0.63,19.73,30.73,59.51,1.55,49.03 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.07,4.01,4.34,3.93,4.31,4.6,3.95,4.23,3.19,12.68,3.61,1.79,28.12,26.6,23.7,6.21,-0.39,0.21,4.65,18.22,11.52,1.72,27.27,13.43,3.82,1.14,3.71,28.14,-0.56,-0.06,2.43,19.94,19.64,0.0,8.78,20.95,6.84,-1.5,22.67,31.14,43.97,3.49,47.91 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.18,3.94,4.37,4.0,4.33,4.52,4.02,4.45,3.81,13.84,9.47,-0.36,22.1,26.59,26.78,7.91,0.28,0.04,0.65,22.09,14.15,-0.04,21.6,17.73,2.38,-0.18,1.59,31.99,0.0,0.48,0.29,25.3,13.9,-0.25,6.12,24.47,3.57,-2.03,10.18,31.79,19.13,-0.03,12.35 -PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.23,4.04,4.4,3.97,4.35,4.65,4.15,4.48,3.82,10.59,13.31,0.52,16.61,25.02,21.59,8.05,-0.15,-0.97,0.37,16.28,17.38,-0.45,17.78,13.8,2.17,-0.63,1.29,22.55,0.67,0.87,0.27,21.03,10.99,0.13,3.61,21.32,4.37,-0.19,9.38,27.45,27.39,0.31,15.62 -fresh-xlm-roberta-base,278,250,512,True,False,2214,4.24,4.12,4.28,3.94,4.03,4.26,4.27,4.79,4.21,16.04,17.37,1.34,1.58,25.49,25.94,12.6,0.5,1.83,0.0,11.91,51.11,0.86,2.0,17.34,25.25,-0.06,1.02,48.7,1.07,2.37,0.0,8.03,23.44,-0.17,0.0,13.09,0.92,1.93,0.26,34.64,4.0,1.33,0.43 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.33,4.14,4.31,4.25,4.33,4.58,4.53,4.75,3.74,13.72,3.79,-0.45,14.69,24.37,24.69,8.84,-1.2,-0.5,0.16,19.15,-3.03,0.06,14.18,14.74,3.13,-0.25,1.35,25.51,-0.24,0.46,0.15,16.89,2.74,-0.34,0.28,17.49,2.01,-0.02,0.53,31.26,26.69,1.78,13.88 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.37,4.24,4.52,4.18,4.37,4.57,4.49,4.79,3.79,12.11,2.61,0.25,14.02,20.89,19.62,2.78,-0.98,0.93,0.15,17.09,7.41,0.47,11.73,13.7,3.01,-0.83,0.94,23.22,3.78,0.41,0.54,15.54,2.51,0.36,1.77,15.82,-0.62,1.16,3.25,29.96,18.64,1.85,26.9 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.82,4.68,4.86,4.8,4.66,5.0,4.77,5.08,4.72,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.3,0.0,0.0,0.0,0.74,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.48,0.01 +mistralai/Mistral-7B-v0.1 
(few-shot),7242,32,32768,True,False,1446,2.48,2.35,2.68,2.1,2.9,3.11,2.09,2.8,1.77,45.42,43.16,8.79,59.43,52.0,55.12,47.25,8.66,6.8,46.86,53.34,80.0,4.61,58.99,50.69,24.38,1.46,27.11,62.63,25.57,2.84,44.06,55.37,54.27,23.12,31.89,58.15,7.94,25.41,62.56,63.4,68.17,30.92,73.45 +ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.5,2.28,2.54,2.15,3.33,3.18,2.13,2.72,1.68,44.92,49.31,10.14,57.34,53.79,56.13,51.36,6.83,8.09,48.01,44.94,76.78,16.96,56.83,42.67,9.95,1.11,22.25,59.96,28.33,2.24,39.52,54.68,55.48,26.89,31.27,53.62,13.37,23.47,61.2,66.17,68.03,39.76,71.21 +CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.53,2.14,2.87,2.26,3.05,2.96,2.21,2.89,1.85,53.44,49.17,20.55,51.7,61.54,60.94,35.73,21.33,13.2,32.36,47.15,80.24,11.35,49.93,42.29,38.87,0.28,18.74,64.53,44.01,2.39,34.84,62.43,53.9,29.68,19.35,61.21,9.03,38.15,44.91,71.96,67.26,40.81,49.79 +mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.54,2.31,2.71,2.11,2.99,3.2,2.17,2.98,1.82,43.6,45.92,15.43,59.13,50.56,52.65,44.61,12.1,9.3,45.15,49.18,79.08,11.06,58.98,46.73,26.28,1.5,25.17,61.32,26.73,1.3,44.98,55.41,52.58,24.1,31.52,56.52,7.02,23.41,61.9,61.02,67.29,30.1,73.59 +senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.56,2.23,2.52,2.23,3.15,3.63,2.1,2.67,1.97,52.61,49.81,19.64,48.03,64.37,62.77,50.6,18.09,12.25,38.34,58.9,67.74,16.52,49.41,56.71,7.92,3.44,21.55,67.42,20.01,7.02,0.65,64.38,54.44,26.03,25.68,64.25,13.66,28.59,49.64,70.62,67.78,30.99,49.56 +utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.56,2.16,2.64,2.04,3.43,3.22,2.13,2.72,2.13,34.0,53.97,32.21,57.1,40.91,42.91,52.62,9.7,11.98,47.36,40.59,76.02,33.98,56.98,24.92,9.76,0.15,28.18,54.79,35.84,0.0,41.94,40.85,56.53,24.74,38.2,43.06,11.95,40.85,63.42,44.81,62.54,28.1,71.71 +ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4097,True,False,2515,2.58,2.19,2.52,2.09,3.58,3.24,2.24,2.86,1.92,44.58,47.16,19.2,58.41,49.94,52.17,53.27,17.22,12.01,45.04,44.8,75.92,24.84,56.71,37.82,-0.29,-0.12,21.59,61.47,24.35,1.44,41.54,50.43,57.84,22.58,27.96,52.32,8.46,42.42,53.12,55.76,66.89,36.6,67.55 +microsoft/xlm-align-base,278,250,512,True,False,14744,2.58,2.32,2.08,2.14,2.77,3.75,2.17,2.77,2.67,70.36,47.83,11.87,29.87,90.07,85.65,54.46,12.16,8.99,49.24,78.6,73.67,15.41,32.41,78.01,38.76,5.92,10.47,85.97,2.54,0.02,0.72,79.38,58.58,15.34,16.58,78.85,11.8,14.56,42.08,88.62,11.09,8.46,49.64 +CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.63,2.29,2.8,2.06,3.44,3.36,2.42,2.72,1.95,47.08,47.16,8.41,58.83,60.94,59.61,35.73,6.18,4.0,46.52,60.04,76.21,7.54,58.6,47.16,0.33,3.84,21.75,62.22,17.34,0.01,38.7,51.2,47.79,18.04,29.46,60.81,7.9,31.12,63.0,56.16,68.27,23.82,74.23 +Twitter/twhin-bert-base,279,250,512,True,False,11514,2.66,2.42,2.61,2.03,2.83,3.72,2.31,3.05,2.28,60.01,42.17,29.43,29.79,84.11,77.22,37.02,35.42,6.87,25.98,70.17,66.62,46.72,31.38,70.38,40.22,11.09,7.67,84.0,3.05,1.94,0.02,70.35,55.03,43.87,2.81,74.03,9.53,39.12,7.71,87.77,41.09,32.26,35.15 +ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.66,2.5,2.76,2.23,3.4,3.38,2.33,2.79,1.87,48.44,39.07,9.72,51.18,68.4,65.15,42.0,5.2,3.32,37.51,59.77,74.45,3.97,50.18,50.89,9.52,0.5,17.43,66.82,21.19,-0.36,36.47,59.07,49.75,14.71,29.45,63.29,13.81,8.16,56.64,72.76,62.35,21.57,69.8 +timpal0l/Mistral-7B-v0.1-flashback-v2 
(few-shot),7242,32,32768,True,False,5054,2.68,2.31,2.64,1.96,3.51,3.58,2.34,3.12,1.96,42.43,47.82,16.51,56.95,48.97,51.52,49.05,14.37,9.96,44.07,44.14,80.14,34.23,57.07,36.47,1.84,2.54,18.66,58.96,8.97,0.0,39.2,50.66,54.79,20.17,27.86,54.56,8.43,10.99,55.91,59.1,68.41,25.43,71.89 +sentence-transformers/paraphrase-xlm-r-multilingual-v1,278,250,512,True,False,20154,2.69,2.41,2.73,2.25,3.13,3.74,2.6,2.41,2.26,61.17,46.39,38.61,19.9,81.26,74.05,49.93,38.26,25.17,0.0,70.22,71.33,39.6,18.65,65.15,33.26,1.15,9.56,80.92,12.97,1.19,0.0,69.45,57.94,21.81,0.33,70.59,21.37,45.86,5.2,84.05,54.92,45.85,4.13 +mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.7,2.29,2.86,2.35,3.49,3.23,2.4,2.9,2.1,44.89,48.09,19.06,51.56,53.42,54.34,38.79,17.06,11.0,35.74,47.92,62.9,19.95,52.51,34.8,17.64,5.46,12.66,61.28,32.07,1.68,39.0,55.1,47.69,24.14,23.93,55.56,12.37,21.5,50.77,62.11,59.91,30.66,58.27 +sentence-transformers/stsb-xlm-r-multilingual,278,250,512,True,False,15040,2.72,2.54,2.75,2.2,3.07,3.53,2.59,2.88,2.22,58.52,42.26,34.8,19.6,80.08,74.59,52.16,36.3,14.21,0.0,68.94,72.77,40.21,20.09,66.23,37.79,0.04,10.04,82.97,18.07,2.93,0.0,67.47,52.85,29.59,0.73,66.85,20.56,35.56,5.04,82.39,57.35,47.29,4.29 +occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.73,2.56,2.85,2.26,3.31,3.48,2.27,2.99,2.1,40.19,42.31,1.14,57.89,45.5,45.96,44.46,0.0,0.0,52.19,47.67,71.73,7.9,57.78,40.71,14.7,0.71,20.66,60.37,8.21,0.0,43.69,52.63,43.16,27.09,34.01,53.78,7.78,16.23,63.09,56.9,62.1,20.17,75.29 +Twitter/twhin-bert-large,561,250,512,True,False,9707,2.75,2.5,2.97,2.28,3.06,3.78,2.29,3.1,2.05,66.39,39.36,7.06,33.88,86.26,80.1,34.17,12.11,4.28,11.74,74.26,63.35,16.07,36.77,71.48,32.07,2.2,8.19,84.73,-0.64,1.37,4.15,74.36,53.52,22.26,11.68,77.35,6.55,18.25,28.37,89.5,45.98,30.58,48.44 +Geotrend/distilbert-base-25lang-cased,109,85,512,True,False,26099,2.77,2.59,2.65,2.29,3.21,3.72,2.45,2.9,2.36,58.44,31.81,34.13,27.6,83.59,80.29,33.19,32.6,24.97,19.93,70.56,60.69,30.83,31.41,65.76,29.56,2.08,6.81,83.21,3.9,2.37,1.35,72.97,41.51,45.39,1.89,75.02,7.45,45.28,20.18,87.08,36.77,37.1,26.99 +ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.78,2.6,2.8,2.28,3.61,3.45,2.58,2.95,1.98,45.9,37.11,11.7,50.11,66.91,62.82,40.71,9.5,6.74,32.83,52.85,73.93,8.27,48.49,43.2,2.54,0.0,14.28,60.46,21.59,0.51,33.54,54.45,43.62,15.24,26.0,60.72,12.38,10.96,51.2,72.59,61.61,18.37,66.68 +occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.78,2.58,2.86,2.22,3.4,3.54,2.3,3.16,2.15,37.93,44.62,0.28,58.05,45.28,46.0,44.95,0.0,0.0,43.88,49.02,76.56,2.18,58.98,40.08,16.23,1.59,15.98,58.67,10.39,0.0,40.95,51.39,47.3,21.83,31.55,51.31,7.41,13.04,59.28,55.37,63.32,18.92,72.38 +meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,2.79,2.5,3.11,2.32,3.52,3.11,2.62,3.17,2.01,41.12,42.77,11.52,51.14,49.66,51.98,44.13,0.67,1.11,28.62,43.74,76.98,16.01,48.38,27.57,10.07,-1.39,22.98,58.24,32.79,1.77,45.13,55.52,50.52,9.87,20.2,43.66,12.87,17.94,47.77,68.44,66.0,32.04,49.54 +mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,2.8,2.48,2.84,2.36,3.58,3.52,2.51,3.11,1.97,37.93,44.49,14.09,51.38,50.08,51.27,43.65,14.09,8.28,37.23,45.01,73.33,11.59,52.12,36.04,12.93,-0.36,18.06,55.42,15.85,1.11,33.54,47.19,47.26,22.32,24.36,52.72,7.91,18.14,52.75,57.58,61.44,34.92,65.38 +"claude-3-5-haiku-20241022 (zero-shot, 
val)",-1,-1,200000,True,False,277,2.84,2.14,3.21,2.52,3.35,3.63,2.56,2.79,2.56,59.48,56.46,20.57,38.23,69.39,62.76,3.97,31.65,5.86,36.65,57.06,59.89,9.3,39.97,34.99,31.19,-10.68,23.65,51.06,-3.58,4.1,45.29,55.59,43.73,23.74,21.36,61.15,12.71,35.26,41.27,74.35,31.19,21.76,45.7 +NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.86,2.82,2.81,2.5,3.29,3.36,2.57,3.14,2.38,50.83,53.23,23.02,0.0,65.75,70.12,41.9,47.88,35.66,0.03,69.54,79.55,28.27,0.02,62.28,38.08,2.85,0.86,65.06,48.44,3.3,0.0,67.63,55.79,24.45,1.17,69.37,14.35,29.13,0.34,76.84,67.91,30.61,0.1 +meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.87,2.5,3.08,2.48,3.5,3.61,2.61,3.12,2.09,35.44,44.88,9.74,55.04,44.99,49.09,41.56,3.04,4.03,33.77,39.72,66.18,6.74,54.05,41.1,13.59,-1.07,16.13,59.77,13.24,-0.54,31.87,50.09,46.52,15.23,25.54,50.23,10.07,14.73,53.42,62.53,62.23,22.71,64.45 +nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.03,2.89,3.15,2.83,3.73,3.29,2.6,3.46,2.32,49.01,47.95,32.89,0.0,63.7,62.53,34.35,31.53,22.71,0.06,48.51,78.68,29.18,0.0,43.65,10.7,10.77,0.29,67.72,42.98,6.4,0.07,60.69,53.77,38.53,0.0,60.11,11.12,32.68,0.0,72.63,65.74,43.43,0.0 +ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.05,2.89,3.25,2.54,3.91,3.93,2.57,3.15,2.18,37.37,31.44,5.27,48.41,44.89,48.08,32.29,7.49,4.65,26.37,40.68,68.96,4.77,49.73,23.14,5.07,0.18,14.15,45.56,7.44,0.92,20.82,47.31,48.28,14.08,28.37,49.25,9.45,11.87,54.2,52.79,65.92,16.74,64.92 +MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.06,2.92,3.15,2.38,3.47,3.61,3.26,3.39,2.33,28.18,29.32,2.9,56.48,36.96,39.38,32.67,2.18,5.33,45.23,41.49,75.64,0.66,57.48,31.81,18.33,3.63,16.72,40.18,14.19,0.31,41.6,32.33,26.39,1.44,28.15,36.61,8.77,3.52,59.51,47.2,64.82,7.57,73.88 +sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,3.06,2.99,3.11,2.63,3.25,3.71,3.13,2.72,2.92,54.48,36.6,8.84,13.97,77.81,72.22,44.59,8.98,5.72,0.0,65.5,68.36,14.81,16.11,63.36,33.1,1.02,6.48,82.91,9.77,1.67,0.0,64.12,49.66,0.58,0.05,67.89,23.25,21.36,4.5,81.71,50.69,2.16,4.19 +ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.12,2.83,3.66,2.43,3.75,3.97,3.0,3.19,2.16,38.62,35.47,5.07,45.21,53.93,54.04,23.83,3.91,1.55,2.37,51.76,70.61,6.24,44.67,38.52,4.29,0.0,12.94,60.88,0.16,-0.35,18.54,49.38,41.72,7.67,13.7,50.88,12.39,3.31,48.44,60.64,61.2,7.63,69.83 +sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,3.14,2.99,3.11,2.63,3.28,3.71,3.15,3.34,2.92,54.48,36.6,8.84,15.42,77.81,72.22,44.59,8.98,5.72,0.0,65.5,68.33,14.81,16.11,59.15,33.1,0.8,6.14,82.91,9.77,1.67,0.0,63.78,49.69,0.74,0.02,65.04,17.4,-0.95,3.94,81.71,50.69,2.16,4.16 +ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.15,2.91,3.29,2.55,3.92,3.96,2.93,3.33,2.31,37.21,31.54,6.3,44.86,53.78,55.14,26.21,3.9,2.42,24.86,50.1,65.67,4.55,42.83,33.57,0.6,0.0,11.27,56.88,3.8,-0.21,13.72,49.16,35.17,9.79,22.48,48.53,10.15,4.88,45.38,58.3,59.01,10.33,65.04 +dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,3.17,3.16,3.18,2.32,3.46,3.84,2.9,3.54,2.96,47.61,24.17,8.14,25.19,68.63,67.7,25.68,6.73,3.35,22.57,68.83,64.25,28.62,28.78,56.62,24.79,-1.21,6.01,80.45,0.9,2.52,0.58,65.35,37.77,16.07,5.67,56.69,9.29,3.02,22.14,77.64,12.42,13.65,33.29 
+sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,3.17,2.63,2.9,2.45,3.32,3.75,3.09,4.2,2.98,56.75,44.48,26.74,17.89,78.31,72.13,47.53,26.92,14.63,0.0,66.5,72.19,28.75,15.91,62.44,31.32,1.91,3.69,82.24,6.35,2.84,0.0,60.54,54.99,0.52,0.8,59.61,0.0,-0.04,3.28,77.5,53.1,-0.35,3.13 +ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.3,3.35,3.44,2.76,4.08,4.12,3.03,3.3,2.34,31.8,6.85,0.97,49.83,40.08,43.96,31.9,-0.07,1.27,23.32,36.01,57.18,1.52,51.04,18.07,0.65,-0.72,12.27,41.27,5.4,-0.2,19.69,40.61,31.86,5.36,25.99,42.52,9.91,0.69,56.95,49.44,66.65,12.56,63.29 +EuropeanParliament/EUBERT,94,66,512,True,False,20070,3.33,3.1,3.65,3.05,3.55,4.05,3.07,3.23,2.93,41.09,27.33,21.58,20.68,49.92,44.37,19.81,8.64,3.11,15.89,38.36,59.0,19.4,19.23,29.71,36.28,2.71,6.5,59.5,0.94,3.25,5.56,49.95,40.29,25.88,2.59,49.54,14.86,27.9,20.65,71.43,17.53,23.93,31.01 +openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.35,3.04,3.55,2.84,3.79,3.82,3.14,3.57,3.03,34.66,21.93,1.5,52.36,37.36,42.83,16.02,-0.08,2.29,31.6,35.02,51.8,6.15,50.85,28.74,4.3,0.06,17.41,46.64,16.72,-1.54,18.69,39.39,23.6,7.68,25.3,39.24,4.25,11.48,54.18,50.73,27.52,2.96,63.42 +utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.39,3.04,3.42,2.74,4.07,4.35,3.19,3.65,2.66,19.61,37.92,2.81,50.05,31.43,36.92,30.63,0.98,1.67,33.24,27.41,72.24,0.13,49.77,13.29,6.83,0.92,7.49,33.53,4.25,-2.32,15.41,28.49,43.18,2.92,23.26,32.45,7.03,5.58,51.18,37.47,58.61,5.3,63.26 +HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.42,3.24,3.69,2.76,3.77,4.25,3.38,3.98,2.25,29.44,18.49,1.73,44.39,37.6,38.38,24.05,3.56,2.61,13.58,37.37,64.46,4.49,43.92,26.23,6.86,2.69,10.84,40.28,3.94,-0.26,10.68,32.54,27.03,8.95,18.38,31.84,1.56,5.05,40.55,47.58,66.78,20.53,58.07 +sentence-transformers/distiluse-base-multilingual-cased-v1,135,120,512,True,False,34042,3.43,3.32,3.62,3.05,3.53,3.72,3.57,3.29,3.32,46.78,27.78,3.04,15.52,60.76,59.62,25.98,2.65,3.47,0.2,49.86,60.06,3.18,16.08,45.47,35.18,1.14,1.63,75.63,9.82,3.65,0.0,28.29,51.7,2.12,0.03,58.67,17.82,9.27,2.17,71.33,36.75,0.24,1.3 +sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.47,3.65,3.52,3.07,3.49,3.72,3.39,3.61,3.31,26.96,30.13,2.01,8.25,63.79,60.96,32.83,1.09,0.18,0.0,51.67,63.04,2.32,8.93,47.62,33.35,2.64,1.27,77.16,10.41,4.09,0.0,40.2,48.71,5.53,0.06,56.98,9.66,19.37,3.11,71.9,36.22,0.47,2.44 +openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.52,3.23,3.62,3.24,3.82,3.91,3.55,3.79,3.03,29.49,13.77,0.0,51.53,34.78,39.0,10.69,6.17,5.9,31.25,37.17,20.2,6.13,46.66,26.58,-0.79,0.63,15.14,39.78,16.03,-0.48,20.04,38.81,10.59,0.91,22.54,42.35,0.78,-0.02,47.61,44.48,23.69,8.52,56.97 +sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.53,3.65,3.52,3.07,3.45,3.72,3.39,4.09,3.31,26.96,30.13,2.01,8.22,63.79,60.96,32.83,1.09,0.18,0.0,51.67,62.71,2.32,8.76,48.62,34.19,2.64,1.22,77.0,9.59,4.09,0.0,41.82,49.38,4.77,0.05,49.82,2.7,6.6,2.13,71.9,36.22,0.47,2.4 +HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.57,3.57,3.87,2.7,3.85,4.39,3.61,4.02,2.55,24.47,9.93,1.22,42.09,26.7,28.23,23.25,-0.47,0.26,13.4,35.96,68.31,3.61,43.26,20.5,10.09,0.83,10.84,27.91,0.77,-0.48,16.56,28.67,19.69,5.07,18.43,22.84,4.6,2.55,40.33,41.57,62.32,8.04,56.01 +PleIAs/Pleias-1.2b-Preview 
(few-shot),1195,66,2048,True,False,10756,3.7,3.26,3.93,3.21,3.99,4.39,3.68,3.87,3.26,28.3,28.95,0.2,36.39,38.96,40.42,19.42,-0.13,0.77,4.7,36.29,39.68,0.96,32.64,22.56,0.53,-0.26,11.77,38.91,-1.72,0.66,4.82,34.68,21.76,0.85,14.3,38.22,4.99,1.85,27.77,40.45,47.89,0.28,26.77 +3ebdola/Dialectal-Arabic-XLM-R-Base,278,250,512,True,False,12783,3.76,3.69,3.9,3.46,3.48,4.0,3.74,4.02,3.76,36.51,22.07,1.63,3.09,55.55,53.53,12.69,2.79,1.66,0.0,42.78,44.95,1.43,8.71,46.95,20.94,1.4,10.48,73.34,1.49,0.97,0.0,30.18,32.66,2.1,0.46,44.46,8.39,2.07,4.3,68.25,1.92,1.08,2.79 +dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,3.8,3.81,3.98,3.6,3.79,3.98,3.76,4.02,3.45,33.62,20.71,1.19,4.19,46.11,35.18,19.19,2.76,0.42,0.0,26.87,57.41,-1.06,5.54,43.93,9.46,0.04,6.13,72.08,-1.81,2.65,0.0,33.18,33.61,1.83,0.0,41.38,8.45,1.55,4.4,65.85,25.85,1.21,4.0 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,3.84,3.71,4.14,3.75,4.37,4.52,3.42,4.24,2.6,20.03,15.96,0.86,28.98,29.25,25.45,11.28,1.52,0.52,8.47,26.41,25.99,1.64,21.39,9.39,6.44,-0.72,3.34,14.18,2.36,2.29,6.35,32.54,22.27,7.18,16.72,28.25,3.73,0.76,19.08,43.0,54.47,17.44,53.15 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.93,3.68,3.99,3.36,4.23,4.59,3.9,3.93,3.79,15.93,13.01,0.05,36.85,28.82,27.81,18.74,-0.46,-0.84,12.66,21.42,45.75,-0.25,32.71,18.86,-0.67,-0.76,8.09,22.75,-0.03,-0.78,7.75,23.08,7.41,0.89,17.32,31.13,7.24,1.23,32.13,27.37,36.35,-0.37,7.42 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.95,3.47,4.1,3.71,4.14,4.61,4.05,4.01,3.49,16.17,29.12,-0.47,34.8,27.47,23.82,22.22,-2.06,-0.77,2.48,14.09,23.71,1.74,32.0,9.9,2.13,0.02,10.64,20.76,-1.78,0.87,3.58,24.32,15.58,1.25,6.82,23.58,7.9,1.79,26.11,21.6,45.04,-0.46,33.46 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.07,4.11,4.36,3.92,4.33,4.53,4.02,4.28,2.98,8.97,2.66,1.65,24.92,20.37,21.27,7.6,1.31,0.51,4.8,13.64,9.34,2.2,26.06,13.6,3.12,0.28,4.09,26.85,3.07,-0.12,1.39,18.77,12.59,1.64,9.27,15.68,6.73,0.63,19.73,30.73,59.51,1.55,49.03 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.07,3.99,4.33,3.95,4.31,4.59,3.95,4.23,3.19,12.68,3.61,1.79,28.12,26.6,23.7,6.21,-0.39,0.21,4.65,18.22,11.52,1.72,27.27,13.43,3.82,1.14,3.71,28.14,-0.56,-0.06,2.43,19.94,19.64,0.0,8.78,20.95,6.84,-1.5,22.67,31.14,43.97,3.49,47.91 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.17,3.99,4.36,3.91,4.33,4.5,4.02,4.45,3.82,13.84,9.47,-0.36,22.1,26.59,26.78,7.91,0.28,0.04,0.65,22.09,14.15,-0.04,21.6,17.73,2.38,-0.18,1.59,31.99,0.0,0.48,0.29,25.3,13.9,-0.25,6.12,24.47,3.57,-2.03,10.18,31.79,19.13,-0.03,12.35 +PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.23,3.99,4.4,4.0,4.35,4.63,4.15,4.47,3.83,10.59,13.31,0.52,16.61,25.02,21.59,8.05,-0.15,-0.97,0.37,16.28,17.38,-0.45,17.78,13.8,2.17,-0.63,1.29,22.55,0.67,0.87,0.27,21.03,10.99,0.13,3.61,21.32,4.37,-0.19,9.38,27.45,27.39,0.31,15.62 +fresh-xlm-roberta-base,278,250,512,True,False,2214,4.23,4.11,4.28,3.93,4.02,4.25,4.28,4.78,4.22,16.04,17.37,1.34,1.58,25.49,25.94,12.6,0.5,1.83,0.0,11.91,51.11,0.86,2.0,17.34,25.25,-0.06,1.02,48.7,1.07,2.37,0.0,8.03,23.44,-0.17,0.0,13.09,0.92,1.93,0.26,34.64,4.0,1.33,0.43 +HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.33,4.18,4.3,4.22,4.33,4.57,4.53,4.74,3.74,13.72,3.79,-0.45,14.69,24.37,24.69,8.84,-1.2,-0.5,0.16,19.15,-3.03,0.06,14.18,14.74,3.13,-0.25,1.35,25.51,-0.24,0.46,0.15,16.89,2.74,-0.34,0.28,17.49,2.01,-0.02,0.53,31.26,26.69,1.78,13.88 
+HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.37,4.23,4.51,4.2,4.37,4.55,4.49,4.79,3.8,12.11,2.61,0.25,14.02,20.89,19.62,2.78,-0.98,0.93,0.15,17.09,7.41,0.47,11.73,13.7,3.01,-0.83,0.94,23.22,3.78,0.41,0.54,15.54,2.51,0.36,1.77,15.82,-0.62,1.16,3.25,29.96,18.64,1.85,26.9 +ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.81,4.67,4.85,4.77,4.65,4.97,4.76,5.07,4.73,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.3,0.0,0.0,0.0,0.74,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.48,0.01 diff --git a/germanic-nlu.md b/germanic-nlu.md index 1b06b0a9..e48e21a4 100644 --- a/germanic-nlu.md +++ b/germanic-nlu.md @@ -3,7 +3,7 @@ layout: leaderboard title: Germanic NLU 🇪🇺 --- -
Last updated: 10/01/2025 12:30:47 CET
+Last updated: 11/01/2025 11:03:57 CET
@@ -112,11 +112,11 @@ title: Germanic NLU 🇪🇺 576 ± 221 / 81 ± 28 1.46 1.19 - 1.46 + 1.45 1.19 1.26 1.65 - 1.52 + 1.53 2.06 1.39 66.80 ± 3.01 / 45.69 ± 2.85 @@ -197,12 +197,12 @@ title: Germanic NLU 🇪🇺 799 ± 246 / 112 ± 38 1.67 1.21 - 1.91 + 1.90 1.21 1.90 1.99 1.56 - 2.27 + 2.26 1.34 71.94 ± 1.37 / 61.59 ± 1.44 61.26 ± 1.20 / 73.27 ± 0.95 @@ -282,9 +282,9 @@ title: Germanic NLU 🇪🇺 20,637 ± 3,925 / 4,497 ± 1,502 1.67 1.63 - 1.45 + 1.44 1.38 - 1.32 + 1.31 2.26 1.66 2.28 @@ -365,9 +365,9 @@ title: Germanic NLU 🇪🇺 128000 True 916 ± 329 / 114 ± 38 - 1.69 + 1.70 1.36 - 1.46 + 1.50 1.21 1.26 2.84 @@ -537,12 +537,12 @@ title: Germanic NLU 🇪🇺 1,409 ± 457 / 186 ± 63 1.71 1.35 - 1.92 + 1.96 1.25 1.95 2.03 1.53 - 2.28 + 2.27 1.38 68.57 ± 0.95 / 50.90 ± 2.30 60.52 ± 1.22 / 72.91 ± 1.05 @@ -622,7 +622,7 @@ title: Germanic NLU 🇪🇺 6,732 ± 1,273 / 1,633 ± 523 1.76 1.53 - 1.48 + 1.49 1.36 2.50 3.07 @@ -707,13 +707,13 @@ title: Germanic NLU 🇪🇺 25,418 ± 6,435 / 4,536 ± 1,452 1.76 1.74 - 1.61 - 1.56 - 2.05 + 1.60 + 1.55 + 2.04 2.55 1.52 1.44 - 1.61 + 1.62 71.24 ± 1.63 / 66.41 ± 1.64 46.50 ± 1.57 / 64.31 ± 1.21 52.92 ± 4.42 / 75.11 ± 3.22 @@ -792,11 +792,11 @@ title: Germanic NLU 🇪🇺 193 ± 87 / 55 ± 19 1.78 1.28 - 1.70 + 1.66 1.38 - 1.71 + 1.70 2.38 - 1.78 + 1.79 2.47 1.55 65.88 ± 2.11 / 55.11 ± 1.59 @@ -878,12 +878,12 @@ title: Germanic NLU 🇪🇺 1.79 1.51 2.00 - 1.33 + 1.32 1.38 - 2.23 + 2.22 1.87 - 2.38 - 1.60 + 2.37 + 1.61 64.80 ± 1.56 / 45.57 ± 1.47 53.07 ± 1.45 / 68.64 ± 1.04 64.18 ± 3.36 / 81.90 ± 1.68 @@ -962,12 +962,12 @@ title: Germanic NLU 🇪🇺 2,258 ± 1,221 / 198 ± 67 1.81 1.54 - 2.12 + 2.11 1.43 2.08 2.31 - 1.39 - 2.33 + 1.38 + 2.32 1.29 64.66 ± 1.67 / 48.66 ± 3.44 53.42 ± 1.21 / 66.49 ± 1.21 @@ -1045,9 +1045,9 @@ title: Germanic NLU 🇪🇺 32768 True 1,219 ± 412 / 158 ± 53 - 1.81 + 1.82 1.46 - 2.09 + 2.13 1.50 2.10 2.00 @@ -1132,12 +1132,12 @@ title: Germanic NLU 🇪🇺 1,353 ± 443 / 180 ± 61 1.85 1.46 - 2.01 + 2.00 1.34 2.14 2.15 1.75 - 2.42 + 2.41 1.52 67.07 ± 1.22 / 48.82 ± 2.09 59.89 ± 0.97 / 72.51 ± 0.84 @@ -1217,10 +1217,10 @@ title: Germanic NLU 🇪🇺 5,947 ± 1,301 / 1,129 ± 374 1.86 1.79 - 1.84 + 1.80 1.44 2.51 - 2.99 + 3.00 1.38 1.65 1.30 @@ -1302,10 +1302,10 @@ title: Germanic NLU 🇪🇺 3,633 ± 1,236 / 777 ± 220 1.87 1.41 - 2.00 + 1.99 1.52 2.27 - 2.15 + 2.16 1.74 2.39 1.46 @@ -1470,14 +1470,14 @@ title: Germanic NLU 🇪🇺 131072 False 1,909 ± 646 / 248 ± 84 - 2.03 - 1.79 - 2.46 + 2.02 + 1.78 + 2.42 1.73 2.43 2.34 1.75 - 2.22 + 2.21 1.53 63.63 ± 2.58 / 40.18 ± 3.60 50.82 ± 1.22 / 64.57 ± 1.50 @@ -1559,9 +1559,9 @@ title: Germanic NLU 🇪🇺 1.90 2.08 2.03 - 1.84 + 1.83 2.54 - 1.92 + 1.90 2.34 1.67 59.96 ± 1.64 / 41.55 ± 2.90 @@ -1647,7 +1647,7 @@ title: Germanic NLU 🇪🇺 2.42 2.32 1.86 - 2.57 + 2.56 1.58 58.34 ± 1.24 / 33.52 ± 1.47 59.14 ± 1.19 / 72.45 ± 0.97 @@ -1730,8 +1730,8 @@ title: Germanic NLU 🇪🇺 2.04 1.64 2.55 - 3.17 - 1.81 + 3.18 + 1.80 2.00 1.52 68.70 ± 2.40 / 64.05 ± 2.19 @@ -1803,6 +1803,91 @@ title: Germanic NLU 🇪🇺 12.6.1 12.6.1 + + gpt-4-1106-preview (zero-shot, val) + unknown + 100 + 8191 + True + 436 ± 152 / 57 ± 21 + 2.09 + 1.84 + 2.38 + 1.70 + 2.07 + 2.56 + 1.73 + 2.43 + 2.03 + 35.79 ± 2.45 / 25.86 ± 1.74 + 53.69 ± 2.27 / 67.91 ± 1.55 + 62.98 ± 2.61 / 81.27 ± 1.31 + 51.96 ± 1.07 / 64.87 ± 0.92 + 60.16 ± 3.63 / 49.38 ± 2.48 + 48.74 ± 2.05 / 40.98 ± 1.83 + 39.62 ± 1.29 / 38.68 ± 0.94 + 71.38 ± 2.71 / 85.52 ± 1.38 + 42.94 ± 3.66 / 71.26 ± 1.76 + 36.04 ± 1.53 / 69.46 ± 0.84 + 51.31 ± 4.64 / 43.84 ± 3.44 + 73.54 ± 3.03 / 68.68 ± 2.31 + 66.39 ± 2.24 / 82.10 ± 1.23 + 52.22 ± 1.51 / 63.21 ± 1.40 + 30.39 ± 
3.49 / 25.88 ± 1.98 + 47.47 ± 3.03 / 63.45 ± 2.13 + 43.30 ± 3.41 / 71.54 ± 1.71 + 29.82 ± 1.09 / 51.48 ± 0.89 + 57.52 ± 2.17 / 36.36 ± 1.53 + 13.18 ± 5.13 / 19.41 ± 3.41 + 28.03 ± 3.90 / 62.26 ± 2.03 + 59.06 ± 2.40 / 76.65 ± 1.98 + 57.47 ± 2.20 / 36.29 ± 1.91 + 58.67 ± 2.30 / 71.20 ± 1.34 + 54.55 ± 3.18 / 75.08 ± 1.71 + 27.02 ± 1.33 / 53.97 ± 1.74 + 55.72 ± 3.68 / 40.53 ± 2.95 + 11.13 ± 1.98 / 18.38 ± 1.21 + 67.28 ± 2.42 / 83.06 ± 1.36 + 54.20 ± 2.18 / 72.73 ± 1.79 + 42.40 ± 2.22 / 34.09 ± 2.02 + 65.24 ± 2.14 / 72.51 ± 1.77 + 44.59 ± 4.02 / 71.93 ± 2.06 + 62.94 ± 1.37 / 82.84 ± 0.53 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + gpt-4o-mini-2024-07-18 (zero-shot, val) unknown @@ -1812,10 +1897,10 @@ title: Germanic NLU 🇪🇺 908 ± 303 / 96 ± 36 2.09 1.71 - 2.48 + 2.47 1.87 - 2.31 - 2.41 + 2.30 + 2.40 1.89 2.41 1.68 @@ -1888,91 +1973,6 @@ title: Germanic NLU 🇪🇺 14.0.1 14.0.1 - - gpt-4-1106-preview (zero-shot, val) - unknown - 100 - 8191 - True - 436 ± 152 / 57 ± 21 - 2.10 - 1.85 - 2.39 - 1.70 - 2.07 - 2.57 - 1.73 - 2.43 - 2.03 - 35.79 ± 2.45 / 25.86 ± 1.74 - 53.69 ± 2.27 / 67.91 ± 1.55 - 62.98 ± 2.61 / 81.27 ± 1.31 - 51.96 ± 1.07 / 64.87 ± 0.92 - 60.16 ± 3.63 / 49.38 ± 2.48 - 48.74 ± 2.05 / 40.98 ± 1.83 - 39.62 ± 1.29 / 38.68 ± 0.94 - 71.38 ± 2.71 / 85.52 ± 1.38 - 42.94 ± 3.66 / 71.26 ± 1.76 - 36.04 ± 1.53 / 69.46 ± 0.84 - 51.31 ± 4.64 / 43.84 ± 3.44 - 73.54 ± 3.03 / 68.68 ± 2.31 - 66.39 ± 2.24 / 82.10 ± 1.23 - 52.22 ± 1.51 / 63.21 ± 1.40 - 30.39 ± 3.49 / 25.88 ± 1.98 - 47.47 ± 3.03 / 63.45 ± 2.13 - 43.30 ± 3.41 / 71.54 ± 1.71 - 29.82 ± 1.09 / 51.48 ± 0.89 - 57.52 ± 2.17 / 36.36 ± 1.53 - 13.18 ± 5.13 / 19.41 ± 3.41 - 28.03 ± 3.90 / 62.26 ± 2.03 - 59.06 ± 2.40 / 76.65 ± 1.98 - 57.47 ± 2.20 / 36.29 ± 1.91 - 58.67 ± 2.30 / 71.20 ± 1.34 - 54.55 ± 3.18 / 75.08 ± 1.71 - 27.02 ± 1.33 / 53.97 ± 1.74 - 55.72 ± 3.68 / 40.53 ± 2.95 - 11.13 ± 1.98 / 18.38 ± 1.21 - 67.28 ± 2.42 / 83.06 ± 1.36 - 54.20 ± 2.18 / 72.73 ± 1.79 - 42.40 ± 2.22 / 34.09 ± 2.02 - 65.24 ± 2.14 / 72.51 ± 1.77 - 44.59 ± 4.02 / 71.93 ± 2.06 - 62.94 ± 1.37 / 82.84 ± 0.53 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - AI-Sweden-Models/roberta-large-1160k 355 @@ -1988,7 +1988,7 @@ title: Germanic NLU 🇪🇺 3.23 2.66 3.37 - 1.33 + 1.34 74.16 ± 1.73 / 70.93 ± 1.67 51.20 ± 1.67 / 66.62 ± 1.58 73.87 ± 2.13 / 86.61 ± 1.17 @@ -2067,9 +2067,9 @@ title: Germanic NLU 🇪🇺 2,363 ± 794 / 311 ± 105 2.20 1.96 - 2.48 + 2.49 1.86 - 2.43 + 2.42 2.74 1.93 2.53 @@ -2144,105 +2144,20 @@ title: Germanic NLU 🇪🇺 14.0.4 - meta-llama/Meta-Llama-3-8B-Instruct (few-shot) - 8030 - 128 + nvidia/mistral-nemo-minitron-8b-base (few-shot) + 8414 + 131 8192 True - 1,007 ± 316 / 162 ± 45 - 2.26 - 2.12 - 2.48 - 2.06 - 2.51 - 2.60 - 1.85 - 2.60 - 1.85 - 57.74 ± 2.06 / 40.66 ± 2.58 - 48.43 ± 3.31 / 62.09 ± 3.62 - 27.12 ± 2.83 / 60.40 ± 2.70 - 46.76 ± 1.20 / 59.77 ± 0.51 - 74.47 ± 1.47 / 65.57 ± 2.39 - 72.93 ± 1.00 / 65.44 ± 2.55 - 34.44 ± 0.42 / 37.94 ± 0.39 - 27.77 ± 1.63 / 61.75 ± 1.77 - 20.35 ± 1.92 / 57.74 ± 2.28 - 42.90 ± 3.57 / 69.90 ± 3.17 - 69.67 ± 1.30 / 52.94 
± 4.01 - 59.93 ± 4.70 / 67.54 ± 3.04 - 27.63 ± 3.19 / 60.85 ± 3.29 - 49.84 ± 1.61 / 60.85 ± 0.93 - 60.20 ± 2.76 / 40.38 ± 4.22 - 38.09 ± 2.38 / 54.51 ± 2.16 - 9.14 ± 0.98 / 49.56 ± 2.11 - 28.66 ± 1.28 / 58.93 ± 1.41 - 70.61 ± 1.12 / 68.27 ± 1.76 - 45.78 ± 2.05 / 47.72 ± 1.15 - 4.58 ± 2.43 / 36.78 ± 1.86 - 50.67 ± 1.56 / 72.73 ± 1.11 - 68.18 ± 0.95 / 57.72 ± 1.15 - 58.33 ± 2.83 / 69.31 ± 3.16 - 29.12 ± 3.17 / 63.60 ± 1.63 - 28.68 ± 1.99 / 56.42 ± 3.34 - 68.72 ± 1.81 / 54.89 ± 2.10 - 14.67 ± 2.51 / 41.36 ± 2.04 - 32.91 ± 2.56 / 64.93 ± 1.97 - 45.36 ± 1.31 / 67.50 ± 0.69 - 75.02 ± 1.31 / 69.47 ± 1.18 - 67.64 ± 1.12 / 71.04 ± 1.17 - 32.29 ± 3.05 / 64.85 ± 2.07 - 54.84 ± 2.22 / 79.10 ± 1.10 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 14.0.4 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 14.0.1 - 14.0.1 - 14.0.1 - 14.0.1 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - - - nvidia/mistral-nemo-minitron-8b-base (few-shot) - 8414 - 131 - 8192 - True - 2,470 ± 836 / 326 ± 111 + 2,470 ± 836 / 326 ± 111 2.26 2.06 - 2.32 + 2.33 1.93 2.42 2.83 2.06 - 2.71 + 2.69 1.78 49.85 ± 2.30 / 33.79 ± 3.10 49.52 ± 1.96 / 66.14 ± 1.58 @@ -2314,69 +2229,69 @@ title: Germanic NLU 🇪🇺 14.1.1 - meta-llama/Llama-3.1-8B-Instruct (few-shot) + meta-llama/Meta-Llama-3-8B-Instruct (few-shot) 8030 128 - 131072 + 8192 True - 1,005 ± 330 / 196 ± 74 - 2.31 - 1.97 - 2.63 - 1.99 - 2.64 - 2.74 - 1.91 - 2.82 - 1.82 - 54.70 ± 1.69 / 38.11 ± 2.31 - 54.81 ± 1.51 / 67.88 ± 1.39 - 32.11 ± 1.93 / 63.11 ± 1.61 - 48.87 ± 1.18 / 59.47 ± 0.67 - 64.55 ± 1.69 / 56.81 ± 2.50 - 66.44 ± 1.38 / 60.02 ± 3.36 - 35.17 ± 0.32 / 38.11 ± 0.29 - 27.41 ± 1.97 / 54.94 ± 2.06 - 15.60 ± 2.05 / 46.51 ± 2.41 - 43.11 ± 2.22 / 69.74 ± 1.60 - 55.80 ± 2.68 / 34.65 ± 1.98 - 79.23 ± 0.48 / 76.86 ± 0.80 - 32.67 ± 2.18 / 63.89 ± 1.49 - 46.88 ± 1.47 / 58.66 ± 0.84 - 46.48 ± 1.98 / 24.57 ± 1.73 - 39.91 ± 2.35 / 57.39 ± 1.64 - 11.72 ± 1.81 / 51.67 ± 1.45 - 25.91 ± 0.99 / 53.39 ± 1.83 - 67.67 ± 1.31 / 59.86 ± 2.12 - 48.54 ± 2.43 / 49.17 ± 1.48 - 3.89 ± 2.71 / 36.33 ± 1.50 - 47.07 ± 2.63 / 69.50 ± 1.68 - 67.61 ± 1.23 / 60.39 ± 1.02 - 58.07 ± 2.32 / 70.76 ± 1.84 - 28.25 ± 3.57 / 59.54 ± 3.88 - 28.79 ± 2.02 / 55.82 ± 3.28 - 69.76 ± 1.36 / 57.66 ± 1.36 - 9.09 ± 1.42 / 20.14 ± 0.84 - 37.58 ± 3.42 / 66.98 ± 2.22 - 41.26 ± 2.09 / 65.63 ± 0.90 - 76.95 ± 0.95 / 72.47 ± 0.82 - 68.12 ± 0.92 / 72.48 ± 0.53 - 34.34 ± 3.37 / 65.84 ± 1.59 - 47.88 ± 3.37 / 76.21 ± 1.69 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 + 1,483 ± 377 / 287 ± 97 + 2.27 + 2.12 + 2.54 + 2.05 + 2.51 + 2.60 + 1.85 + 2.62 + 1.85 + 57.74 ± 2.06 / 40.66 ± 2.58 + 48.43 ± 3.31 / 62.09 ± 3.62 + 27.12 ± 2.83 / 60.40 ± 2.70 + 46.76 ± 1.20 / 59.77 ± 0.51 + 66.56 ± 1.70 / 58.47 ± 2.40 + 68.29 ± 1.47 / 61.22 ± 2.25 + 34.47 ± 0.44 / 37.96 ± 0.40 + 28.22 ± 1.37 / 59.51 ± 1.44 + 18.21 ± 2.21 / 52.84 ± 2.30 + 47.34 ± 1.99 / 73.23 ± 1.29 + 69.67 ± 1.30 / 52.94 ± 4.01 + 59.93 ± 4.70 / 67.54 ± 3.04 + 27.63 ± 3.19 / 60.85 ± 3.29 + 49.84 ± 1.61 / 60.85 ± 0.93 + 60.20 ± 2.76 / 40.38 ± 4.22 + 38.09 ± 2.38 / 54.51 ± 2.16 + 9.14 ± 0.98 / 49.56 ± 2.11 + 28.66 ± 1.28 / 58.93 ± 1.41 + 70.61 ± 1.12 / 68.27 ± 1.76 + 45.78 ± 2.05 / 47.72 ± 1.15 + 4.58 ± 2.43 / 36.78 ± 1.86 + 50.67 ± 1.56 / 72.73 ± 1.11 + 68.18 ± 0.95 / 57.72 ± 1.15 + 58.33 ± 2.83 / 69.31 ± 3.16 + 29.12 ± 3.17 / 63.60 ± 1.63 + 28.68 ± 1.99 / 56.42 ± 3.34 + 68.72 ± 1.81 / 54.89 ± 2.10 + 14.67 ± 2.51 / 41.36 ± 2.04 + 32.91 ± 2.56 / 64.93 ± 1.97 + 45.36 ± 1.31 / 67.50 ± 
0.69 + 75.02 ± 1.31 / 69.47 ± 1.18 + 67.64 ± 1.12 / 71.04 ± 1.17 + 32.29 ± 3.05 / 64.85 ± 2.07 + 54.84 ± 2.22 / 79.10 ± 1.10 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 14.1.2 14.1.2 14.1.2 14.1.2 14.1.2 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 14.0.1 14.0.1 14.0.1 @@ -2385,18 +2300,18 @@ title: Germanic NLU 🇪🇺 14.0.4 14.0.4 14.0.4 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 ZurichNLP/unsup-simcse-xlm-roberta-base @@ -2408,7 +2323,7 @@ title: Germanic NLU 🇪🇺 2.35 2.33 2.21 - 1.93 + 1.92 2.85 3.39 2.15 @@ -2483,6 +2398,91 @@ title: Germanic NLU 🇪🇺 12.6.1 12.6.1 + + meta-llama/Llama-3.1-8B-Instruct (few-shot) + 8030 + 128 + 131072 + True + 1,473 ± 377 / 283 ± 96 + 2.35 + 1.97 + 2.62 + 1.98 + 2.63 + 2.74 + 2.04 + 3.00 + 1.82 + 54.70 ± 1.69 / 38.11 ± 2.31 + 54.81 ± 1.51 / 67.88 ± 1.39 + 32.11 ± 1.93 / 63.11 ± 1.61 + 48.87 ± 1.18 / 59.47 ± 0.67 + 64.55 ± 1.69 / 56.81 ± 2.50 + 66.44 ± 1.38 / 60.02 ± 3.36 + 35.17 ± 0.32 / 38.11 ± 0.29 + 27.41 ± 1.97 / 54.94 ± 2.06 + 15.60 ± 2.05 / 46.51 ± 2.41 + 43.11 ± 2.22 / 69.74 ± 1.60 + 55.80 ± 2.68 / 34.65 ± 1.98 + 79.23 ± 0.48 / 76.86 ± 0.80 + 32.67 ± 2.18 / 63.89 ± 1.49 + 46.88 ± 1.47 / 58.66 ± 0.84 + 46.48 ± 1.98 / 24.57 ± 1.73 + 39.91 ± 2.35 / 57.39 ± 1.64 + 11.72 ± 1.81 / 51.67 ± 1.45 + 25.91 ± 0.99 / 53.39 ± 1.83 + 67.67 ± 1.31 / 59.86 ± 2.12 + 48.54 ± 2.43 / 49.17 ± 1.48 + 3.89 ± 2.71 / 36.33 ± 1.50 + 47.07 ± 2.63 / 69.50 ± 1.68 + 62.03 ± 1.17 / 45.31 ± 1.89 + 58.15 ± 2.40 / 70.81 ± 1.86 + 30.18 ± 1.92 / 63.41 ± 1.19 + 26.48 ± 0.98 / 52.41 ± 1.48 + 61.68 ± 1.94 / 42.64 ± 1.85 + 8.97 ± 1.44 / 20.07 ± 0.82 + 36.57 ± 1.77 / 65.25 ± 1.94 + 33.88 ± 1.83 / 62.17 ± 0.91 + 76.95 ± 0.95 / 72.47 ± 0.82 + 68.12 ± 0.92 / 72.48 ± 0.53 + 34.34 ± 3.37 / 65.84 ± 1.59 + 47.88 ± 3.37 / 76.21 ± 1.69 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.1 + 14.0.1 + 14.0.1 + 14.0.1 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + meta-llama/Llama-3.1-8B (few-shot) 8030 @@ -2497,7 +2497,7 @@ title: Germanic NLU 🇪🇺 2.49 3.12 2.17 - 2.52 + 2.54 1.75 50.92 ± 1.88 / 34.24 ± 2.85 47.86 ± 1.66 / 62.47 ± 1.97 @@ -2569,103 +2569,18 @@ title: Germanic NLU 🇪🇺 13.0.0 - meta-llama/Meta-Llama-3-8B (few-shot) - 8030 - 128 - 8192 - True - 1,335 ± 338 / 260 ± 88 - 2.37 - 2.16 - 2.57 - 1.97 - 2.58 - 3.18 - 2.02 - 2.65 - 1.82 - 49.46 ± 1.88 / 32.11 ± 2.41 - 51.16 ± 2.15 / 67.00 ± 1.51 - 23.01 ± 3.93 / 49.99 ± 4.63 - 49.75 ± 5.10 / 56.13 ± 4.89 - 61.48 ± 1.83 / 47.65 ± 2.94 - 61.58 ± 2.21 / 50.10 ± 2.68 - 32.94 ± 0.86 / 37.52 ± 0.43 - 21.20 ± 6.57 / 52.29 ± 7.43 - 19.65 ± 4.32 / 56.66 ± 4.40 - 53.35 ± 4.33 / 74.98 ± 3.70 - 59.92 ± 2.46 / 40.98 ± 4.90 - 80.91 ± 0.41 / 78.09 ± 1.22 - 26.39 ± 3.47 / 52.38 ± 4.49 - 47.69 ± 6.29 / 54.30 ± 6.65 - 50.45 ± 1.95 / 37.62 ± 3.95 - 34.68 ± 3.74 / 53.39 ± 3.75 - 8.69 ± 2.70 / 44.84 ± 4.03 - 31.94 ± 2.16 / 59.26 ± 1.50 - 61.11 ± 4.21 / 58.55 ± 4.19 - 19.40 ± 8.13 / 32.14 ± 7.75 - 2.02 ± 1.68 / 39.88 ± 3.56 - 50.34 ± 1.74 / 71.74 ± 1.27 - 56.00 ± 1.94 / 43.49 ± 2.05 - 56.40 ± 3.89 / 70.17 ± 2.91 - 22.01 ± 5.17 / 56.97 ± 3.54 - 35.39 ± 2.49 / 64.61 ± 2.42 - 62.26 ± 2.20 / 42.41 ± 2.02 - 10.45 ± 2.69 / 33.45 ± 1.99 - 30.30 ± 3.94 / 62.28 ± 2.89 - 62.99 ± 1.00 / 73.73 ± 0.98 - 66.31 ± 
2.09 / 58.68 ± 1.95 - 64.30 ± 0.65 / 69.26 ± 0.50 - 28.18 ± 3.96 / 58.97 ± 4.03 - 70.38 ± 3.51 / 82.95 ± 2.38 - 14.0.3 - 14.1.2 - 14.1.2 - 14.0.3 - 12.6.1 - 12.6.1 - 14.1.2 - 12.6.1 - 12.6.1 - 12.6.1 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.1 - 14.1.2 - 14.1.2 - 14.0.1 - 12.6.1 - 14.1.2 - 12.6.1 - 13.0.0 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - - - mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot) - 341029 - 256 - 4096 + mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot) + 341029 + 256 + 4096 True 1,904 ± 475 / 361 ± 121 - 2.39 - 2.06 - 2.46 + 2.38 + 2.05 + 2.45 2.05 - 2.15 - 2.85 + 2.14 + 2.84 2.38 2.95 2.19 @@ -2738,6 +2653,91 @@ title: Germanic NLU 🇪🇺 14.0.4 14.0.4 + + meta-llama/Meta-Llama-3-8B (few-shot) + 8030 + 128 + 8192 + True + 1,477 ± 376 / 285 ± 97 + 2.39 + 2.16 + 2.79 + 1.97 + 2.57 + 3.18 + 2.02 + 2.63 + 1.82 + 49.46 ± 1.88 / 32.11 ± 2.41 + 51.16 ± 2.15 / 67.00 ± 1.51 + 23.01 ± 3.93 / 49.99 ± 4.63 + 49.75 ± 5.10 / 56.13 ± 4.89 + 62.89 ± 2.01 / 50.34 ± 1.92 + 56.18 ± 3.26 / 47.30 ± 2.62 + 33.07 ± 0.73 / 37.60 ± 0.49 + 30.73 ± 3.19 / 64.06 ± 2.41 + 20.57 ± 3.62 / 54.22 ± 5.15 + 30.77 ± 4.33 / 54.66 ± 4.41 + 59.92 ± 2.46 / 40.98 ± 4.90 + 80.91 ± 0.41 / 78.09 ± 1.22 + 26.39 ± 3.47 / 52.38 ± 4.49 + 47.69 ± 6.29 / 54.30 ± 6.65 + 50.45 ± 1.95 / 37.62 ± 3.95 + 34.68 ± 3.74 / 53.39 ± 3.75 + 8.69 ± 2.70 / 44.84 ± 4.03 + 31.94 ± 2.16 / 59.26 ± 1.50 + 61.11 ± 4.21 / 58.55 ± 4.19 + 19.40 ± 8.13 / 32.14 ± 7.75 + 2.02 ± 1.68 / 39.88 ± 3.56 + 50.34 ± 1.74 / 71.74 ± 1.27 + 56.00 ± 1.94 / 43.49 ± 2.05 + 56.40 ± 3.89 / 70.17 ± 2.91 + 22.01 ± 5.17 / 56.97 ± 3.54 + 35.39 ± 2.49 / 64.61 ± 2.42 + 62.26 ± 2.20 / 42.41 ± 2.02 + 10.45 ± 2.69 / 33.45 ± 1.99 + 30.30 ± 3.94 / 62.28 ± 2.89 + 62.99 ± 1.00 / 73.73 ± 0.98 + 66.31 ± 2.09 / 58.68 ± 1.95 + 64.30 ± 0.65 / 69.26 ± 0.50 + 28.18 ± 3.96 / 58.97 ± 4.03 + 70.38 ± 3.51 / 82.95 ± 2.38 + 14.0.3 + 14.1.2 + 14.1.2 + 14.0.3 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.1 + 14.1.2 + 14.1.2 + 14.0.1 + 12.6.1 + 14.1.2 + 12.6.1 + 13.0.0 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + mistralai/Ministral-8B-Instruct-2410 (few-shot) 8020 @@ -2745,14 +2745,14 @@ title: Germanic NLU 🇪🇺 32768 True 1,302 ± 323 / 253 ± 86 - 2.39 + 2.40 2.20 2.60 2.10 - 2.86 - 3.05 + 2.85 + 3.04 2.06 - 2.55 + 2.59 1.72 55.49 ± 2.05 / 34.11 ± 1.79 49.18 ± 1.89 / 65.27 ± 1.69 @@ -2780,10 +2780,10 @@ title: Germanic NLU 🇪🇺 50.39 ± 2.45 / 66.39 ± 1.62 30.86 ± 1.37 / 53.78 ± 1.61 30.53 ± 1.21 / 58.26 ± 1.93 - 66.51 ± 1.38 / 52.40 ± 2.62 - 11.91 ± 1.03 / 34.21 ± 1.08 - 34.46 ± 2.79 / 65.61 ± 2.58 - 59.23 ± 1.16 / 72.56 ± 0.80 + 63.30 ± 2.36 / 39.20 ± 2.16 + 11.82 ± 1.07 / 34.18 ± 1.11 + 32.20 ± 0.77 / 65.67 ± 0.69 + 59.45 ± 0.89 / 71.13 ± 0.60 72.40 ± 0.80 / 65.83 ± 1.64 63.46 ± 2.10 / 69.49 ± 1.15 35.86 ± 7.94 / 65.20 ± 6.98 @@ -2814,15 +2814,100 @@ title: Germanic NLU 🇪🇺 14.1.2 14.1.2 14.1.2 - 13.0.0 - 14.0.4 - 13.0.0 - 13.0.0 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 13.0.0 13.0.0 13.0.0 13.0.0 + + mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot) + 46703 + 32 + 32768 + True + 5,535 ± 1,837 / 760 ± 256 + 2.40 + 2.08 + 2.70 + 2.11 + 3.10 + 2.87 + 2.04 + 2.63 + 1.70 + 51.20 ± 3.31 / 30.53 ± 2.46 + 50.95 ± 1.15 / 66.13 ± 0.87 + 33.44 ± 1.67 / 64.34 ± 1.32 + 46.85 ± 1.67 / 61.54 ± 1.02 + 55.02 ± 2.74 / 36.31 ± 1.97 + 57.37 ± 2.34 / 38.25 ± 2.37 + 36.76 ± 0.69 / 38.81 ± 0.52 + 30.73 ± 1.86 / 63.33 ± 2.10 + 18.96 ± 2.45 / 
52.64 ± 4.21 + 41.01 ± 2.00 / 70.24 ± 1.75 + 46.15 ± 1.63 / 25.08 ± 1.69 + 80.33 ± 0.51 / 79.04 ± 0.76 + 32.89 ± 2.06 / 64.41 ± 1.78 + 46.51 ± 1.69 / 61.35 ± 1.02 + 35.00 ± 3.16 / 20.45 ± 2.42 + 23.93 ± 4.28 / 46.14 ± 3.72 + 9.69 ± 1.44 / 52.07 ± 1.78 + 21.38 ± 2.08 / 53.35 ± 1.40 + 63.43 ± 0.93 / 41.57 ± 2.20 + 43.99 ± 2.94 / 59.58 ± 2.00 + 9.38 ± 2.05 / 51.75 ± 1.36 + 25.35 ± 3.98 / 49.54 ± 4.57 + 58.07 ± 0.94 / 40.30 ± 1.75 + 52.18 ± 1.33 / 65.98 ± 1.66 + 41.45 ± 1.66 / 68.13 ± 0.98 + 26.87 ± 1.60 / 59.41 ± 2.41 + 58.80 ± 3.37 / 37.95 ± 1.80 + 12.50 ± 1.74 / 33.57 ± 1.09 + 45.22 ± 1.65 / 71.80 ± 1.10 + 47.03 ± 2.34 / 67.41 ± 1.00 + 70.02 ± 1.35 / 60.95 ± 1.57 + 69.48 ± 0.68 / 71.36 ± 1.02 + 44.59 ± 0.99 / 70.37 ± 0.77 + 55.70 ± 2.73 / 77.76 ± 1.18 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + cardiffnlp/twitter-xlm-roberta-base 278 @@ -2834,11 +2919,11 @@ title: Germanic NLU 🇪🇺 2.09 2.07 1.84 - 2.61 + 2.60 3.79 2.26 2.37 - 2.25 + 2.26 70.10 ± 1.16 / 64.54 ± 1.00 45.30 ± 2.03 / 63.22 ± 1.47 51.74 ± 2.53 / 74.31 ± 1.94 @@ -2908,91 +2993,6 @@ title: Germanic NLU 🇪🇺 12.6.1 12.6.1 - - mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot) - 46703 - 32 - 32768 - True - 5,535 ± 1,837 / 760 ± 256 - 2.41 - 2.08 - 2.71 - 2.11 - 3.11 - 2.87 - 2.04 - 2.63 - 1.70 - 51.20 ± 3.31 / 30.53 ± 2.46 - 50.95 ± 1.15 / 66.13 ± 0.87 - 33.44 ± 1.67 / 64.34 ± 1.32 - 46.85 ± 1.67 / 61.54 ± 1.02 - 55.02 ± 2.74 / 36.31 ± 1.97 - 57.37 ± 2.34 / 38.25 ± 2.37 - 36.76 ± 0.69 / 38.81 ± 0.52 - 30.73 ± 1.86 / 63.33 ± 2.10 - 18.96 ± 2.45 / 52.64 ± 4.21 - 41.01 ± 2.00 / 70.24 ± 1.75 - 46.15 ± 1.63 / 25.08 ± 1.69 - 80.33 ± 0.51 / 79.04 ± 0.76 - 32.89 ± 2.06 / 64.41 ± 1.78 - 46.51 ± 1.69 / 61.35 ± 1.02 - 35.00 ± 3.16 / 20.45 ± 2.42 - 23.93 ± 4.28 / 46.14 ± 3.72 - 9.69 ± 1.44 / 52.07 ± 1.78 - 21.38 ± 2.08 / 53.35 ± 1.40 - 63.43 ± 0.93 / 41.57 ± 2.20 - 43.99 ± 2.94 / 59.58 ± 2.00 - 9.38 ± 2.05 / 51.75 ± 1.36 - 25.35 ± 3.98 / 49.54 ± 4.57 - 58.07 ± 0.94 / 40.30 ± 1.75 - 52.18 ± 1.33 / 65.98 ± 1.66 - 41.45 ± 1.66 / 68.13 ± 0.98 - 26.87 ± 1.60 / 59.41 ± 2.41 - 58.80 ± 3.37 / 37.95 ± 1.80 - 12.50 ± 1.74 / 33.57 ± 1.09 - 45.22 ± 1.65 / 71.80 ± 1.10 - 47.03 ± 2.34 / 67.41 ± 1.00 - 70.02 ± 1.35 / 60.95 ± 1.57 - 69.48 ± 0.68 / 71.36 ± 1.02 - 44.59 ± 0.99 / 70.37 ± 0.77 - 55.70 ± 2.73 / 77.76 ± 1.18 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - Nexusflow/Starling-LM-7B-beta (few-shot) 7242 @@ -3002,10 +3002,10 @@ title: Germanic NLU 🇪🇺 4,136 ± 1,282 / 668 ± 326 2.44 2.24 - 2.14 - 2.10 + 2.19 + 2.09 3.09 - 3.40 + 3.39 2.20 2.72 1.64 @@ -3038,114 +3038,29 @@ title: Germanic NLU 🇪🇺 62.86 ± 2.07 / 33.76 ± 1.85 15.11 ± 1.83 / 40.18 ± 1.61 39.11 ± 1.22 / 68.00 ± 1.24 - 36.48 ± 2.80 / 59.22 ± 2.19 - 72.77 ± 1.02 / 57.29 ± 1.58 - 70.12 ± 0.78 / 74.54 ± 0.50 - 44.68 ± 0.97 / 71.05 ± 0.52 - 57.17 ± 2.60 / 80.36 ± 1.40 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.1.2 - 
14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - - - CohereForAI/aya-expanse-8b (few-shot) - 8028 - 256 - 8192 - False - 2,686 ± 685 / 491 ± 164 - 2.46 - 2.15 - 2.63 - 2.06 - 3.22 - 3.08 - 2.11 - 2.64 - 1.78 - 51.32 ± 3.82 / 25.54 ± 2.10 - 52.00 ± 1.67 / 66.25 ± 1.77 - 18.48 ± 2.44 / 52.18 ± 4.28 - 52.43 ± 1.19 / 62.08 ± 0.60 - 66.55 ± 2.12 / 39.28 ± 3.45 - 63.63 ± 1.62 / 37.25 ± 3.49 - 38.61 ± 2.28 / 51.46 ± 2.62 - 15.80 ± 2.22 / 51.42 ± 3.79 - 12.30 ± 2.38 / 51.96 ± 3.31 - 43.26 ± 2.53 / 71.49 ± 2.01 - 57.38 ± 1.93 / 29.69 ± 4.23 - 78.43 ± 0.93 / 74.54 ± 2.40 - 14.52 ± 2.43 / 45.18 ± 4.21 - 53.14 ± 1.81 / 63.00 ± 0.50 - 28.98 ± 2.63 / 21.75 ± 1.89 - 19.83 ± 4.76 / 41.64 ± 3.64 - 4.93 ± 1.06 / 49.69 ± 2.65 - 24.72 ± 2.22 / 54.41 ± 1.43 - 64.72 ± 1.73 / 47.25 ± 4.90 - 28.57 ± 4.08 / 50.22 ± 2.92 - 5.12 ± 1.16 / 51.09 ± 0.89 - 38.83 ± 3.81 / 56.89 ± 3.51 - 59.95 ± 1.43 / 39.14 ± 2.24 - 55.39 ± 1.97 / 69.86 ± 1.34 - 30.59 ± 1.76 / 64.21 ± 1.65 - 26.94 ± 1.06 / 57.13 ± 1.35 - 62.07 ± 1.67 / 37.68 ± 1.28 - 13.70 ± 1.36 / 34.90 ± 0.68 - 35.14 ± 2.33 / 66.66 ± 1.50 - 49.15 ± 1.48 / 68.82 ± 0.68 - 67.33 ± 1.57 / 53.00 ± 0.88 - 68.67 ± 0.74 / 66.23 ± 0.49 - 31.18 ± 1.63 / 65.23 ± 0.69 - 68.33 ± 2.04 / 84.26 ± 1.04 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.3.0 - 13.0.0 - 13.0.0 + 36.48 ± 2.80 / 59.22 ± 2.19 + 72.77 ± 1.02 / 57.29 ± 1.58 + 70.12 ± 0.78 / 74.54 ± 0.50 + 44.68 ± 0.97 / 71.05 ± 0.52 + 57.17 ± 2.60 / 80.36 ± 1.40 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 14.0.4 14.0.4 14.0.4 @@ -3154,10 +3069,10 @@ title: Germanic NLU 🇪🇺 14.1.2 14.1.2 14.1.2 - 13.0.0 + 14.0.4 14.0.4 - 13.0.0 - 13.0.0 + 14.0.4 + 14.0.4 14.0.4 14.0.4 14.0.4 @@ -3173,9 +3088,9 @@ title: Germanic NLU 🇪🇺 2.46 2.51 2.42 - 1.73 - 2.94 - 3.03 + 1.72 + 2.93 + 3.02 2.34 2.89 1.81 @@ -3248,6 +3163,91 @@ title: Germanic NLU 🇪🇺 14.0.4 14.0.4 + + CohereForAI/aya-expanse-8b (few-shot) + 8028 + 256 + 8192 + False + 2,686 ± 685 / 491 ± 164 + 2.47 + 2.15 + 2.64 + 2.05 + 3.21 + 3.08 + 2.11 + 2.73 + 1.78 + 51.32 ± 3.82 / 25.54 ± 2.10 + 52.00 ± 1.67 / 66.25 ± 1.77 + 18.48 ± 2.44 / 52.18 ± 4.28 + 52.43 ± 1.19 / 62.08 ± 0.60 + 66.55 ± 2.12 / 39.28 ± 3.45 + 63.63 ± 1.62 / 37.25 ± 3.49 + 38.61 ± 2.28 / 51.46 ± 2.62 + 15.80 ± 2.22 / 51.42 ± 3.79 + 12.30 ± 2.38 / 51.96 ± 3.31 + 43.26 ± 2.53 / 71.49 ± 2.01 + 57.38 ± 1.93 / 29.69 ± 4.23 + 78.43 ± 0.93 / 74.54 ± 2.40 + 14.52 ± 2.43 / 45.18 ± 4.21 + 53.14 ± 1.81 / 63.00 ± 0.50 + 28.98 ± 2.63 / 21.75 ± 1.89 + 19.83 ± 4.76 / 41.64 ± 3.64 + 4.93 ± 1.06 / 49.69 ± 2.65 + 24.72 ± 2.22 / 54.41 ± 1.43 + 64.72 ± 1.73 / 47.25 ± 4.90 + 28.57 ± 4.08 / 50.22 ± 2.92 + 5.12 ± 1.16 / 51.09 ± 0.89 + 38.83 ± 3.81 / 56.89 ± 3.51 + 59.95 ± 1.43 / 39.14 ± 2.24 + 55.39 ± 1.97 / 69.86 ± 1.34 + 30.59 ± 1.76 / 64.21 ± 1.65 + 26.94 ± 1.06 / 57.13 ± 1.35 + 53.02 ± 1.86 / 30.09 ± 1.16 + 13.68 ± 1.32 / 34.87 ± 0.67 + 29.97 ± 2.13 / 64.01 ± 1.12 + 53.40 ± 1.34 / 69.31 ± 0.65 + 67.33 ± 1.57 / 53.00 ± 0.88 + 68.67 ± 0.74 / 66.23 ± 0.49 + 31.18 ± 1.63 / 65.23 ± 0.69 + 68.33 ± 2.04 / 84.26 ± 1.04 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.3.0 + 13.0.0 + 13.0.0 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 
+ 14.0.4 + NorwAI/NorwAI-Mixtral-8x7B (few-shot) 46998 @@ -3341,13 +3341,13 @@ title: Germanic NLU 🇪🇺 True 1,446 ± 354 / 295 ± 100 2.48 - 2.36 - 2.66 - 2.11 - 2.91 + 2.35 + 2.68 + 2.10 + 2.90 3.11 2.09 - 2.81 + 2.80 1.77 45.42 ± 2.88 / 32.66 ± 2.49 43.16 ± 1.69 / 54.53 ± 2.83 @@ -3425,14 +3425,14 @@ title: Germanic NLU 🇪🇺 4096 True 1,118 ± 302 / 184 ± 63 - 2.51 - 2.29 - 2.56 + 2.50 + 2.28 + 2.54 2.15 - 3.34 + 3.33 3.18 2.13 - 2.73 + 2.72 1.68 44.92 ± 3.05 / 32.65 ± 2.50 49.31 ± 1.35 / 66.02 ± 1.10 @@ -3512,9 +3512,9 @@ title: Germanic NLU 🇪🇺 1,919 ± 645 / 248 ± 83 2.53 2.14 - 2.86 - 2.27 - 3.06 + 2.87 + 2.26 + 3.05 2.96 2.21 2.89 @@ -3596,13 +3596,13 @@ title: Germanic NLU 🇪🇺 True 1,364 ± 343 / 266 ± 90 2.54 - 2.32 - 2.72 - 2.12 - 3.00 + 2.31 + 2.71 + 2.11 + 2.99 3.20 2.17 - 2.99 + 2.98 1.82 43.60 ± 2.94 / 32.17 ± 2.26 45.92 ± 1.50 / 61.91 ± 1.50 @@ -3682,12 +3682,12 @@ title: Germanic NLU 🇪🇺 5,993 ± 1,028 / 1,742 ± 561 2.56 2.23 - 2.50 - 2.24 - 3.16 + 2.52 + 2.23 + 3.15 3.63 2.10 - 2.68 + 2.67 1.97 52.61 ± 1.77 / 33.64 ± 2.67 49.81 ± 1.43 / 66.32 ± 1.25 @@ -3758,91 +3758,6 @@ title: Germanic NLU 🇪🇺 12.6.1 12.6.1 - - microsoft/xlm-align-base - 278 - 250 - 512 - True - 14,744 ± 2,870 / 3,265 ± 1,053 - 2.57 - 2.32 - 2.03 - 2.14 - 2.77 - 3.75 - 2.13 - 2.77 - 2.66 - 70.36 ± 2.14 / 65.91 ± 2.15 - 47.83 ± 1.46 / 65.49 ± 0.96 - 11.87 ± 5.47 / 48.82 ± 4.15 - 29.87 ± 3.18 / 35.11 ± 2.73 - 90.07 ± 1.08 / 87.56 ± 1.39 - 85.65 ± 0.96 / 82.40 ± 1.16 - 54.46 ± 1.16 / 68.25 ± 0.76 - 12.16 ± 5.91 / 50.55 ± 4.73 - 8.99 ± 2.25 / 48.57 ± 3.67 - 49.24 ± 1.30 / 64.35 ± 1.24 - 78.60 ± 1.91 / 73.04 ± 2.25 - 73.67 ± 1.48 / 68.61 ± 3.14 - 15.41 ± 4.59 / 53.29 ± 3.93 - 32.41 ± 3.14 / 37.13 ± 3.07 - 78.01 ± 2.18 / 79.20 ± 2.10 - 38.76 ± 2.04 / 56.09 ± 2.02 - 5.92 ± 1.91 / 46.95 ± 3.38 - 10.47 ± 1.42 / 43.32 ± 3.55 - 85.97 ± 1.12 / 86.52 ± 1.08 - 2.54 ± 4.09 / 19.42 ± 4.54 - 0.02 ± 1.38 / 44.65 ± 2.65 - 0.72 ± 0.93 / 1.00 ± 1.31 - 79.38 ± 0.80 / 79.33 ± 0.74 - 58.58 ± 2.31 / 72.09 ± 1.64 - 15.34 ± 5.24 / 52.99 ± 1.90 - 16.58 ± 6.50 / 32.33 ± 11.35 - 78.85 ± 2.48 / 83.35 ± 2.28 - 11.80 ± 7.64 / 33.49 ± 6.73 - 14.56 ± 8.02 / 53.64 ± 5.14 - 42.08 ± 7.94 / 51.94 ± 9.08 - 88.62 ± 0.53 / 88.44 ± 0.46 - 11.09 ± 10.39 / 33.19 ± 4.75 - 8.46 ± 2.34 / 51.98 ± 2.86 - 49.64 ± 1.94 / 62.02 ± 1.77 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 13.3.0 - 0.0.0 - 0.0.0 - 0.0.0 - 13.3.0 - 0.0.0 - 13.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - utter-project/EuroLLM-9B-Instruct (few-shot) 9152 @@ -3850,14 +3765,14 @@ title: Germanic NLU 🇪🇺 4096 False 1,483 ± 321 / 379 ± 158 - 2.57 - 2.17 - 2.65 - 2.05 - 3.44 + 2.56 + 2.16 + 2.64 + 2.04 + 3.43 3.22 - 2.14 - 2.73 + 2.13 + 2.72 2.13 34.00 ± 2.69 / 25.49 ± 2.06 53.97 ± 1.21 / 68.40 ± 1.28 @@ -3937,7 +3852,7 @@ title: Germanic NLU 🇪🇺 2,515 ± 625 / 476 ± 159 2.58 2.19 - 2.53 + 2.52 2.09 3.58 3.24 @@ -3970,10 +3885,10 @@ title: Germanic NLU 🇪🇺 57.84 ± 3.59 / 71.20 ± 2.77 22.58 ± 5.59 / 53.17 ± 6.23 27.96 ± 1.39 / 54.99 ± 1.75 - 52.26 ± 1.87 / 42.18 ± 1.90 + 52.32 ± 1.98 / 41.98 ± 1.88 8.46 ± 1.09 / 21.30 ± 0.67 42.42 ± 3.42 / 68.81 ± 2.66 - 53.11 ± 1.79 / 63.80 ± 1.61 + 53.12 ± 1.81 / 63.79 ± 1.63 55.76 ± 2.15 / 52.69 ± 1.24 66.89 ± 1.11 / 69.52 ± 0.94 36.60 ± 2.37 / 67.85 ± 1.19 @@ -4000,18 +3915,103 @@ title: Germanic NLU 🇪🇺 14.1.2 13.0.0 13.0.0 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 + 
14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + + + microsoft/xlm-align-base + 278 + 250 + 512 + True + 14,744 ± 2,870 / 3,265 ± 1,053 + 2.58 + 2.32 + 2.08 + 2.14 + 2.77 + 3.75 + 2.17 + 2.77 + 2.67 + 70.36 ± 2.14 / 65.91 ± 2.15 + 47.83 ± 1.46 / 65.49 ± 0.96 + 11.87 ± 5.47 / 48.82 ± 4.15 + 29.87 ± 3.18 / 35.11 ± 2.73 + 90.07 ± 1.08 / 87.56 ± 1.39 + 85.65 ± 0.96 / 82.40 ± 1.16 + 54.46 ± 1.16 / 68.25 ± 0.76 + 12.16 ± 5.91 / 50.55 ± 4.73 + 8.99 ± 2.25 / 48.57 ± 3.67 + 49.24 ± 1.30 / 64.35 ± 1.24 + 78.60 ± 1.91 / 73.04 ± 2.25 + 73.67 ± 1.48 / 68.61 ± 3.14 + 15.41 ± 4.59 / 53.29 ± 3.93 + 32.41 ± 3.14 / 37.13 ± 3.07 + 78.01 ± 2.18 / 79.20 ± 2.10 + 38.76 ± 2.04 / 56.09 ± 2.02 + 5.92 ± 1.91 / 46.95 ± 3.38 + 10.47 ± 1.42 / 43.32 ± 3.55 + 85.97 ± 1.12 / 86.52 ± 1.08 + 2.54 ± 4.09 / 19.42 ± 4.54 + 0.02 ± 1.38 / 44.65 ± 2.65 + 0.72 ± 0.93 / 1.00 ± 1.31 + 79.38 ± 0.80 / 79.33 ± 0.74 + 58.58 ± 2.31 / 72.09 ± 1.64 + 15.34 ± 5.24 / 52.99 ± 1.90 + 16.58 ± 6.50 / 32.33 ± 11.35 + 78.85 ± 2.48 / 83.35 ± 2.28 + 11.80 ± 7.64 / 33.49 ± 6.73 + 14.56 ± 8.02 / 53.64 ± 5.14 + 42.08 ± 7.94 / 51.94 ± 9.08 + 88.62 ± 0.53 / 88.44 ± 0.46 + 11.09 ± 10.39 / 33.19 ± 4.75 + 8.46 ± 2.34 / 51.98 ± 2.86 + 49.64 ± 1.94 / 62.02 ± 1.77 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 13.3.0 + 0.0.0 + 0.0.0 + 0.0.0 + 13.3.0 + 0.0.0 + 13.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 CohereForAI/aya-23-8B (few-shot) @@ -4020,14 +4020,14 @@ title: Germanic NLU 🇪🇺 8192 False 2,707 ± 688 / 497 ± 166 - 2.64 + 2.63 2.29 - 2.82 - 2.07 - 3.45 - 3.37 + 2.80 + 2.06 + 3.44 + 3.36 2.42 - 2.74 + 2.72 1.95 47.08 ± 3.39 / 32.34 ± 2.97 47.16 ± 1.21 / 63.47 ± 1.57 @@ -4109,9 +4109,9 @@ title: Germanic NLU 🇪🇺 2.42 2.61 2.03 - 2.84 + 2.83 3.72 - 2.30 + 2.31 3.05 2.28 60.01 ± 2.63 / 56.13 ± 2.46 @@ -4192,9 +4192,9 @@ title: Germanic NLU 🇪🇺 2,313 ± 423 / 682 ± 210 2.66 2.50 - 2.77 - 2.24 - 3.41 + 2.76 + 2.23 + 3.40 3.38 2.33 2.79 @@ -4277,12 +4277,12 @@ title: Germanic NLU 🇪🇺 5,054 ± 1,200 / 1,056 ± 339 2.68 2.31 - 2.65 + 2.64 1.96 3.51 3.58 - 2.33 - 3.13 + 2.34 + 3.12 1.96 42.43 ± 3.36 / 29.30 ± 2.53 47.82 ± 2.00 / 63.19 ± 2.09 @@ -4362,12 +4362,12 @@ title: Germanic NLU 🇪🇺 20,154 ± 4,438 / 3,890 ± 1,256 2.69 2.41 - 2.72 - 2.26 + 2.73 + 2.25 3.13 3.74 - 2.59 - 2.42 + 2.60 + 2.41 2.26 61.17 ± 2.09 / 58.41 ± 2.11 46.39 ± 1.25 / 63.97 ± 1.08 @@ -4445,11 +4445,11 @@ title: Germanic NLU 🇪🇺 32768 False 2,370 ± 416 / 711 ± 242 - 2.71 + 2.70 2.29 - 2.87 - 2.36 - 3.50 + 2.86 + 2.35 + 3.49 3.23 2.40 2.90 @@ -4534,11 +4534,11 @@ title: Germanic NLU 🇪🇺 2.54 2.75 2.20 - 3.08 - 3.52 + 3.07 + 3.53 2.59 2.88 - 2.21 + 2.22 58.52 ± 1.78 / 55.04 ± 1.60 42.26 ± 1.13 / 61.41 ± 0.76 34.80 ± 5.89 / 64.51 ± 4.90 @@ -4616,12 +4616,12 @@ title: Germanic NLU 🇪🇺 False 2,088 ± 352 / 706 ± 214 2.73 - 2.57 + 2.56 2.85 - 2.27 - 3.32 + 2.26 + 3.31 3.48 - 2.28 + 2.27 2.99 2.10 40.19 ± 2.55 / 29.73 ± 1.44 @@ -4705,8 +4705,8 @@ title: Germanic NLU 🇪🇺 2.97 2.28 3.06 - 3.77 - 2.28 + 3.78 + 2.29 3.10 2.05 66.39 ± 1.42 / 62.24 ± 1.29 @@ -4786,9 +4786,9 @@ title: Germanic NLU 🇪🇺 True 26,099 ± 5,881 / 5,178 ± 1,665 2.77 - 2.60 + 2.59 2.65 - 2.30 + 2.29 3.21 3.72 2.45 @@ -4871,14 +4871,14 @@ title: Germanic NLU 🇪🇺 True 5,617 ± 995 / 1,623 ± 540 2.78 - 2.61 + 2.60 2.80 2.28 3.61 3.45 2.58 2.95 - 1.97 + 1.98 45.90 ± 2.53 / 33.00 ± 1.93 37.11 ± 1.88 / 56.47 ± 1.59 11.70 ± 2.16 / 50.31 ± 3.91 @@ -4957,13 
+4957,13 @@ title: Germanic NLU 🇪🇺 2,219 ± 427 / 717 ± 224 2.78 2.58 - 2.87 + 2.86 2.22 3.40 - 3.55 + 3.54 2.30 3.16 - 2.14 + 2.15 37.93 ± 3.09 / 29.50 ± 2.18 44.62 ± 1.98 / 62.62 ± 1.54 0.28 ± 0.54 / 33.48 ± 0.24 @@ -5040,12 +5040,12 @@ title: Germanic NLU 🇪🇺 131073 False 10,424 ± 2,641 / 2,081 ± 666 - 2.80 + 2.79 2.50 3.11 - 2.33 + 2.32 3.52 - 3.12 + 3.11 2.62 3.17 2.01 @@ -5126,11 +5126,11 @@ title: Germanic NLU 🇪🇺 False 634 ± 179 / 110 ± 35 2.80 - 2.49 - 2.86 + 2.48 + 2.84 2.36 3.58 - 3.53 + 3.52 2.51 3.11 1.97 @@ -5210,15 +5210,15 @@ title: Germanic NLU 🇪🇺 200000 True 277 ± 77 / 70 ± 25 - 2.85 + 2.84 2.14 - 3.23 - 2.53 - 3.36 - 3.64 + 3.21 + 2.52 + 3.35 + 3.63 2.56 2.79 - 2.55 + 2.56 59.48 ± 1.97 / 42.21 ± 2.53 56.46 ± 2.39 / 71.07 ± 1.48 20.57 ± 3.78 / 49.85 ± 2.21 @@ -5302,8 +5302,8 @@ title: Germanic NLU 🇪🇺 3.29 3.36 2.57 - 3.16 - 2.37 + 3.14 + 2.38 50.83 ± 1.31 / 41.56 ± 1.86 53.23 ± 2.63 / 64.70 ± 3.82 23.02 ± 6.31 / 53.19 ± 7.11 @@ -5380,14 +5380,14 @@ title: Germanic NLU 🇪🇺 4096 False 2,643 ± 455 / 800 ± 247 - 2.88 + 2.87 2.50 - 3.09 + 3.08 2.48 - 3.51 - 3.62 + 3.50 + 3.61 2.61 - 3.13 + 3.12 2.09 35.44 ± 3.00 / 24.63 ± 1.65 44.88 ± 1.45 / 62.35 ± 1.33 @@ -5468,11 +5468,11 @@ title: Germanic NLU 🇪🇺 3.03 2.89 3.15 - 2.84 - 3.74 + 2.83 + 3.73 3.29 - 2.59 - 3.44 + 2.60 + 3.46 2.32 49.01 ± 1.94 / 29.13 ± 2.09 47.95 ± 1.37 / 64.82 ± 0.89 @@ -5550,14 +5550,14 @@ title: Germanic NLU 🇪🇺 4096 True 10,246 ± 3,021 / 1,629 ± 550 - 3.06 + 3.05 2.89 - 3.26 - 2.55 - 3.94 - 3.94 + 3.25 + 2.54 + 3.91 + 3.93 2.57 - 3.16 + 3.15 2.18 37.37 ± 2.46 / 26.81 ± 2.24 31.44 ± 1.82 / 48.96 ± 2.35 @@ -5628,91 +5628,6 @@ title: Germanic NLU 🇪🇺 13.0.0 13.0.0 - - sentence-transformers/quora-distilbert-multilingual - 135 - 120 - 512 - True - 26,458 ± 5,992 / 5,274 ± 1,731 - 3.06 - 2.99 - 3.11 - 2.64 - 3.25 - 3.71 - 3.12 - 2.72 - 2.91 - 54.48 ± 2.16 / 52.85 ± 2.28 - 36.60 ± 1.60 / 56.93 ± 1.48 - 8.84 ± 5.33 / 51.76 ± 3.80 - 13.97 ± 1.75 / 19.76 ± 2.26 - 77.81 ± 0.76 / 74.83 ± 0.79 - 72.22 ± 0.95 / 68.32 ± 1.13 - 44.59 ± 1.89 / 59.87 ± 1.84 - 8.98 ± 3.55 / 52.49 ± 2.08 - 5.72 ± 3.29 / 50.40 ± 3.21 - 0.00 ± 0.00 / 0.00 ± 0.00 - 65.50 ± 1.20 / 59.72 ± 1.22 - 68.36 ± 1.18 / 63.94 ± 2.47 - 14.81 ± 6.63 / 55.50 ± 4.28 - 16.11 ± 1.18 / 22.88 ± 1.34 - 63.36 ± 0.96 / 65.24 ± 0.77 - 33.10 ± 1.86 / 52.77 ± 1.64 - 1.02 ± 0.94 / 47.05 ± 2.13 - 6.48 ± 0.37 / 27.44 ± 0.44 - 82.91 ± 0.89 / 83.43 ± 0.87 - 9.77 ± 3.39 / 32.64 ± 2.55 - 1.67 ± 2.22 / 46.20 ± 3.31 - 0.00 ± 0.00 / 0.00 ± 0.00 - 64.12 ± 0.92 / 62.50 ± 0.85 - 49.66 ± 1.68 / 66.33 ± 1.12 - 0.58 ± 1.23 / 49.27 ± 0.86 - 0.05 ± 0.06 / 0.36 ± 0.30 - 67.89 ± 1.61 / 74.48 ± 1.24 - 23.25 ± 6.95 / 44.88 ± 6.27 - 21.36 ± 7.80 / 59.50 ± 3.54 - 4.50 ± 0.39 / 9.94 ± 0.33 - 81.71 ± 0.66 / 82.33 ± 0.53 - 50.69 ± 1.19 / 50.90 ± 0.50 - 2.16 ± 1.58 / 49.99 ± 1.47 - 4.19 ± 0.69 / 11.99 ± 0.62 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 13.3.0 - 0.0.0 - 0.0.0 - 0.0.0 - 13.3.0 - 0.0.0 - 13.0.0 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - MaLA-LM/emma-500-llama2-7b (few-shot) 6738 @@ -5720,15 +5635,15 @@ title: Germanic NLU 🇪🇺 4096 True 6,275 ± 1,193 / 1,755 ± 578 - 3.07 - 2.93 - 3.17 + 3.06 + 2.92 + 3.15 2.38 - 3.48 - 3.62 + 3.47 + 3.61 3.26 - 3.40 - 2.32 + 3.39 + 2.33 28.18 ± 3.39 / 24.25 ± 3.30 29.32 ± 7.19 / 41.08 ± 8.29 2.90 ± 2.18 / 37.93 ± 4.98 @@ -5798,6 +5713,91 @@ title: Germanic NLU 🇪🇺 13.0.0 13.0.0 + + 
sentence-transformers/quora-distilbert-multilingual + 135 + 120 + 512 + True + 26,458 ± 5,992 / 5,274 ± 1,731 + 3.06 + 2.99 + 3.11 + 2.63 + 3.25 + 3.71 + 3.13 + 2.72 + 2.92 + 54.48 ± 2.16 / 52.85 ± 2.28 + 36.60 ± 1.60 / 56.93 ± 1.48 + 8.84 ± 5.33 / 51.76 ± 3.80 + 13.97 ± 1.75 / 19.76 ± 2.26 + 77.81 ± 0.76 / 74.83 ± 0.79 + 72.22 ± 0.95 / 68.32 ± 1.13 + 44.59 ± 1.89 / 59.87 ± 1.84 + 8.98 ± 3.55 / 52.49 ± 2.08 + 5.72 ± 3.29 / 50.40 ± 3.21 + 0.00 ± 0.00 / 0.00 ± 0.00 + 65.50 ± 1.20 / 59.72 ± 1.22 + 68.36 ± 1.18 / 63.94 ± 2.47 + 14.81 ± 6.63 / 55.50 ± 4.28 + 16.11 ± 1.18 / 22.88 ± 1.34 + 63.36 ± 0.96 / 65.24 ± 0.77 + 33.10 ± 1.86 / 52.77 ± 1.64 + 1.02 ± 0.94 / 47.05 ± 2.13 + 6.48 ± 0.37 / 27.44 ± 0.44 + 82.91 ± 0.89 / 83.43 ± 0.87 + 9.77 ± 3.39 / 32.64 ± 2.55 + 1.67 ± 2.22 / 46.20 ± 3.31 + 0.00 ± 0.00 / 0.00 ± 0.00 + 64.12 ± 0.92 / 62.50 ± 0.85 + 49.66 ± 1.68 / 66.33 ± 1.12 + 0.58 ± 1.23 / 49.27 ± 0.86 + 0.05 ± 0.06 / 0.36 ± 0.30 + 67.89 ± 1.61 / 74.48 ± 1.24 + 23.25 ± 6.95 / 44.88 ± 6.27 + 21.36 ± 7.80 / 59.50 ± 3.54 + 4.50 ± 0.39 / 9.94 ± 0.33 + 81.71 ± 0.66 / 82.33 ± 0.53 + 50.69 ± 1.19 / 50.90 ± 0.50 + 2.16 ± 1.58 / 49.99 ± 1.47 + 4.19 ± 0.69 / 11.99 ± 0.62 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 13.3.0 + 0.0.0 + 0.0.0 + 0.0.0 + 13.3.0 + 0.0.0 + 13.0.0 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + ibm-granite/granite-3b-code-base-2k (few-shot) 3483 @@ -5805,12 +5805,12 @@ title: Germanic NLU 🇪🇺 2048 True 2,732 ± 868 / 662 ± 238 - 3.13 - 2.84 - 3.67 + 3.12 + 2.83 + 3.66 2.43 3.75 - 3.98 + 3.97 3.00 3.19 2.16 @@ -5893,12 +5893,12 @@ title: Germanic NLU 🇪🇺 3.14 2.99 3.11 - 2.64 - 3.29 + 2.63 + 3.28 3.71 - 3.12 + 3.15 3.34 - 2.91 + 2.92 54.48 ± 2.16 / 52.85 ± 2.28 36.60 ± 1.60 / 56.93 ± 1.48 8.84 ± 5.33 / 51.76 ± 3.80 @@ -5975,12 +5975,12 @@ title: Germanic NLU 🇪🇺 2048 True 9,059 ± 1,947 / 2,201 ± 728 - 3.16 - 2.92 - 3.30 - 2.56 - 3.93 - 3.97 + 3.15 + 2.91 + 3.29 + 2.55 + 3.92 + 3.96 2.93 3.33 2.31 @@ -6053,6 +6053,91 @@ title: Germanic NLU 🇪🇺 13.0.0 13.0.0 + + dbmdz/bert-base-historic-multilingual-cased + 111 + 32 + 512 + True + 20,047 ± 4,407 / 3,844 ± 1,259 + 3.17 + 3.16 + 3.18 + 2.32 + 3.46 + 3.84 + 2.90 + 3.54 + 2.96 + 47.61 ± 1.71 / 45.91 ± 1.91 + 24.17 ± 1.92 / 43.75 ± 2.75 + 8.14 ± 3.76 / 51.78 ± 1.81 + 25.19 ± 1.29 / 30.51 ± 1.06 + 68.63 ± 1.64 / 64.83 ± 1.55 + 67.70 ± 2.68 / 63.70 ± 2.54 + 25.68 ± 2.17 / 41.65 ± 2.77 + 6.73 ± 5.40 / 48.20 ± 3.68 + 3.35 ± 2.61 / 47.52 ± 3.20 + 22.57 ± 1.57 / 34.64 ± 1.94 + 68.83 ± 1.00 / 63.29 ± 1.48 + 64.25 ± 1.66 / 63.62 ± 2.92 + 28.62 ± 9.43 / 59.33 ± 5.91 + 28.78 ± 2.01 / 34.26 ± 2.03 + 56.62 ± 0.57 / 59.34 ± 0.66 + 24.79 ± 3.07 / 45.25 ± 3.48 + -1.21 ± 1.45 / 47.43 ± 1.67 + 6.01 ± 0.45 / 29.08 ± 1.30 + 80.45 ± 1.48 / 81.32 ± 1.42 + 0.90 ± 2.61 / 26.49 ± 2.31 + 2.52 ± 1.88 / 47.78 ± 1.67 + 0.58 ± 0.81 / 1.12 ± 1.62 + 65.35 ± 1.08 / 63.62 ± 1.05 + 37.77 ± 2.20 / 58.20 ± 1.43 + 16.07 ± 3.83 / 54.09 ± 2.44 + 5.67 ± 1.14 / 16.63 ± 2.52 + 56.69 ± 1.80 / 68.42 ± 0.85 + 9.29 ± 3.04 / 30.73 ± 2.40 + 3.02 ± 1.45 / 50.08 ± 1.17 + 22.14 ± 1.13 / 31.59 ± 0.96 + 77.64 ± 0.74 / 79.56 ± 0.53 + 12.42 ± 9.17 / 33.80 ± 4.31 + 13.65 ± 6.22 / 53.84 ± 2.20 + 33.29 ± 0.96 / 45.71 ± 0.70 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 12.6.1 + 13.3.0 + 12.6.1 + 12.6.1 + 0.0.0 + 13.3.0 + 0.0.0 + 13.0.0 + 12.8.0 + 12.8.0 + 12.8.0 + 12.8.0 + 12.6.1 + 12.6.1 + 12.6.1 + 
12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 118 @@ -6060,15 +6145,15 @@ title: Germanic NLU 🇪🇺 512 True 29,201 ± 6,282 / 6,045 ± 2,027 - 3.16 + 3.17 2.63 - 2.91 + 2.90 2.45 3.32 3.75 - 3.08 + 3.09 4.20 - 2.97 + 2.98 56.75 ± 1.91 / 53.43 ± 1.87 44.48 ± 1.32 / 63.11 ± 0.83 26.74 ± 1.94 / 62.19 ± 1.84 @@ -6138,91 +6223,6 @@ title: Germanic NLU 🇪🇺 12.6.1 12.6.1 - - dbmdz/bert-base-historic-multilingual-cased - 111 - 32 - 512 - True - 20,047 ± 4,407 / 3,844 ± 1,259 - 3.17 - 3.17 - 3.19 - 2.33 - 3.46 - 3.83 - 2.90 - 3.54 - 2.95 - 47.61 ± 1.71 / 45.91 ± 1.91 - 24.17 ± 1.92 / 43.75 ± 2.75 - 8.14 ± 3.76 / 51.78 ± 1.81 - 25.19 ± 1.29 / 30.51 ± 1.06 - 68.63 ± 1.64 / 64.83 ± 1.55 - 67.70 ± 2.68 / 63.70 ± 2.54 - 25.68 ± 2.17 / 41.65 ± 2.77 - 6.73 ± 5.40 / 48.20 ± 3.68 - 3.35 ± 2.61 / 47.52 ± 3.20 - 22.57 ± 1.57 / 34.64 ± 1.94 - 68.83 ± 1.00 / 63.29 ± 1.48 - 64.25 ± 1.66 / 63.62 ± 2.92 - 28.62 ± 9.43 / 59.33 ± 5.91 - 28.78 ± 2.01 / 34.26 ± 2.03 - 56.62 ± 0.57 / 59.34 ± 0.66 - 24.79 ± 3.07 / 45.25 ± 3.48 - -1.21 ± 1.45 / 47.43 ± 1.67 - 6.01 ± 0.45 / 29.08 ± 1.30 - 80.45 ± 1.48 / 81.32 ± 1.42 - 0.90 ± 2.61 / 26.49 ± 2.31 - 2.52 ± 1.88 / 47.78 ± 1.67 - 0.58 ± 0.81 / 1.12 ± 1.62 - 65.35 ± 1.08 / 63.62 ± 1.05 - 37.77 ± 2.20 / 58.20 ± 1.43 - 16.07 ± 3.83 / 54.09 ± 2.44 - 5.67 ± 1.14 / 16.63 ± 2.52 - 56.69 ± 1.80 / 68.42 ± 0.85 - 9.29 ± 3.04 / 30.73 ± 2.40 - 3.02 ± 1.45 / 50.08 ± 1.17 - 22.14 ± 1.13 / 31.59 ± 0.96 - 77.64 ± 0.74 / 79.56 ± 0.53 - 12.42 ± 9.17 / 33.80 ± 4.31 - 13.65 ± 6.22 / 53.84 ± 2.20 - 33.29 ± 0.96 / 45.71 ± 0.70 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 12.6.1 - 13.3.0 - 12.6.1 - 12.6.1 - 0.0.0 - 13.3.0 - 0.0.0 - 13.0.0 - 12.8.0 - 12.8.0 - 12.8.0 - 12.8.0 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - ibm-granite/granite-3.0-3b-a800m-base (few-shot) 3374 @@ -6230,14 +6230,14 @@ title: Germanic NLU 🇪🇺 4096 True 10,504 ± 3,028 / 1,678 ± 559 - 3.31 - 3.41 + 3.30 + 3.35 3.44 - 2.77 - 4.09 - 4.13 + 2.76 + 4.08 + 4.12 3.03 - 3.31 + 3.30 2.34 31.80 ± 2.87 / 23.06 ± 2.09 6.85 ± 2.25 / 19.42 ± 0.91 @@ -6318,11 +6318,11 @@ title: Germanic NLU 🇪🇺 3.33 3.10 3.65 - 3.06 - 3.56 + 3.05 + 3.55 4.05 3.07 - 3.24 + 3.23 2.93 41.09 ± 1.83 / 40.40 ± 1.82 27.33 ± 1.92 / 49.78 ± 1.38 @@ -6401,14 +6401,14 @@ title: Germanic NLU 🇪🇺 False 1,254 ± 328 / 243 ± 83 3.35 - 3.05 + 3.04 3.55 - 2.85 + 2.84 3.79 3.82 3.14 3.57 - 3.02 + 3.03 34.66 ± 1.19 / 21.37 ± 1.52 21.93 ± 3.72 / 31.67 ± 4.68 1.50 ± 1.04 / 33.84 ± 0.24 @@ -6485,15 +6485,15 @@ title: Germanic NLU 🇪🇺 4096 True 15,009 ± 4,072 / 2,702 ± 878 - 3.40 - 3.05 - 3.43 - 2.75 - 4.08 - 4.36 + 3.39 + 3.04 + 3.42 + 2.74 + 4.07 + 4.35 3.19 - 3.66 - 2.65 + 3.65 + 2.66 19.61 ± 2.68 / 17.44 ± 2.64 37.92 ± 1.74 / 46.23 ± 1.91 2.81 ± 1.13 / 38.15 ± 2.81 @@ -6571,13 +6571,13 @@ title: Germanic NLU 🇪🇺 True 15,971 ± 3,654 / 3,609 ± 1,197 3.42 - 3.25 + 3.24 3.69 - 2.77 - 3.78 - 4.26 + 2.76 + 3.77 + 4.25 3.38 - 3.99 + 3.98 2.25 29.44 ± 1.81 / 20.31 ± 1.68 18.49 ± 2.47 / 35.29 ± 2.83 @@ -6656,14 +6656,14 @@ title: Germanic NLU 🇪🇺 True 34,042 ± 8,482 / 5,951 ± 1,950 3.43 - 3.33 + 3.32 3.62 - 3.06 - 3.54 + 3.05 + 3.53 3.72 - 3.56 + 3.57 3.29 - 3.31 + 3.32 46.78 ± 1.50 / 44.41 ± 1.78 27.78 ± 2.22 / 49.38 ± 2.89 3.04 ± 1.85 / 46.85 ± 3.03 @@ -6741,14 +6741,14 @@ title: Germanic NLU 🇪🇺 True 19,206 ± 4,451 / 3,658 ± 1,187 3.47 - 3.66 - 3.51 - 3.08 - 3.50 + 3.65 + 3.52 + 3.07 + 3.49 3.72 3.39 3.61 - 3.30 + 3.31 26.96 ± 1.31 / 25.63 
± 1.35 30.13 ± 2.10 / 46.78 ± 4.49 2.01 ± 1.29 / 48.79 ± 1.65 @@ -6825,12 +6825,12 @@ title: Germanic NLU 🇪🇺 4096 True 1,438 ± 410 / 233 ± 79 - 3.53 - 3.28 - 3.63 - 3.21 - 3.83 - 3.92 + 3.52 + 3.23 + 3.62 + 3.24 + 3.82 + 3.91 3.55 3.79 3.03 @@ -6911,14 +6911,14 @@ title: Germanic NLU 🇪🇺 True 33,247 ± 8,123 / 6,017 ± 1,977 3.53 - 3.66 - 3.51 - 3.08 - 3.46 + 3.65 + 3.52 + 3.07 + 3.45 3.72 3.39 - 4.10 - 3.30 + 4.09 + 3.31 26.96 ± 1.31 / 25.63 ± 1.35 30.13 ± 2.10 / 46.78 ± 4.49 2.01 ± 1.29 / 48.79 ± 1.65 @@ -6996,11 +6996,11 @@ title: Germanic NLU 🇪🇺 True 16,249 ± 3,690 / 3,689 ± 1,226 3.57 - 3.52 + 3.57 3.87 - 2.71 - 3.86 - 4.41 + 2.70 + 3.85 + 4.39 3.61 4.02 2.55 @@ -7080,15 +7080,15 @@ title: Germanic NLU 🇪🇺 2048 True 10,756 ± 3,589 / 1,157 ± 670 - 3.71 - 3.27 - 3.94 - 3.22 - 4.00 - 4.40 + 3.70 + 3.26 + 3.93 + 3.21 + 3.99 + 4.39 3.68 - 3.88 - 3.25 + 3.87 + 3.26 28.30 ± 2.45 / 22.93 ± 1.82 28.95 ± 4.05 / 48.32 ± 5.01 0.20 ± 0.52 / 34.12 ± 1.62 @@ -7167,13 +7167,13 @@ title: Germanic NLU 🇪🇺 12,783 ± 2,537 / 2,712 ± 885 3.76 3.69 - 3.91 - 3.47 - 3.49 + 3.90 + 3.46 + 3.48 4.00 3.74 4.02 - 3.75 + 3.76 36.51 ± 2.44 / 36.31 ± 2.71 22.07 ± 2.24 / 44.70 ± 3.72 1.63 ± 1.49 / 45.36 ± 3.07 @@ -7251,14 +7251,14 @@ title: Germanic NLU 🇪🇺 True 78,027 ± 15,466 / 17,064 ± 5,335 3.80 - 3.82 - 3.99 - 3.61 - 3.80 + 3.81 + 3.98 + 3.60 + 3.79 3.98 - 3.75 + 3.76 4.02 - 3.44 + 3.45 33.62 ± 1.57 / 31.69 ± 1.40 20.71 ± 1.68 / 40.07 ± 2.65 1.19 ± 1.08 / 48.46 ± 1.34 @@ -7328,6 +7328,91 @@ title: Germanic NLU 🇪🇺 12.6.1 12.6.1 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 3.84 + 3.71 + 4.14 + 3.75 + 4.37 + 4.52 + 3.42 + 4.24 + 2.60 + 20.03 ± 1.60 / 17.52 ± 1.50 + 15.96 ± 2.77 / 38.74 ± 2.88 + 0.86 ± 1.76 / 42.48 ± 3.82 + 28.98 ± 1.93 / 35.38 ± 2.10 + 29.25 ± 2.15 / 28.34 ± 2.25 + 25.45 ± 1.96 / 24.62 ± 1.86 + 11.28 ± 0.71 / 22.46 ± 2.54 + 1.52 ± 1.26 / 40.82 ± 4.08 + 0.52 ± 1.60 / 41.23 ± 4.21 + 8.47 ± 0.94 / 20.43 ± 2.10 + 26.41 ± 2.74 / 25.18 ± 2.46 + 25.99 ± 3.51 / 35.36 ± 4.53 + 1.64 ± 1.90 / 37.52 ± 2.69 + 21.39 ± 1.66 / 26.70 ± 1.83 + 9.39 ± 3.31 / 9.28 ± 3.38 + 6.44 ± 2.00 / 22.24 ± 3.62 + -0.72 ± 1.67 / 43.21 ± 3.74 + 3.34 ± 0.98 / 21.41 ± 2.93 + 14.18 ± 3.48 / 13.89 ± 3.46 + 2.36 ± 2.76 / 17.70 ± 4.11 + 2.29 ± 2.66 / 48.34 ± 3.37 + 6.35 ± 1.25 / 12.91 ± 1.66 + 32.54 ± 1.23 / 30.63 ± 1.12 + 22.27 ± 4.97 / 36.09 ± 3.66 + 7.18 ± 1.13 / 51.77 ± 1.56 + 16.72 ± 0.88 / 38.07 ± 0.76 + 28.25 ± 3.03 / 25.24 ± 2.38 + 3.73 ± 1.83 / 15.20 ± 2.26 + 0.76 ± 1.10 / 33.57 ± 0.34 + 19.08 ± 2.27 / 28.16 ± 2.64 + 43.00 ± 1.94 / 39.96 ± 1.87 + 54.47 ± 1.37 / 64.39 ± 0.56 + 17.44 ± 1.67 / 50.79 ± 2.44 + 53.15 ± 1.38 / 70.06 ± 0.79 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + PleIAs/Pleias-3b-Preview (few-shot) 3212 @@ -7335,15 +7420,15 @@ title: Germanic NLU 🇪🇺 4096 True 6,513 ± 1,241 / 1,282 ± 644 - 3.94 - 3.72 - 4.00 - 3.37 - 4.24 - 4.60 + 3.93 + 3.68 + 3.99 + 3.36 + 4.23 + 4.59 3.90 - 3.94 - 3.78 + 3.93 + 3.79 15.93 ± 3.91 / 14.68 ± 2.81 13.01 ± 2.33 / 28.28 ± 4.63 0.05 ± 1.37 / 40.73 ± 3.78 @@ -7421,12 +7506,12 @@ title: Germanic NLU 🇪🇺 True 2,519 ± 841 / 323 ± 104 3.95 - 3.43 - 4.11 - 3.75 + 3.47 + 4.10 + 3.71 4.14 - 4.63 - 4.06 + 4.61 + 4.05 4.01 3.49 16.17 ± 3.44 / 14.33 ± 1.92 @@ -7506,13 +7591,13 @@ title: 
Germanic NLU 🇪🇺 True 21,777 ± 6,115 / 3,617 ± 1,211 4.07 - 4.13 - 4.37 - 3.90 + 4.11 + 4.36 + 3.92 4.33 - 4.54 + 4.53 4.02 - 4.29 + 4.28 2.98 8.97 ± 3.18 / 8.62 ± 2.72 2.66 ± 2.70 / 16.29 ± 2.34 @@ -7591,11 +7676,11 @@ title: Germanic NLU 🇪🇺 True 22,023 ± 6,203 / 3,675 ± 1,231 4.07 - 4.01 - 4.34 - 3.93 + 3.99 + 4.33 + 3.95 4.31 - 4.60 + 4.59 3.95 4.23 3.19 @@ -7675,15 +7760,15 @@ title: Germanic NLU 🇪🇺 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.18 - 3.94 - 4.37 - 4.00 + 4.17 + 3.99 + 4.36 + 3.91 4.33 - 4.52 + 4.50 4.02 4.45 - 3.81 + 3.82 13.84 ± 1.95 / 13.12 ± 1.60 9.47 ± 3.30 / 25.66 ± 3.36 -0.36 ± 1.60 / 39.52 ± 3.19 @@ -7761,14 +7846,14 @@ title: Germanic NLU 🇪🇺 True 2,331 ± 787 / 301 ± 97 4.23 - 4.04 + 3.99 4.40 - 3.97 + 4.00 4.35 - 4.65 + 4.63 4.15 - 4.48 - 3.82 + 4.47 + 3.83 10.59 ± 2.24 / 10.29 ± 1.37 13.31 ± 3.23 / 34.38 ± 3.13 0.52 ± 0.78 / 33.76 ± 0.37 @@ -7845,15 +7930,15 @@ title: Germanic NLU 🇪🇺 512 True 2,214 ± 94 / 1,494 ± 229 - 4.24 - 4.12 + 4.23 + 4.11 4.28 - 3.94 - 4.03 - 4.26 - 4.27 - 4.79 - 4.21 + 3.93 + 4.02 + 4.25 + 4.28 + 4.78 + 4.22 16.04 ± 2.47 / 15.60 ± 2.62 17.37 ± 3.82 / 36.83 ± 4.86 1.34 ± 0.97 / 35.45 ± 3.20 @@ -7931,13 +8016,13 @@ title: Germanic NLU 🇪🇺 True 26,346 ± 7,812 / 4,082 ± 1,372 4.33 - 4.14 - 4.31 - 4.25 + 4.18 + 4.30 + 4.22 4.33 - 4.58 + 4.57 4.53 - 4.75 + 4.74 3.74 13.72 ± 1.83 / 13.41 ± 1.52 3.79 ± 3.11 / 21.06 ± 4.74 @@ -8016,14 +8101,14 @@ title: Germanic NLU 🇪🇺 True 25,602 ± 7,583 / 3,953 ± 1,325 4.37 - 4.24 - 4.52 - 4.18 + 4.23 + 4.51 + 4.20 4.37 - 4.57 + 4.55 4.49 4.79 - 3.79 + 3.80 12.11 ± 1.07 / 11.48 ± 1.07 2.61 ± 3.22 / 18.95 ± 3.93 0.25 ± 1.87 / 39.65 ± 4.00 @@ -8100,15 +8185,15 @@ title: Germanic NLU 🇪🇺 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 4.82 - 4.68 - 4.86 - 4.80 - 4.66 - 5.00 - 4.77 - 5.08 - 4.72 + 4.81 + 4.67 + 4.85 + 4.77 + 4.65 + 4.97 + 4.76 + 5.07 + 4.73 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 18.12 ± 0.19 0.00 ± 0.00 / 33.25 ± 0.23 diff --git a/icelandic-nlg.csv b/icelandic-nlg.csv index f8bb0ec8..4e275cb0 100644 --- a/icelandic-nlg.csv +++ b/icelandic-nlg.csv @@ -5,106 +5,107 @@ meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,7 "gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.86,64.69,56.18,45.52,15.8,67.06,82.1,42.82 "gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,1.89,30.39,47.47,43.3,29.82,63.89,86.79,54.76 "gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.9,72.85,53.43,49.26,27.36,67.01,78.98,9.83 -meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.9,69.95,51.27,18.75,30.79,69.53,75.02,48.44 +meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.91,69.95,51.27,18.75,30.79,69.53,75.02,48.44 meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,2.0,69.04,50.79,20.36,24.56,69.13,77.24,46.46 mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.1,37.98,47.34,50.14,16.27,63.4,79.94,44.61 Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,2.17,68.54,47.72,22.98,26.72,66.93,67.74,36.29 ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,2.29,67.23,50.38,20.01,21.18,67.61,66.74,33.03 "gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.29,31.11,50.02,35.85,26.93,66.48,72.8,28.63 -Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,2.35,65.08,48.71,20.51,28.29,65.67,52.92,28.01 -"claude-3-5-sonnet-20241022 (zero-shot, 
val)",-1,-1,200000,True,False,193,2.45,61.7,51.24,52.43,22.92,66.02,15.97,5.31 +Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,2.36,65.08,48.71,20.51,28.29,65.67,52.92,28.01 +"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.46,61.7,51.24,52.43,22.92,66.02,15.97,5.31 nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.61,54.15,51.96,19.63,21.22,43.26,73.61,45.29 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.88,62.68,31.96,11.81,30.49,66.24,21.1,9.34 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.94,60.2,38.09,9.14,28.66,67.5,17.93,5.59 -mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.96,55.09,39.6,8.23,30.78,63.01,18.12,8.15 -nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.97,53.28,41.8,11.07,29.74,64.08,20.14,3.62 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.98,46.48,39.91,11.72,25.91,67.67,25.83,6.4 -meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,3.01,52.97,41.29,5.95,31.99,65.19,21.56,4.44 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,3.07,50.45,34.68,8.69,31.94,65.22,19.93,4.52 -mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,3.22,47.1,24.43,7.19,28.73,66.62,21.98,0.1 -NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,3.32,62.28,38.08,2.85,0.86,57.6,48.56,16.74 -NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,3.32,41.84,38.12,7.47,20.87,65.24,13.41,3.94 -mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,3.35,35.0,23.93,9.69,21.38,64.69,17.73,9.06 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.94,60.2,38.09,9.14,28.66,67.5,17.93,5.59 +mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.97,55.09,39.6,8.23,30.78,63.01,18.12,8.15 +nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.98,53.28,41.8,11.07,29.74,64.08,20.14,3.62 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.99,46.48,39.91,11.72,25.91,67.67,25.83,6.4 +meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,3.02,52.97,41.29,5.95,31.99,65.19,21.56,4.44 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,3.07,50.45,34.68,8.69,31.94,65.22,19.93,4.52 +mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,3.23,47.1,24.43,7.19,28.73,66.62,21.98,0.1 +NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,3.33,62.28,38.08,2.85,0.86,57.6,48.56,16.74 +NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,3.33,41.84,38.12,7.47,20.87,65.24,13.41,3.94 +mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,3.36,35.0,23.93,9.69,21.38,64.69,17.73,9.06 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,3.39,42.29,38.87,0.28,18.74,66.07,10.93,4.44 -mhenrichsen/hestenettetLM (few-shot),7242,32,4096,True,False,1151,3.42,50.99,24.17,1.15,27.31,56.73,8.81,3.23 -NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,3.44,36.73,34.34,6.57,22.59,58.69,12.07,2.87 -mistralai/Mistral-7B-v0.1 (few-shot),7242,32,4096,True,False,1446,3.44,50.69,24.38,1.46,27.11,56.76,8.71,3.21 +mhenrichsen/hestenettetLM (few-shot),7242,32,4096,True,False,1151,3.43,50.99,24.17,1.15,27.31,56.73,8.81,3.23 +NorwAI/NorwAI-Mixtral-8x7B 
(few-shot),46998,68,32768,True,False,2368,3.45,36.73,34.34,6.57,22.59,58.69,12.07,2.87 +mistralai/Mistral-7B-v0.1 (few-shot),7242,32,4096,True,False,1446,3.45,50.69,24.38,1.46,27.11,56.76,8.71,3.21 senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,3.48,56.71,7.92,3.44,21.55,65.39,9.11,3.3 -Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,3.53,42.23,27.93,6.38,19.39,61.92,4.22,0.31 -mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,3.53,46.73,26.28,1.5,25.17,55.71,7.21,2.09 -CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,3.54,28.98,19.83,4.93,24.72,63.45,10.97,4.23 -meta-llama/Llama-3.2-3B (few-shot),3213,128,131073,True,False,3713,3.56,42.04,14.53,0.0,24.07,61.8,13.79,0.25 -occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,3.6,40.71,14.7,0.71,20.66,65.25,5.35,0.35 +Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,3.54,42.23,27.93,6.38,19.39,61.92,4.22,0.31 +mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,3.54,46.73,26.28,1.5,25.17,55.71,7.21,2.09 +CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,3.55,28.98,19.83,4.93,24.72,63.45,10.97,4.23 +meta-llama/Llama-3.2-3B (few-shot),3213,128,131073,True,False,3713,3.57,42.04,14.53,0.0,24.07,61.8,13.79,0.25 +occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,3.61,40.71,14.7,0.71,20.66,65.25,5.35,0.35 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,3.63,34.99,31.19,-10.68,23.65,66.12,9.0,0.0 AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot),7111,64,2048,True,False,1473,3.65,19.41,44.79,1.93,21.13,61.9,4.05,-2.29 -occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,3.66,40.08,16.23,1.59,15.98,62.55,5.98,-0.51 nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.67,43.65,10.7,10.77,0.29,65.26,23.45,8.35 +occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,3.67,40.08,16.23,1.59,15.98,62.55,5.98,-0.51 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,3.68,42.67,9.95,1.11,22.25,63.81,5.12,0.89 -LumiOpen/Viking-7B (few-shot),7550,131,4096,True,False,1431,3.7,33.21,25.5,-0.08,25.76,56.52,0.81,-3.96 -mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,3.7,36.04,12.93,-0.36,18.06,62.8,5.44,6.35 -timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,3.71,36.47,1.84,2.54,18.66,63.68,5.12,8.3 +LumiOpen/Viking-7B (few-shot),7550,131,4096,True,False,1431,3.71,33.21,25.5,-0.08,25.76,56.52,0.81,-3.96 +mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,3.71,36.04,12.93,-0.36,18.06,62.8,5.44,6.35 utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,3.71,24.92,9.76,0.15,28.18,64.67,11.1,0.89 -MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.72,31.81,18.33,3.63,16.72,58.72,12.62,3.43 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,3.72,50.89,9.52,0.5,17.43,59.94,5.52,1.73 +timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,3.72,36.47,1.84,2.54,18.66,63.68,5.12,8.3 +MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.73,31.81,18.33,3.63,16.72,58.72,12.62,3.43 01-ai/Yi-1.5-6B (few-shot),6061,64,4097,True,False,2867,3.75,38.15,5.32,0.98,20.39,61.23,7.59,3.44 -meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,3.76,41.1,13.59,-1.07,16.13,62.3,3.16,1.84 
-google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,3.77,14.79,15.24,1.1,25.42,62.67,10.76,-5.2 -ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,3.77,37.82,-0.29,-0.12,21.59,62.35,6.54,1.83 -mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,3.77,34.8,17.64,5.46,12.66,61.79,2.88,-1.11 -meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,3.78,27.57,10.07,-1.39,22.98,62.0,13.33,0.74 -CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,3.81,47.16,0.33,3.84,21.75,59.16,3.7,-3.24 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,3.81,33.05,14.42,0.71,17.23,60.08,2.67,2.74 +ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,3.76,37.82,-0.29,-0.12,21.59,62.35,6.54,1.83 +meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,3.77,41.1,13.59,-1.07,16.13,62.3,3.16,1.84 +google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,3.78,14.79,15.24,1.1,25.42,62.67,10.76,-5.2 +mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,3.78,34.8,17.64,5.46,12.66,61.79,2.88,-1.11 +meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,3.79,27.57,10.07,-1.39,22.98,62.0,13.33,0.74 +CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,3.8,47.16,0.33,3.84,21.75,59.16,3.7,-3.24 microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,3.82,27.22,17.8,1.31,17.24,62.0,1.85,1.06 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,3.82,33.05,14.42,0.71,17.23,60.08,2.67,2.74 HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,3.84,29.67,26.31,-0.03,21.77,52.52,0.09,-0.74 -meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,3.84,32.71,13.24,0.66,18.04,60.73,3.65,-0.0 +meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,3.85,32.71,13.24,0.66,18.04,60.73,3.65,-0.0 norallm/normistral-7b-warm-instruct (few-shot),7248,33,4096,True,False,6194,3.85,36.59,11.75,0.86,14.58,61.99,1.48,-0.98 bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,3.87,34.62,12.72,-0.24,18.1,61.81,3.06,-1.9 neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,3.87,22.23,21.46,0.1,23.34,51.47,6.43,1.43 -tollefj/nordavind-7b-instruct-warm (few-shot),7248,33,2048,False,False,6450,3.87,34.76,12.07,0.77,12.8,61.23,2.01,-0.76 +tollefj/nordavind-7b-instruct-warm (few-shot),7248,33,2048,False,False,6450,3.88,34.76,12.07,0.77,12.8,61.23,2.01,-0.76 AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot),471,64,2048,True,False,5855,3.91,17.79,21.09,0.08,15.04,59.45,1.06,5.69 -google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.92,10.67,12.86,-0.21,20.22,59.97,10.08,1.16 meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131073,False,False,7436,3.92,33.76,6.91,-0.13,12.43,59.46,2.09,3.56 -HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.97,26.23,6.86,2.69,10.84,60.43,1.93,3.27 +google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.93,10.67,12.86,-0.21,20.22,59.97,10.08,1.16 +HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.98,26.23,6.86,2.69,10.84,60.43,1.93,3.27 Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,3.98,25.65,7.56,-0.35,14.46,62.11,4.5,-1.89 ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4097,True,False,10194,3.98,30.21,5.88,3.67,15.12,56.75,3.22,1.88 -ibm-granite/granite-3b-code-base-2k 
(few-shot),3483,49,2048,True,False,2732,4.01,38.52,4.29,0.0,12.94,58.58,2.11,-4.75 -openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,4.02,28.74,4.3,0.06,17.41,60.79,0.18,-1.64 -AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,4.03,1.42,4.18,0.75,23.33,64.23,0.4,0.68 -ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4097,True,False,10187,4.03,29.51,1.7,-0.32,12.36,59.43,2.35,0.01 ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,4.03,23.14,5.07,0.18,14.15,60.8,2.86,-1.31 -ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,4.04,43.2,2.54,0.0,14.28,49.66,3.07,2.79 -google/gemma-2b (few-shot),2506,256,8192,True,False,6087,4.06,8.83,10.08,0.31,16.08,60.0,2.52,0.0 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,4.06,26.58,-0.79,0.63,15.14,60.84,1.17,-0.12 +ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,4.03,38.52,4.29,0.0,12.94,58.58,2.11,-4.75 +ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,4.03,43.2,2.54,0.0,14.28,49.66,3.07,2.79 +ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4097,True,False,10187,4.04,29.51,1.7,-0.32,12.36,59.43,2.35,0.01 +openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,4.04,28.74,4.3,0.06,17.41,60.79,0.18,-1.64 +AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,4.05,1.42,4.18,0.75,23.33,64.23,0.4,0.68 +openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,4.05,26.58,-0.79,0.63,15.14,60.84,1.17,-0.12 +google/gemma-2b (few-shot),2506,256,8192,True,False,6087,4.07,8.83,10.08,0.31,16.08,60.0,2.52,0.0 HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,4.12,20.5,10.09,0.83,10.84,57.52,3.16,-1.83 Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,4.13,15.66,9.17,-0.55,14.11,57.17,5.46,-1.71 -ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,4.19,18.07,0.65,-0.72,12.27,56.49,0.32,1.0 -Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,4.2,14.15,6.87,0.78,7.8,57.27,1.62,1.92 -meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,4.21,17.77,7.64,-0.35,8.15,54.21,0.71,2.88 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,4.21,13.29,6.83,0.92,7.49,56.76,0.59,1.87 -google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,4.22,20.49,-0.75,-0.01,10.95,59.16,0.45,0.62 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.23,22.56,0.53,-0.26,11.77,53.36,0.36,0.24 +ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,4.2,18.07,0.65,-0.72,12.27,56.49,0.32,1.0 +Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,4.21,14.15,6.87,0.78,7.8,57.27,1.62,1.92 +google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,4.21,20.49,-0.75,-0.01,10.95,59.16,0.45,0.62 +meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,4.22,17.77,7.64,-0.35,8.15,54.21,0.71,2.88 +utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,4.22,13.29,6.83,0.92,7.49,56.76,0.59,1.87 +PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.24,22.56,0.53,-0.26,11.77,53.36,0.36,0.24 PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,4.24,18.86,-0.67,-0.76,8.09,60.61,-0.78,-0.84 -ibm-granite/granite-3b-code-instruct-2k 
(few-shot),3483,49,2048,True,False,9059,4.24,33.57,0.6,0.0,11.27,49.32,1.37,-4.04 -ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,4.25,19.07,-0.51,0.99,9.54,59.42,0.79,-3.2 -timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,32768,False,False,5172,4.25,24.98,13.57,1.18,8.52,39.94,4.83,4.7 -ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,4.29,17.74,3.84,-1.13,7.74,58.6,-1.11,-4.23 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.31,9.9,2.13,0.02,10.64,54.83,-0.56,1.21 -Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.35,9.5,1.63,1.76,3.14,58.92,-1.28,1.48 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.37,13.43,3.82,1.14,3.71,51.93,0.95,2.9 -Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.37,16.2,0.1,-0.57,3.31,56.0,1.96,0.85 +ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,4.24,19.07,-0.51,0.99,9.54,59.42,0.79,-3.2 +ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,4.25,33.57,0.6,0.0,11.27,49.32,1.37,-4.04 +timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,32768,False,False,5172,4.26,24.98,13.57,1.18,8.52,39.94,4.83,4.7 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.3,9.9,2.13,0.02,10.64,54.83,-0.56,1.21 +ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,4.31,17.74,3.84,-1.13,7.74,58.6,-1.11,-4.23 +Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.36,16.2,0.1,-0.57,3.31,56.0,1.96,0.85 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,4.36,9.39,6.44,-0.72,3.34,59.37,-1.22,4.2 +Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.37,9.5,1.63,1.76,3.14,58.92,-1.28,1.48 ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,4.38,5.62,4.82,-0.2,4.94,58.01,0.78,-2.14 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.39,14.74,3.13,-0.25,1.35,52.66,1.21,1.69 -Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,4.41,12.26,-9.69,0.94,6.31,55.32,3.65,1.13 -RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.41,23.67,7.76,0.0,0.0,50.54,0.0,0.0 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.42,13.6,3.12,0.28,4.09,50.0,-0.11,2.51 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.43,17.73,2.38,-0.18,1.59,52.81,0.89,-1.11 -state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,4.44,18.38,-1.7,0.49,6.3,51.62,2.25,-6.17 -RuterNorway/Llama-2-7b-chat-norwegian (few-shot),6738,32,4096,False,False,10890,4.47,9.48,3.32,0.07,1.04,55.16,-0.8,-0.16 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.5,13.7,3.01,-0.83,0.94,50.3,1.1,-0.07 -PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.61,13.8,2.17,-0.63,1.29,43.4,1.16,1.21 -NorGLM/NorGPT-369M (few-shot),-1,64,2048,True,False,19896,4.69,1.68,2.78,-1.38,0.08,44.02,0.15,0.28 -Sigurdur/icebreaker (few-shot),110,32,1024,False,False,48619,4.75,0.0,2.31,0.0,0.0,44.8,0.23,0.38 -Sigurdur/icechat (few-shot),110,32,1024,False,False,49558,4.78,0.0,1.86,0.0,0.0,42.46,0.0,0.0 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.9,0.0,-1.3,0.0,0.0,36.58,-0.0,0.0 -ai-forever/mGPT (few-shot),-1,100,2048,True,False,11734,5.16,0.0,2.54,0.0,0.0,17.11,-0.02,0.47 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.39,13.43,3.82,1.14,3.71,51.93,0.95,2.9 +HuggingFaceTB/SmolLM2-135M 
(few-shot),135,49,8192,True,False,26346,4.41,14.74,3.13,-0.25,1.35,52.66,1.21,1.69 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.42,17.73,2.38,-0.18,1.59,52.81,0.89,-1.11 +Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,4.42,12.26,-9.69,0.94,6.31,55.32,3.65,1.13 +RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.42,23.67,7.76,0.0,0.0,50.54,0.0,0.0 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.43,13.6,3.12,0.28,4.09,50.0,-0.11,2.51 +state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,4.45,18.38,-1.7,0.49,6.3,51.62,2.25,-6.17 +RuterNorway/Llama-2-7b-chat-norwegian (few-shot),6738,32,4096,False,False,10890,4.49,9.48,3.32,0.07,1.04,55.16,-0.8,-0.16 +HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.52,13.7,3.01,-0.83,0.94,50.3,1.1,-0.07 +PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.6,13.8,2.17,-0.63,1.29,43.4,1.16,1.21 +NorGLM/NorGPT-369M (few-shot),-1,64,2048,True,False,19896,4.71,1.68,2.78,-1.38,0.08,44.02,0.15,0.28 +Sigurdur/icebreaker (few-shot),110,32,1024,False,False,48619,4.74,0.0,2.31,0.0,0.0,44.8,0.23,0.38 +Sigurdur/icechat (few-shot),110,32,1024,False,False,49558,4.79,0.0,1.86,0.0,0.0,42.46,0.0,0.0 +ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.91,0.0,-1.3,0.0,0.0,36.58,-0.0,0.0 +ai-forever/mGPT (few-shot),-1,100,2048,True,False,11734,5.15,0.0,2.54,0.0,0.0,17.11,-0.02,0.47 Sigurdur/jonas-hallgrimsson-gpt2 (few-shot),125,51,512,False,False,32644,5.43,0.0,0.0,0.0,0.0,0.0,-0.26,-0.01 diff --git a/icelandic-nlg.md b/icelandic-nlg.md index 50dbfd05..38c3b8e9 100644 --- a/icelandic-nlg.md +++ b/icelandic-nlg.md @@ -3,7 +3,7 @@ layout: leaderboard title: Icelandic NLG 🇮🇸 --- -
Last updated: 10/01/2025 12:30:23 CET
+Last updated: 11/01/2025 11:03:33 CET
@@ -185,7 +185,7 @@ title: Icelandic NLG 🇮🇸 131072 True 1,409 ± 457 / 186 ± 63 - 1.90 + 1.91 69.95 ± 2.18 / 42.88 ± 4.51 51.27 ± 1.19 / 63.66 ± 1.02 18.75 ± 1.65 / 55.50 ± 0.89 @@ -323,7 +323,7 @@ title: Icelandic NLG 🇮🇸 32768 True 2,258 ± 1,221 / 198 ± 67 - 2.35 + 2.36 65.08 ± 2.06 / 46.62 ± 5.37 48.71 ± 1.27 / 63.98 ± 0.90 20.51 ± 2.32 / 57.54 ± 3.00 @@ -346,7 +346,7 @@ title: Icelandic NLG 🇮🇸 200000 True 193 ± 87 / 55 ± 19 - 2.45 + 2.46 61.70 ± 2.37 / 41.44 ± 1.76 51.24 ± 1.69 / 65.49 ± 1.06 52.43 ± 2.29 / 73.78 ± 1.09 @@ -414,7 +414,7 @@ title: Icelandic NLG 🇮🇸 128 8192 True - 1,007 ± 316 / 162 ± 45 + 1,483 ± 377 / 287 ± 97 2.94 60.20 ± 2.76 / 40.38 ± 4.22 38.09 ± 2.38 / 54.51 ± 2.16 @@ -438,7 +438,7 @@ title: Icelandic NLG 🇮🇸 32768 True 2,363 ± 794 / 311 ± 105 - 2.96 + 2.97 55.09 ± 2.15 / 38.38 ± 3.92 39.60 ± 2.22 / 53.52 ± 3.54 8.23 ± 1.55 / 49.15 ± 2.76 @@ -461,7 +461,7 @@ title: Icelandic NLG 🇮🇸 8192 True 2,470 ± 836 / 326 ± 111 - 2.97 + 2.98 53.28 ± 3.54 / 39.35 ± 3.78 41.80 ± 1.57 / 59.81 ± 1.60 11.07 ± 2.40 / 51.96 ± 2.62 @@ -483,8 +483,8 @@ title: Icelandic NLG 🇮🇸 128 131072 True - 1,005 ± 330 / 196 ± 74 - 2.98 + 1,473 ± 377 / 283 ± 96 + 2.99 46.48 ± 1.98 / 24.57 ± 1.73 39.91 ± 2.35 / 57.39 ± 1.64 11.72 ± 1.81 / 51.67 ± 1.45 @@ -507,7 +507,7 @@ title: Icelandic NLG 🇮🇸 131072 True 2,986 ± 823 / 276 ± 94 - 3.01 + 3.02 52.97 ± 1.54 / 38.17 ± 4.65 41.29 ± 2.30 / 59.30 ± 1.75 5.95 ± 2.81 / 40.86 ± 3.68 @@ -529,7 +529,7 @@ title: Icelandic NLG 🇮🇸 128 8192 True - 1,335 ± 338 / 260 ± 88 + 1,477 ± 376 / 285 ± 97 3.07 50.45 ± 1.95 / 37.62 ± 3.95 34.68 ± 3.74 / 53.39 ± 3.75 @@ -553,7 +553,7 @@ title: Icelandic NLG 🇮🇸 32768 True 1,302 ± 323 / 253 ± 86 - 3.22 + 3.23 47.10 ± 1.90 / 22.63 ± 2.03 24.43 ± 2.98 / 39.27 ± 3.37 7.19 ± 2.76 / 44.62 ± 3.61 @@ -576,7 +576,7 @@ title: Icelandic NLG 🇮🇸 131072 True 1,220 ± 411 / 158 ± 53 - 3.32 + 3.33 62.28 ± 1.43 / 53.70 ± 3.40 38.08 ± 3.91 / 47.69 ± 4.81 2.85 ± 1.20 / 39.44 ± 3.92 @@ -599,7 +599,7 @@ title: Icelandic NLG 🇮🇸 32768 True 9,015 ± 2,966 / 1,121 ± 510 - 3.32 + 3.33 41.84 ± 3.14 / 24.97 ± 2.44 38.12 ± 1.81 / 54.60 ± 1.56 7.47 ± 1.59 / 45.34 ± 4.68 @@ -622,7 +622,7 @@ title: Icelandic NLG 🇮🇸 32768 True 5,535 ± 1,837 / 760 ± 256 - 3.35 + 3.36 35.00 ± 3.16 / 20.45 ± 2.42 23.93 ± 4.28 / 46.14 ± 3.72 9.69 ± 1.44 / 52.07 ± 1.78 @@ -668,7 +668,7 @@ title: Icelandic NLG 🇮🇸 4096 True 1,151 ± 294 / 227 ± 76 - 3.42 + 3.43 50.99 ± 3.49 / 41.71 ± 4.09 24.17 ± 5.04 / 45.19 ± 5.13 1.15 ± 1.86 / 38.83 ± 2.36 @@ -691,7 +691,7 @@ title: Icelandic NLG 🇮🇸 32768 True 2,368 ± 793 / 317 ± 108 - 3.44 + 3.45 36.73 ± 4.50 / 27.29 ± 1.83 34.34 ± 3.99 / 53.29 ± 3.80 6.57 ± 2.18 / 46.35 ± 4.24 @@ -714,7 +714,7 @@ title: Icelandic NLG 🇮🇸 4096 True 1,446 ± 354 / 295 ± 100 - 3.44 + 3.45 50.69 ± 3.67 / 41.90 ± 4.06 24.38 ± 5.21 / 44.65 ± 5.83 1.46 ± 1.66 / 38.73 ± 2.27 @@ -760,7 +760,7 @@ title: Icelandic NLG 🇮🇸 4096 False 4,136 ± 1,282 / 668 ± 326 - 3.53 + 3.54 42.23 ± 4.22 / 25.96 ± 4.27 27.93 ± 2.80 / 48.93 ± 1.43 6.38 ± 0.99 / 49.63 ± 1.47 @@ -783,7 +783,7 @@ title: Icelandic NLG 🇮🇸 32768 True 1,364 ± 343 / 266 ± 90 - 3.53 + 3.54 46.73 ± 3.51 / 38.01 ± 4.18 26.28 ± 4.82 / 46.42 ± 4.24 1.50 ± 1.18 / 36.24 ± 1.65 @@ -806,7 +806,7 @@ title: Icelandic NLG 🇮🇸 8192 False 2,686 ± 685 / 491 ± 164 - 3.54 + 3.55 28.98 ± 2.63 / 21.75 ± 1.89 19.83 ± 4.76 / 41.64 ± 3.64 4.93 ± 1.06 / 49.69 ± 2.65 @@ -829,7 +829,7 @@ title: Icelandic NLG 🇮🇸 131073 True 3,713 ± 877 / 836 ± 267 - 3.56 + 3.57 42.04 ± 3.53 / 25.31 ± 1.59 14.53 ± 4.72 / 32.88 ± 5.49 0.00 ± 0.00 / 32.97 ± 0.29 @@ -852,7 
+852,7 @@ title: Icelandic NLG 🇮🇸 32768 False 2,088 ± 352 / 706 ± 214 - 3.60 + 3.61 40.71 ± 2.93 / 34.57 ± 4.02 14.70 ± 7.78 / 36.09 ± 6.06 0.71 ± 2.00 / 36.90 ± 2.10 @@ -914,29 +914,6 @@ title: Icelandic NLG 🇮🇸 14.0.3 14.0.3 - - occiglot/occiglot-7b-eu5 (few-shot) - 7242 - 32 - 32768 - True - 2,219 ± 427 / 717 ± 224 - 3.66 - 40.08 ± 2.82 / 37.15 ± 4.07 - 16.23 ± 4.59 / 35.37 ± 4.49 - 1.59 ± 1.86 / 39.93 ± 4.19 - 15.98 ± 3.74 / 39.67 ± 3.36 - 62.55 ± 3.03 / 15.26 ± 2.31 - 5.98 ± 1.66 / 28.18 ± 1.30 - -0.51 ± 1.95 / 47.23 ± 2.39 - 12.5.2 - 14.1.2 - 12.1.0 - 12.1.0 - 12.1.0 - 12.10.8 - 12.2.0 - nvidia/mistral-nemo-minitron-8b-instruct (few-shot) 8414 @@ -960,6 +937,29 @@ title: Icelandic NLG 🇮🇸 14.1.1 14.1.1 + + occiglot/occiglot-7b-eu5 (few-shot) + 7242 + 32 + 32768 + True + 2,219 ± 427 / 717 ± 224 + 3.67 + 40.08 ± 2.82 / 37.15 ± 4.07 + 16.23 ± 4.59 / 35.37 ± 4.49 + 1.59 ± 1.86 / 39.93 ± 4.19 + 15.98 ± 3.74 / 39.67 ± 3.36 + 62.55 ± 3.03 / 15.26 ± 2.31 + 5.98 ± 1.66 / 28.18 ± 1.30 + -0.51 ± 1.95 / 47.23 ± 2.39 + 12.5.2 + 14.1.2 + 12.1.0 + 12.1.0 + 12.1.0 + 12.10.8 + 12.2.0 + ibm-granite/granite-3.0-8b-instruct (few-shot) 8171 @@ -990,7 +990,7 @@ title: Icelandic NLG 🇮🇸 4096 True 1,431 ± 352 / 287 ± 97 - 3.70 + 3.71 33.21 ± 3.20 / 25.94 ± 2.59 25.50 ± 6.42 / 38.95 ± 8.69 -0.08 ± 1.20 / 39.03 ± 2.81 @@ -1013,7 +1013,7 @@ title: Icelandic NLG 🇮🇸 32768 False 634 ± 179 / 110 ± 35 - 3.70 + 3.71 36.04 ± 2.59 / 24.74 ± 2.79 12.93 ± 5.42 / 30.40 ± 3.15 -0.36 ± 1.36 / 33.94 ± 0.32 @@ -1029,29 +1029,6 @@ title: Icelandic NLG 🇮🇸 12.10.8 12.1.0 - - timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot) - 7242 - 32 - 32768 - True - 5,054 ± 1,200 / 1,056 ± 339 - 3.71 - 36.47 ± 4.24 / 30.33 ± 3.70 - 1.84 ± 3.02 / 23.26 ± 3.53 - 2.54 ± 1.29 / 50.66 ± 0.62 - 18.66 ± 4.26 / 38.73 ± 3.66 - 63.68 ± 1.75 / 16.38 ± 1.24 - 5.12 ± 1.30 / 28.85 ± 0.99 - 8.30 ± 1.28 / 57.35 ± 0.75 - 12.5.3 - 14.1.2 - 12.5.3 - 12.5.3 - 12.5.3 - 12.10.8 - 12.5.3 - utter-project/EuroLLM-9B-Instruct (few-shot) 9152 @@ -1075,29 +1052,6 @@ title: Icelandic NLG 🇮🇸 13.1.0 13.1.0 - - MaLA-LM/emma-500-llama2-7b (few-shot) - 6738 - 32 - 4096 - True - 6,275 ± 1,193 / 1,755 ± 578 - 3.72 - 31.81 ± 1.93 / 29.47 ± 2.02 - 18.33 ± 5.09 / 30.21 ± 5.80 - 3.63 ± 1.69 / 44.49 ± 3.89 - 16.72 ± 7.29 / 46.83 ± 5.93 - 58.72 ± 3.28 / 13.71 ± 1.35 - 12.62 ± 1.36 / 34.51 ± 1.27 - 3.43 ± 2.18 / 44.56 ± 1.09 - 13.0.0 - 14.1.2 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - ibm-granite/granite-8b-code-base-4k (few-shot) 8055 @@ -1121,6 +1075,52 @@ title: Icelandic NLG 🇮🇸 13.0.0 13.0.0 + + timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot) + 7242 + 32 + 32768 + True + 5,054 ± 1,200 / 1,056 ± 339 + 3.72 + 36.47 ± 4.24 / 30.33 ± 3.70 + 1.84 ± 3.02 / 23.26 ± 3.53 + 2.54 ± 1.29 / 50.66 ± 0.62 + 18.66 ± 4.26 / 38.73 ± 3.66 + 63.68 ± 1.75 / 16.38 ± 1.24 + 5.12 ± 1.30 / 28.85 ± 0.99 + 8.30 ± 1.28 / 57.35 ± 0.75 + 12.5.3 + 14.1.2 + 12.5.3 + 12.5.3 + 12.5.3 + 12.10.8 + 12.5.3 + + + MaLA-LM/emma-500-llama2-7b (few-shot) + 6738 + 32 + 4096 + True + 6,275 ± 1,193 / 1,755 ± 578 + 3.73 + 31.81 ± 1.93 / 29.47 ± 2.02 + 18.33 ± 5.09 / 30.21 ± 5.80 + 3.63 ± 1.69 / 44.49 ± 3.89 + 16.72 ± 7.29 / 46.83 ± 5.93 + 58.72 ± 3.28 / 13.71 ± 1.35 + 12.62 ± 1.36 / 34.51 ± 1.27 + 3.43 ± 2.18 / 44.56 ± 1.09 + 13.0.0 + 14.1.2 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 01-ai/Yi-1.5-6B (few-shot) 6061 @@ -1144,6 +1144,29 @@ title: Icelandic NLG 🇮🇸 13.0.0 13.0.0 + + ibm-granite/granite-3.0-8b-base (few-shot) + 8171 + 49 + 4096 + True + 2,515 ± 625 / 476 ± 159 + 3.76 + 37.82 ± 3.63 / 32.97 ± 3.97 + -0.29 ± 
1.16 / 15.66 ± 0.37 + -0.12 ± 1.31 / 33.78 ± 0.32 + 21.59 ± 2.22 / 47.09 ± 1.09 + 62.35 ± 3.34 / 16.15 ± 1.66 + 6.54 ± 1.30 / 28.63 ± 1.24 + 1.83 ± 3.53 / 49.23 ± 2.07 + 13.0.0 + 14.1.2 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + meta-llama/Llama-2-7b-chat-hf (few-shot) 6738 @@ -1151,7 +1174,7 @@ title: Icelandic NLG 🇮🇸 4096 False 2,643 ± 455 / 800 ± 247 - 3.76 + 3.77 41.10 ± 3.35 / 40.54 ± 3.19 13.59 ± 6.27 / 36.87 ± 4.51 -1.07 ± 2.09 / 44.83 ± 2.20 @@ -1174,7 +1197,7 @@ title: Icelandic NLG 🇮🇸 8193 True 5,374 ± 1,233 / 1,193 ± 377 - 3.77 + 3.78 14.79 ± 4.70 / 13.03 ± 2.92 15.24 ± 6.48 / 34.96 ± 6.66 1.10 ± 1.54 / 34.67 ± 0.87 @@ -1190,29 +1213,6 @@ title: Icelandic NLG 🇮🇸 13.0.0 13.0.0 - - ibm-granite/granite-3.0-8b-base (few-shot) - 8171 - 49 - 4096 - True - 2,515 ± 625 / 476 ± 159 - 3.77 - 37.82 ± 3.63 / 32.97 ± 3.97 - -0.29 ± 1.16 / 15.66 ± 0.37 - -0.12 ± 1.31 / 33.78 ± 0.32 - 21.59 ± 2.22 / 47.09 ± 1.09 - 62.35 ± 3.34 / 16.15 ± 1.66 - 6.54 ± 1.30 / 28.63 ± 1.24 - 1.83 ± 3.53 / 49.23 ± 2.07 - 13.0.0 - 14.1.2 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - mistralai/Mistral-7B-Instruct-v0.2 (few-shot) 7242 @@ -1220,7 +1220,7 @@ title: Icelandic NLG 🇮🇸 32768 False 2,370 ± 416 / 711 ± 242 - 3.77 + 3.78 34.80 ± 1.37 / 24.03 ± 2.32 17.64 ± 3.40 / 40.55 ± 3.17 5.46 ± 1.31 / 51.42 ± 1.10 @@ -1243,7 +1243,7 @@ title: Icelandic NLG 🇮🇸 131073 False 10,424 ± 2,641 / 2,081 ± 666 - 3.78 + 3.79 27.57 ± 1.71 / 22.52 ± 1.18 10.07 ± 5.45 / 28.32 ± 4.30 -1.39 ± 1.30 / 34.40 ± 1.96 @@ -1266,7 +1266,7 @@ title: Icelandic NLG 🇮🇸 8192 False 2,707 ± 688 / 497 ± 166 - 3.81 + 3.80 47.16 ± 2.83 / 38.60 ± 4.04 0.33 ± 0.93 / 15.64 ± 1.01 3.84 ± 1.27 / 40.06 ± 3.79 @@ -1282,29 +1282,6 @@ title: Icelandic NLG 🇮🇸 13.0.0 13.0.0 - - microsoft/Phi-3-mini-4k-instruct (few-shot) - 3821 - 32 - 4096 - True - 3,194 ± 687 / 650 ± 216 - 3.81 - 33.05 ± 4.29 / 29.75 ± 3.67 - 14.42 ± 3.18 / 37.82 ± 2.65 - 0.71 ± 1.18 / 34.80 ± 0.88 - 17.23 ± 2.51 / 39.88 ± 1.59 - 60.08 ± 1.45 / 13.80 ± 0.81 - 2.67 ± 1.35 / 27.04 ± 1.07 - 2.74 ± 2.28 / 53.18 ± 0.93 - 12.10.5 - 13.3.0 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.8 - 12.10.5 - microsoft/Phi-3-mini-128k-instruct (few-shot) 3821 @@ -1328,6 +1305,29 @@ title: Icelandic NLG 🇮🇸 12.10.8 12.10.0 + + microsoft/Phi-3-mini-4k-instruct (few-shot) + 3821 + 32 + 4096 + True + 8,681 ± 1,650 / 2,177 ± 717 + 3.82 + 33.05 ± 4.29 / 29.75 ± 3.67 + 14.42 ± 3.18 / 37.82 ± 2.65 + 0.71 ± 1.18 / 34.80 ± 0.88 + 17.23 ± 2.51 / 39.88 ± 1.59 + 60.08 ± 1.45 / 13.80 ± 0.81 + 2.67 ± 1.35 / 27.04 ± 1.07 + 2.74 ± 2.28 / 53.18 ± 0.93 + 12.10.5 + 13.3.0 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.8 + 12.10.5 + HPLT/gpt-7b-nordic-prerelease (few-shot) 7550 @@ -1358,7 +1358,7 @@ title: Icelandic NLG 🇮🇸 4096 True 930 ± 310 / 128 ± 43 - 3.84 + 3.85 32.71 ± 2.77 / 32.17 ± 2.13 13.24 ± 8.29 / 31.92 ± 6.23 0.66 ± 1.75 / 40.36 ± 4.19 @@ -1450,7 +1450,7 @@ title: Icelandic NLG 🇮🇸 2048 False 6,450 ± 961 / 2,082 ± 658 - 3.87 + 3.88 34.76 ± 4.42 / 23.42 ± 2.33 12.07 ± 3.53 / 32.35 ± 5.40 0.77 ± 1.05 / 39.63 ± 2.41 @@ -1489,29 +1489,6 @@ title: Icelandic NLG 🇮🇸 12.10.8 12.7.0 - - google/gemma-2-2b (few-shot) - 2614 - 256 - 8193 - True - 5,235 ± 1,226 / 1,154 ± 366 - 3.92 - 10.67 ± 5.23 / 13.01 ± 3.26 - 12.86 ± 6.56 / 28.74 ± 6.46 - -0.21 ± 0.99 / 33.71 ± 0.28 - 20.22 ± 3.01 / 43.48 ± 2.32 - 59.97 ± 2.49 / 13.32 ± 2.03 - 10.08 ± 1.82 / 32.08 ± 1.37 - 1.16 ± 2.48 / 51.00 ± 1.70 - 13.0.0 - 14.1.2 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - meta-llama/Llama-3.2-1B-Instruct (few-shot) 1236 @@ -1535,6 +1512,29 @@ title: Icelandic NLG 🇮🇸 
13.0.0 13.0.0 + + google/gemma-2-2b (few-shot) + 2614 + 256 + 8193 + True + 5,235 ± 1,226 / 1,154 ± 366 + 3.93 + 10.67 ± 5.23 / 13.01 ± 3.26 + 12.86 ± 6.56 / 28.74 ± 6.46 + -0.21 ± 0.99 / 33.71 ± 0.28 + 20.22 ± 3.01 / 43.48 ± 2.32 + 59.97 ± 2.49 / 13.32 ± 2.03 + 10.08 ± 1.82 / 32.08 ± 1.37 + 1.16 ± 2.48 / 51.00 ± 1.70 + 13.0.0 + 14.1.2 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot) 1711 @@ -1542,7 +1542,7 @@ title: Icelandic NLG 🇮🇸 8192 True 15,971 ± 3,654 / 3,609 ± 1,197 - 3.97 + 3.98 26.23 ± 3.53 / 23.26 ± 2.30 6.86 ± 5.25 / 23.96 ± 4.13 2.69 ± 1.42 / 45.61 ± 2.69 @@ -1604,6 +1604,29 @@ title: Icelandic NLG 🇮🇸 13.0.0 13.0.0 + + ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) + 3374 + 49 + 4096 + True + 10,246 ± 3,021 / 1,629 ± 550 + 4.03 + 23.14 ± 2.08 / 23.09 ± 2.24 + 5.07 ± 2.69 / 27.01 ± 2.31 + 0.18 ± 1.67 / 33.93 ± 0.34 + 14.15 ± 2.49 / 36.10 ± 1.65 + 60.80 ± 0.81 / 10.79 ± 1.85 + 2.86 ± 1.31 / 26.61 ± 1.04 + -1.31 ± 2.24 / 50.29 ± 1.93 + 13.0.0 + 13.3.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + ibm-granite/granite-3b-code-base-2k (few-shot) 3483 @@ -1611,7 +1634,7 @@ title: Icelandic NLG 🇮🇸 2048 True 2,732 ± 868 / 662 ± 238 - 4.01 + 4.03 38.52 ± 3.25 / 28.84 ± 5.49 4.29 ± 3.60 / 23.24 ± 4.11 0.00 ± 0.00 / 33.69 ± 0.28 @@ -1627,6 +1650,52 @@ title: Icelandic NLG 🇮🇸 13.0.0 13.0.0 + + ibm-granite/granite-8b-code-instruct-4k (few-shot) + 8055 + 49 + 4096 + True + 5,617 ± 995 / 1,623 ± 540 + 4.03 + 43.20 ± 3.61 / 32.10 ± 3.76 + 2.54 ± 2.60 / 17.05 ± 2.70 + 0.00 ± 0.00 / 33.69 ± 0.28 + 14.28 ± 2.94 / 38.21 ± 2.38 + 49.66 ± 3.28 / 7.97 ± 1.43 + 3.07 ± 1.66 / 25.74 ± 1.09 + 2.79 ± 3.13 / 47.97 ± 1.43 + 13.0.0 + 13.3.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + + + ibm-granite/granite-3.0-2b-base (few-shot) + 2534 + 49 + 4097 + True + 10,187 ± 2,363 / 2,204 ± 737 + 4.04 + 29.51 ± 2.44 / 29.14 ± 2.34 + 1.70 ± 2.98 / 16.77 ± 2.07 + -0.32 ± 0.63 / 33.70 ± 0.28 + 12.36 ± 2.12 / 34.06 ± 1.47 + 59.43 ± 1.83 / 13.09 ± 1.58 + 2.35 ± 1.26 / 27.85 ± 0.87 + 0.01 ± 2.73 / 45.18 ± 1.36 + 13.0.0 + 14.1.2 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot) 7453 @@ -1634,7 +1703,7 @@ title: Icelandic NLG 🇮🇸 4096 False 1,254 ± 328 / 243 ± 83 - 4.02 + 4.04 28.74 ± 2.37 / 19.43 ± 1.48 4.30 ± 1.28 / 19.35 ± 2.86 0.06 ± 1.39 / 34.20 ± 1.30 @@ -1657,7 +1726,7 @@ title: Icelandic NLG 🇮🇸 2048 True 4,608 ± 988 / 1,115 ± 354 - 4.03 + 4.05 1.42 ± 1.60 / 3.11 ± 1.85 4.18 ± 2.03 / 17.89 ± 1.60 0.75 ± 0.73 / 45.87 ± 2.20 @@ -1674,73 +1743,27 @@ title: Icelandic NLG 🇮🇸 12.7.0 - ibm-granite/granite-3.0-2b-base (few-shot) - 2534 - 49 - 4097 - True - 10,187 ± 2,363 / 2,204 ± 737 - 4.03 - 29.51 ± 2.44 / 29.14 ± 2.34 - 1.70 ± 2.98 / 16.77 ± 2.07 - -0.32 ± 0.63 / 33.70 ± 0.28 - 12.36 ± 2.12 / 34.06 ± 1.47 - 59.43 ± 1.83 / 13.09 ± 1.58 - 2.35 ± 1.26 / 27.85 ± 0.87 - 0.01 ± 2.73 / 45.18 ± 1.36 - 13.0.0 - 14.1.2 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - - - ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) - 3374 - 49 - 4096 - True - 10,246 ± 3,021 / 1,629 ± 550 - 4.03 - 23.14 ± 2.08 / 23.09 ± 2.24 - 5.07 ± 2.69 / 27.01 ± 2.31 - 0.18 ± 1.67 / 33.93 ± 0.34 - 14.15 ± 2.49 / 36.10 ± 1.65 - 60.80 ± 0.81 / 10.79 ± 1.85 - 2.86 ± 1.31 / 26.61 ± 1.04 - -1.31 ± 2.24 / 50.29 ± 1.93 - 13.0.0 - 13.3.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - - - ibm-granite/granite-8b-code-instruct-4k (few-shot) - 8055 - 49 + openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot) + 7453 + 251 4096 True - 5,617 ± 995 / 1,623 ± 540 - 4.04 
- 43.20 ± 3.61 / 32.10 ± 3.76 - 2.54 ± 2.60 / 17.05 ± 2.70 - 0.00 ± 0.00 / 33.69 ± 0.28 - 14.28 ± 2.94 / 38.21 ± 2.38 - 49.66 ± 3.28 / 7.97 ± 1.43 - 3.07 ± 1.66 / 25.74 ± 1.09 - 2.79 ± 3.13 / 47.97 ± 1.43 - 13.0.0 - 13.3.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 1,438 ± 410 / 233 ± 79 + 4.05 + 26.58 ± 1.47 / 22.13 ± 2.38 + -0.79 ± 2.98 / 26.06 ± 1.66 + 0.63 ± 1.36 / 43.80 ± 3.59 + 15.14 ± 2.49 / 44.23 ± 2.12 + 60.84 ± 0.65 / 13.75 ± 0.70 + 1.17 ± 1.56 / 23.34 ± 1.03 + -0.12 ± 2.10 / 54.54 ± 1.79 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 google/gemma-2b (few-shot) @@ -1749,7 +1772,7 @@ title: Icelandic NLG 🇮🇸 8192 True 6,087 ± 1,046 / 1,902 ± 563 - 4.06 + 4.07 8.83 ± 5.85 / 9.93 ± 4.70 10.08 ± 5.54 / 31.35 ± 3.02 0.31 ± 1.95 / 45.42 ± 3.51 @@ -1765,29 +1788,6 @@ title: Icelandic NLG 🇮🇸 12.10.8 12.1.0 - - openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot) - 7453 - 251 - 4096 - True - 1,438 ± 410 / 233 ± 79 - 4.06 - 26.58 ± 1.47 / 22.13 ± 2.38 - -0.79 ± 2.98 / 26.06 ± 1.66 - 0.63 ± 1.36 / 43.80 ± 3.59 - 15.14 ± 2.49 / 44.23 ± 2.12 - 60.84 ± 0.65 / 13.75 ± 0.70 - 1.17 ± 1.56 / 23.34 ± 1.03 - -0.12 ± 2.10 / 54.54 ± 1.79 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - HuggingFaceTB/SmolLM2-1.7B (few-shot) 1711 @@ -1841,7 +1841,7 @@ title: Icelandic NLG 🇮🇸 4096 True 10,504 ± 3,028 / 1,678 ± 559 - 4.19 + 4.20 18.07 ± 3.62 / 18.73 ± 2.54 0.65 ± 3.12 / 18.97 ± 1.86 -0.72 ± 1.22 / 33.96 ± 0.50 @@ -1864,7 +1864,7 @@ title: Icelandic NLG 🇮🇸 32768 False 8,304 ± 1,846 / 1,933 ± 617 - 4.20 + 4.21 14.15 ± 1.92 / 14.96 ± 2.11 6.87 ± 6.27 / 28.55 ± 3.29 0.78 ± 1.70 / 44.74 ± 3.57 @@ -1880,6 +1880,29 @@ title: Icelandic NLG 🇮🇸 12.10.8 12.1.0 + + google/gemma-2b-it (few-shot) + 2506 + 256 + 8192 + False + 6,471 ± 1,142 / 1,961 ± 584 + 4.21 + 20.49 ± 2.30 / 18.33 ± 1.40 + -0.75 ± 1.97 / 22.91 ± 2.76 + -0.01 ± 2.13 / 46.02 ± 2.71 + 10.95 ± 2.39 / 37.64 ± 0.75 + 59.16 ± 0.96 / 9.92 ± 1.05 + 0.45 ± 1.44 / 22.94 ± 0.76 + 0.62 ± 1.42 / 56.02 ± 0.95 + 12.5.2 + 14.1.2 + 12.1.0 + 12.4.0 + 12.4.0 + 12.10.8 + 12.1.0 + meta-llama/Llama-3.2-1B (few-shot) 1236 @@ -1887,7 +1910,7 @@ title: Icelandic NLG 🇮🇸 131073 True 7,577 ± 1,884 / 1,555 ± 492 - 4.21 + 4.22 17.77 ± 6.81 / 16.42 ± 6.19 7.64 ± 4.41 / 25.58 ± 4.40 -0.35 ± 1.50 / 34.29 ± 1.64 @@ -1910,7 +1933,7 @@ title: Icelandic NLG 🇮🇸 4096 True 15,009 ± 4,072 / 2,702 ± 878 - 4.21 + 4.22 13.29 ± 2.33 / 14.72 ± 2.11 6.83 ± 5.62 / 31.41 ± 4.70 0.92 ± 1.51 / 42.09 ± 3.39 @@ -1926,29 +1949,6 @@ title: Icelandic NLG 🇮🇸 13.1.0 13.1.0 - - google/gemma-2b-it (few-shot) - 2506 - 256 - 8192 - False - 6,471 ± 1,142 / 1,961 ± 584 - 4.22 - 20.49 ± 2.30 / 18.33 ± 1.40 - -0.75 ± 1.97 / 22.91 ± 2.76 - -0.01 ± 2.13 / 46.02 ± 2.71 - 10.95 ± 2.39 / 37.64 ± 0.75 - 59.16 ± 0.96 / 9.92 ± 1.05 - 0.45 ± 1.44 / 22.94 ± 0.76 - 0.62 ± 1.42 / 56.02 ± 0.95 - 12.5.2 - 14.1.2 - 12.1.0 - 12.4.0 - 12.4.0 - 12.10.8 - 12.1.0 - PleIAs/Pleias-1.2b-Preview (few-shot) 1195 @@ -1956,7 +1956,7 @@ title: Icelandic NLG 🇮🇸 2048 True 10,756 ± 3,589 / 1,157 ± 670 - 4.23 + 4.24 22.56 ± 3.90 / 22.69 ± 3.50 0.53 ± 2.22 / 20.28 ± 1.55 -0.26 ± 0.43 / 33.59 ± 1.01 @@ -1995,29 +1995,6 @@ title: Icelandic NLG 🇮🇸 14.0.4 14.0.4 - - ibm-granite/granite-3b-code-instruct-2k (few-shot) - 3483 - 49 - 2048 - True - 9,059 ± 1,947 / 2,201 ± 728 - 4.24 - 33.57 ± 2.48 / 33.47 ± 2.48 - 0.60 ± 2.01 / 16.42 ± 1.16 - 0.00 ± 0.00 / 33.69 ± 0.28 - 11.27 ± 2.38 / 33.54 ± 1.64 - 49.32 ± 3.71 / 8.01 ± 2.04 - 1.37 ± 1.11 / 25.57 ± 1.36 - -4.04 ± 3.58 / 50.55 ± 3.65 - 13.0.0 - 13.3.0 - 13.0.0 - 13.0.0 
- 13.0.0 - 13.0.0 - 13.0.0 - ibm-granite/granite-7b-instruct (few-shot) 6738 @@ -2025,7 +2002,7 @@ title: Icelandic NLG 🇮🇸 4096 True 3,136 ± 558 / 942 ± 290 - 4.25 + 4.24 19.07 ± 1.70 / 20.43 ± 1.80 -0.51 ± 3.34 / 18.63 ± 3.02 0.99 ± 1.27 / 39.24 ± 3.43 @@ -2041,6 +2018,29 @@ title: Icelandic NLG 🇮🇸 13.2.0 13.2.0 + + ibm-granite/granite-3b-code-instruct-2k (few-shot) + 3483 + 49 + 2048 + True + 9,059 ± 1,947 / 2,201 ± 728 + 4.25 + 33.57 ± 2.48 / 33.47 ± 2.48 + 0.60 ± 2.01 / 16.42 ± 1.16 + 0.00 ± 0.00 / 33.69 ± 0.28 + 11.27 ± 2.38 / 33.54 ± 1.64 + 49.32 ± 3.71 / 8.01 ± 2.04 + 1.37 ± 1.11 / 25.57 ± 1.36 + -4.04 ± 3.58 / 50.55 ± 3.65 + 13.0.0 + 13.3.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot) 7242 @@ -2048,7 +2048,7 @@ title: Icelandic NLG 🇮🇸 32768 False 5,172 ± 813 / 1,647 ± 518 - 4.25 + 4.26 24.98 ± 5.71 / 25.35 ± 4.78 13.57 ± 4.68 / 28.38 ± 5.14 1.18 ± 1.09 / 39.01 ± 2.76 @@ -2064,6 +2064,29 @@ title: Icelandic NLG 🇮🇸 12.10.8 12.3.2 + + PleIAs/Pleias-Nano (few-shot) + 1195 + 66 + 2048 + True + 2,519 ± 841 / 323 ± 104 + 4.30 + 9.90 ± 5.26 / 15.24 ± 2.90 + 2.13 ± 2.07 / 21.55 ± 2.63 + 0.02 ± 1.43 / 34.31 ± 2.24 + 10.64 ± 2.56 / 31.31 ± 1.47 + 54.83 ± 2.32 / 11.29 ± 0.66 + -0.56 ± 1.92 / 23.31 ± 1.19 + 1.21 ± 2.27 / 53.67 ± 2.22 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + ibm-granite/granite-3.0-1b-a400m-instruct (few-shot) 1335 @@ -2071,7 +2094,7 @@ title: Icelandic NLG 🇮🇸 4096 True 7,964 ± 2,255 / 1,299 ± 433 - 4.29 + 4.31 17.74 ± 3.49 / 16.16 ± 1.89 3.84 ± 4.22 / 27.14 ± 5.28 -1.13 ± 1.09 / 44.10 ± 3.57 @@ -2088,27 +2111,50 @@ title: Icelandic NLG 🇮🇸 13.2.0 - PleIAs/Pleias-Nano (few-shot) - 1195 - 66 - 2048 + Qwen/Qwen1.5-0.5B (few-shot) + 620 + 152 + 32768 True - 2,519 ± 841 / 323 ± 104 - 4.31 - 9.90 ± 5.26 / 15.24 ± 2.90 - 2.13 ± 2.07 / 21.55 ± 2.63 - 0.02 ± 1.43 / 34.31 ± 2.24 - 10.64 ± 2.56 / 31.31 ± 1.47 - 54.83 ± 2.32 / 11.29 ± 0.66 - -0.56 ± 1.92 / 23.31 ± 1.19 - 1.21 ± 2.27 / 53.67 ± 2.22 - 14.0.4 + 11,371 ± 2,924 / 2,122 ± 692 + 4.36 + 16.20 ± 1.52 / 16.96 ± 1.71 + 0.10 ± 1.78 / 18.76 ± 2.62 + -0.57 ± 1.20 / 41.25 ± 3.51 + 3.31 ± 0.82 / 16.86 ± 2.98 + 56.00 ± 3.13 / 10.05 ± 0.73 + 1.96 ± 1.72 / 25.55 ± 1.37 + 0.85 ± 1.91 / 52.12 ± 2.92 + 12.5.2 + 14.1.2 + 12.1.0 + 12.1.0 + 12.1.0 + 12.10.8 + 12.1.0 + + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 4.36 + 9.39 ± 3.31 / 9.28 ± 3.38 + 6.44 ± 2.00 / 22.24 ± 3.62 + -0.72 ± 1.67 / 43.21 ± 3.74 + 3.34 ± 0.98 / 21.41 ± 2.93 + 59.37 ± 0.99 / 11.96 ± 0.51 + -1.22 ± 0.96 / 23.83 ± 1.00 + 4.20 ± 2.70 / 53.16 ± 2.28 + 14.1.2 14.1.2 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 Qwen/Qwen1.5-0.5B-Chat (few-shot) @@ -2117,7 +2163,7 @@ title: Icelandic NLG 🇮🇸 32768 False 11,740 ± 3,000 / 2,209 ± 721 - 4.35 + 4.37 9.50 ± 3.17 / 9.41 ± 3.40 1.63 ± 1.45 / 19.19 ± 0.27 1.76 ± 1.62 / 38.51 ± 3.72 @@ -2133,52 +2179,6 @@ title: Icelandic NLG 🇮🇸 12.10.8 12.1.0 - - HuggingFaceTB/SmolLM2-360M (few-shot) - 362 - 49 - 8192 - True - 22,023 ± 6,203 / 3,675 ± 1,231 - 4.37 - 13.43 ± 1.36 / 13.81 ± 1.45 - 3.82 ± 2.09 / 24.09 ± 2.81 - 1.14 ± 1.52 / 36.93 ± 3.69 - 3.71 ± 1.14 / 16.21 ± 2.86 - 51.93 ± 3.96 / 8.48 ± 0.87 - 0.95 ± 1.48 / 22.52 ± 1.05 - 2.90 ± 2.91 / 56.72 ± 0.69 - 13.1.0 - 14.1.2 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - - - Qwen/Qwen1.5-0.5B (few-shot) - 620 - 152 - 32768 - True - 11,371 ± 2,924 / 2,122 ± 692 - 4.37 - 16.20 ± 1.52 / 16.96 ± 1.71 - 0.10 ± 1.78 / 18.76 ± 2.62 - 
-0.57 ± 1.20 / 41.25 ± 3.51 - 3.31 ± 0.82 / 16.86 ± 2.98 - 56.00 ± 3.13 / 10.05 ± 0.73 - 1.96 ± 1.72 / 25.55 ± 1.37 - 0.85 ± 1.91 / 52.12 ± 2.92 - 12.5.2 - 14.1.2 - 12.1.0 - 12.1.0 - 12.1.0 - 12.10.8 - 12.1.0 - ibm-granite/granite-3.0-1b-a400m-base (few-shot) 1385 @@ -2202,6 +2202,29 @@ title: Icelandic NLG 🇮🇸 13.2.0 13.2.0 + + HuggingFaceTB/SmolLM2-360M (few-shot) + 362 + 49 + 8192 + True + 22,023 ± 6,203 / 3,675 ± 1,231 + 4.39 + 13.43 ± 1.36 / 13.81 ± 1.45 + 3.82 ± 2.09 / 24.09 ± 2.81 + 1.14 ± 1.52 / 36.93 ± 3.69 + 3.71 ± 1.14 / 16.21 ± 2.86 + 51.93 ± 3.96 / 8.48 ± 0.87 + 0.95 ± 1.48 / 22.52 ± 1.05 + 2.90 ± 2.91 / 56.72 ± 0.69 + 13.1.0 + 14.1.2 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + HuggingFaceTB/SmolLM2-135M (few-shot) 135 @@ -2209,7 +2232,7 @@ title: Icelandic NLG 🇮🇸 8192 True 26,346 ± 7,812 / 4,082 ± 1,372 - 4.39 + 4.41 14.74 ± 2.42 / 16.01 ± 2.04 3.13 ± 3.12 / 22.51 ± 2.62 -0.25 ± 0.60 / 34.69 ± 3.02 @@ -2225,6 +2248,29 @@ title: Icelandic NLG 🇮🇸 13.1.0 13.1.0 + + PleIAs/Pleias-350m-Preview (few-shot) + 353 + 66 + 2048 + True + 10,242 ± 3,432 / 1,335 ± 484 + 4.42 + 17.73 ± 1.58 / 18.22 ± 1.53 + 2.38 ± 2.00 / 21.79 ± 2.03 + -0.18 ± 1.21 / 37.94 ± 4.17 + 1.59 ± 0.83 / 12.57 ± 3.10 + 52.81 ± 2.78 / 8.12 ± 0.57 + 0.89 ± 1.75 / 25.64 ± 1.59 + -1.11 ± 3.80 / 55.19 ± 1.68 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + Qwen/Qwen1.5-1.8B (few-shot) 1837 @@ -2232,7 +2278,7 @@ title: Icelandic NLG 🇮🇸 32768 True 5,666 ± 1,328 / 1,256 ± 408 - 4.41 + 4.42 12.26 ± 4.13 / 12.77 ± 3.60 -9.69 ± 4.92 / 18.53 ± 1.70 0.94 ± 1.34 / 40.66 ± 3.73 @@ -2255,7 +2301,7 @@ title: Icelandic NLG 🇮🇸 512 True 1,373 ± 120 / 709 ± 172 - 4.41 + 4.42 23.67 ± 5.16 / 23.19 ± 4.37 7.76 ± 3.65 / 29.49 ± 4.31 0.00 ± 0.00 / 33.69 ± 0.28 @@ -2278,7 +2324,7 @@ title: Icelandic NLG 🇮🇸 8192 True 21,777 ± 6,115 / 3,617 ± 1,211 - 4.42 + 4.43 13.60 ± 1.50 / 13.99 ± 1.39 3.12 ± 4.92 / 27.55 ± 4.21 0.28 ± 1.41 / 37.58 ± 4.34 @@ -2294,29 +2340,6 @@ title: Icelandic NLG 🇮🇸 13.1.0 13.1.0 - - PleIAs/Pleias-350m-Preview (few-shot) - 353 - 66 - 2048 - True - 10,242 ± 3,432 / 1,335 ± 484 - 4.43 - 17.73 ± 1.58 / 18.22 ± 1.53 - 2.38 ± 2.00 / 21.79 ± 2.03 - -0.18 ± 1.21 / 37.94 ± 4.17 - 1.59 ± 0.83 / 12.57 ± 3.10 - 52.81 ± 2.78 / 8.12 ± 0.57 - 0.89 ± 1.75 / 25.64 ± 1.59 - -1.11 ± 3.80 / 55.19 ± 1.68 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - state-spaces/mamba-2.8b-hf (few-shot) 2768 @@ -2324,7 +2347,7 @@ title: Icelandic NLG 🇮🇸 32769 True 2,722 ± 495 / 766 ± 250 - 4.44 + 4.45 18.38 ± 1.54 / 20.76 ± 1.34 -1.70 ± 2.31 / 21.85 ± 2.75 0.49 ± 1.54 / 42.54 ± 4.23 @@ -2347,7 +2370,7 @@ title: Icelandic NLG 🇮🇸 4096 False 10,890 ± 2,686 / 2,186 ± 750 - 4.47 + 4.49 9.48 ± 1.48 / 10.10 ± 1.44 3.32 ± 3.76 / 21.42 ± 3.52 0.07 ± 1.06 / 43.54 ± 3.63 @@ -2370,7 +2393,7 @@ title: Icelandic NLG 🇮🇸 8192 True 25,602 ± 7,583 / 3,953 ± 1,325 - 4.50 + 4.52 13.70 ± 2.05 / 15.01 ± 2.07 3.01 ± 2.55 / 22.58 ± 2.22 -0.83 ± 0.71 / 32.99 ± 0.27 @@ -2393,7 +2416,7 @@ title: Icelandic NLG 🇮🇸 2048 True 2,331 ± 787 / 301 ± 97 - 4.61 + 4.60 13.80 ± 3.19 / 14.46 ± 3.19 2.17 ± 2.73 / 22.31 ± 3.55 -0.63 ± 0.78 / 36.66 ± 3.41 @@ -2416,7 +2439,7 @@ title: Icelandic NLG 🇮🇸 2048 True 19,896 ± 5,099 / 3,848 ± 1,251 - 4.69 + 4.71 1.68 ± 1.40 / 1.54 ± 1.28 2.78 ± 2.87 / 22.52 ± 3.50 -1.38 ± 1.13 / 34.41 ± 2.16 @@ -2439,7 +2462,7 @@ title: Icelandic NLG 🇮🇸 1024 False 48,619 ± 7,681 / 13,831 ± 4,404 - 4.75 + 4.74 0.00 ± 0.00 / 0.00 ± 0.00 2.31 ± 2.42 / 20.87 ± 2.00 0.00 ± 0.00 / 33.69 ± 0.28 @@ -2462,7 +2485,7 @@ title: Icelandic NLG 🇮🇸 1024 
False 49,558 ± 7,930 / 13,921 ± 4,425 - 4.78 + 4.79 0.00 ± 0.00 / 0.00 ± 0.00 1.86 ± 2.32 / 23.48 ± 4.78 0.00 ± 0.00 / 33.69 ± 0.28 @@ -2485,7 +2508,7 @@ title: Icelandic NLG 🇮🇸 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 4.90 + 4.91 0.00 ± 0.00 / 0.00 ± 0.00 -1.30 ± 1.45 / 18.79 ± 0.29 0.00 ± 0.00 / 33.69 ± 0.28 @@ -2508,7 +2531,7 @@ title: Icelandic NLG 🇮🇸 2048 True 11,734 ± 3,124 / 2,174 ± 720 - 5.16 + 5.15 0.00 ± 0.00 / 0.00 ± 0.00 2.54 ± 2.23 / 21.79 ± 2.35 0.00 ± 0.00 / 33.69 ± 0.28 diff --git a/icelandic-nlu.csv b/icelandic-nlu.csv index 40fc936e..8cafc3d2 100644 --- a/icelandic-nlu.csv +++ b/icelandic-nlu.csv @@ -7,7 +7,7 @@ vesteinn/ScandiBERT-no-faroese,124,50,514,True,False,15436,1.48,83.94,48.51,58.6 vesteinn/XLMR-ENIS,125,50,514,True,False,10711,1.52,82.2,49.16,48.51,27.06 google/rembert,576,250,512,True,False,11736,1.67,78.05,36.87,48.29,29.38 mideind/IceBERT-large,406,50,514,True,False,5677,1.73,85.14,49.4,59.31,12.84 -vesteinn/FoBERT,124,50,514,True,False,15623,1.76,85.04,48.45,50.78,17.76 +vesteinn/FoBERT,124,50,514,True,False,15623,1.77,85.04,48.45,50.78,17.76 mideind/IceBERT,163,50,514,True,False,16697,1.78,85.32,47.49,60.44,13.31 "claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,1.8,61.7,51.24,52.43,22.92 mideind/IceBERT-xlmr-ic3,278,250,514,True,False,11004,1.83,84.35,48.85,59.12,11.18 @@ -36,12 +36,12 @@ nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.46,62.68,31.96,11.81,30.49 mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.46,55.09,39.6,8.23,30.78 meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.48,52.97,41.29,5.95,31.99 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.53,60.2,38.09,9.14,28.66 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.53,60.2,38.09,9.14,28.66 intfloat/multilingual-e5-large,560,250,514,True,False,6732,2.57,78.43,48.52,10.78,13.79 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.58,50.45,34.68,8.69,31.94 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.58,50.45,34.68,8.69,31.94 intfloat/multilingual-e5-large-instruct,560,250,514,True,False,5947,2.59,78.32,50.1,8.11,13.93 intfloat/multilingual-e5-base,278,250,514,True,False,14965,2.63,75.46,42.08,15.21,10.82 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.66,46.48,39.91,11.72,25.91 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.66,46.48,39.91,11.72,25.91 cardiffnlp/twitter-xlm-roberta-base,278,250,514,True,False,34475,2.72,72.69,35.62,28.72,8.46 clips/mfaq,278,250,514,True,False,5591,2.85,77.3,43.25,3.51,11.31 microsoft/xlm-align-base,278,250,514,True,False,14744,2.86,78.01,38.76,5.92,10.47 @@ -57,8 +57,8 @@ NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.95,36.73, Twitter/twhin-bert-base,279,250,512,True,False,11514,2.96,70.38,40.22,11.09,7.67 Geotrend/bert-base-25lang-cased,151,85,512,True,False,13908,3.03,74.65,34.09,2.89,9.29 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,3.05,42.29,38.87,0.28,18.74 -mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,3.06,46.73,26.28,1.5,25.17 microsoft/infoxlm-base,278,250,514,True,False,34735,3.07,77.09,33.14,1.71,8.56 +mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,3.07,46.73,26.28,1.5,25.17 
KennethEnevoldsen/dfm-sentence-encoder-medium,124,50,514,True,False,14998,3.08,64.88,36.18,-0.6,12.39 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,3.09,42.23,27.93,6.38,19.39 sentence-transformers/stsb-xlm-r-multilingual,278,250,514,True,False,15040,3.09,66.23,37.79,0.04,10.04 @@ -89,9 +89,9 @@ DeepPavlov/rubert-base-cased,178,120,512,True,False,15785,3.34,61.95,29.51,2.4,6 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,3.34,34.99,31.19,-10.68,23.65 sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,3.34,59.15,33.1,0.8,6.14 Geotrend/distilbert-base-en-no-cased,69,33,512,True,False,26597,3.35,63.84,29.48,2.15,5.23 -KBLab/megatron-bert-large-swedish-cased-110k,370,64,512,True,False,7075,3.36,63.11,26.38,3.47,7.76 NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,3.36,62.28,38.08,2.85,0.86 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,3.36,42.67,9.95,1.11,22.25 +KBLab/megatron-bert-large-swedish-cased-110k,370,64,512,True,False,7075,3.37,63.11,26.38,3.47,7.76 occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,4096,False,False,2088,3.37,40.71,14.7,0.71,20.66 ltg/norbert3-base,124,50,512,True,False,11405,3.38,68.22,34.62,2.41,0.0 DDSC/roberta-base-scandinavian,125,50,514,True,False,14491,3.4,51.53,34.36,0.89,5.19 @@ -113,7 +113,7 @@ pdelobelle/robbert-v2-dutch-base,117,40,514,True,False,15481,3.46,55.54,28.28,1. FacebookAI/roberta-base,125,50,514,True,False,13354,3.49,60.18,22.99,1.07,6.66 google/gemma-2-2b-it (few-shot),2614,256,4096,True,False,5374,3.49,14.79,15.24,1.1,25.42 meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,3.5,32.71,13.24,0.66,18.04 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,2047,True,False,3194,3.5,33.05,14.42,0.71,17.23 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,2047,True,False,8681,3.5,33.05,14.42,0.71,17.23 sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.51,48.62,34.19,2.64,1.22 01-ai/Yi-1.5-6B (few-shot),6061,64,4096,True,False,2867,3.53,38.15,5.32,0.98,20.39 bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,3.53,34.62,12.72,-0.24,18.1 @@ -148,7 +148,7 @@ Maltehb/aelaectra-danish-electra-small-cased,14,32,512,True,False,4593,3.81,35.7 sarnikowski/convbert-small-da-cased,13,29,512,True,False,14273,3.83,25.49,24.23,1.63,5.28 timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,4096,False,False,5172,3.83,24.98,13.57,1.18,8.52 nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.84,43.65,10.7,10.77,0.29 -AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,3.87,1.42,4.18,0.75,23.33 +AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,3.86,1.42,4.18,0.75,23.33 Maltehb/aelaectra-danish-electra-small-uncased,14,32,512,True,False,5995,3.87,30.5,25.53,3.59,0.06 dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,3.87,43.93,9.46,0.04,6.13 Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.88,15.66,9.17,-0.55,14.11 @@ -158,15 +158,15 @@ ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,102 HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.9,20.5,10.09,0.83,10.84 HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.91,26.23,6.86,2.69,10.84 ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4096,True,False,10187,3.93,29.51,1.7,-0.32,12.36 
+openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.93,26.58,-0.79,0.63,15.14 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.94,33.57,0.6,0.0,11.27 ibm-granite/granite-7b-base (few-shot),6738,32,4096,True,False,4405,3.94,28.06,4.92,0.1,11.07 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.94,26.58,-0.79,0.63,15.14 stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.97,32.19,8.99,0.37,6.58 alexanderfalk/danbert-small-cased,83,52,514,True,False,30013,3.98,12.39,29.54,1.63,1.7 Maltehb/danish-bert-botxo,111,32,512,True,False,16091,4.03,12.64,22.02,0.06,4.77 +fresh-xlm-roberta-base,278,250,514,True,False,2214,4.06,17.34,25.25,-0.06,1.02 ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,4.06,18.07,0.65,-0.72,12.27 fresh-electra-small,14,31,512,True,False,7840,4.07,9.96,30.18,-0.1,0.12 -fresh-xlm-roberta-base,278,250,514,True,False,2214,4.07,17.34,25.25,-0.06,1.02 PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.08,22.56,0.53,-0.26,11.77 meta-llama/Llama-3.2-1B (few-shot),1236,128,131072,True,False,7577,4.08,17.77,7.64,-0.35,8.15 google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,4.1,20.49,-0.75,-0.01,10.95 @@ -181,14 +181,15 @@ PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,4.24,18.86,-0.6 state-spaces/mamba-2.8b-hf (few-shot),2768,50,-1,True,False,2722,4.25,18.38,-1.7,0.49,6.3 HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.27,13.43,3.82,1.14,3.71 HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.29,13.6,3.12,0.28,4.09 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.34,17.73,2.38,-0.18,1.59 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.33,17.73,2.38,-0.18,1.59 Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.36,16.2,0.1,-0.57,3.31 ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,4.36,5.62,4.82,-0.2,4.94 HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.37,14.74,3.13,-0.25,1.35 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,4.37,9.39,6.44,-0.72,3.34 PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.38,13.8,2.17,-0.63,1.29 HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.4,13.7,3.01,-0.83,0.94 Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.41,9.5,1.63,1.76,3.14 -RuterNorway/Llama-2-7b-chat-norwegian (few-shot),6738,32,4096,False,False,10890,4.42,9.48,3.32,0.07,1.04 +RuterNorway/Llama-2-7b-chat-norwegian (few-shot),6738,32,4096,False,False,10890,4.41,9.48,3.32,0.07,1.04 Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,4.44,12.26,-9.69,0.94,6.31 NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.45,3.69,5.91,1.24,0.29 RJuro/kanelsnegl-v0.1 (few-shot),7242,32,4096,True,False,5847,4.49,0.0,10.54,0.0,0.0 diff --git a/icelandic-nlu.md b/icelandic-nlu.md index 99f12a60..cf08a3ad 100644 --- a/icelandic-nlu.md +++ b/icelandic-nlu.md @@ -3,7 +3,7 @@ layout: leaderboard title: Icelandic NLU 🇮🇸 --- -
Last updated: 10/01/2025 12:30:20 CET
+Last updated: 11/01/2025 11:03:30 CET
@@ -177,7 +177,7 @@ title: Icelandic NLU 🇮🇸 514 True 15,623 ± 2,828 / 3,737 ± 1,191 - 1.76 + 1.77 85.04 ± 0.95 / 80.51 ± 1.10 48.45 ± 1.37 / 64.80 ± 1.08 50.78 ± 2.21 / 73.10 ± 1.10 @@ -669,7 +669,7 @@ title: Icelandic NLU 🇮🇸 128 8192 True - 1,007 ± 316 / 162 ± 45 + 1,483 ± 377 / 287 ± 97 2.53 60.20 ± 2.76 / 40.38 ± 4.22 38.09 ± 2.38 / 54.51 ± 2.16 @@ -703,7 +703,7 @@ title: Icelandic NLU 🇮🇸 128 8192 True - 1,335 ± 338 / 260 ± 88 + 1,477 ± 376 / 285 ± 97 2.58 50.45 ± 1.95 / 37.62 ± 3.95 34.68 ± 3.74 / 53.39 ± 3.75 @@ -754,7 +754,7 @@ title: Icelandic NLU 🇮🇸 128 131072 True - 1,005 ± 330 / 196 ± 74 + 1,473 ± 377 / 283 ± 96 2.66 46.48 ± 1.98 / 24.57 ± 1.73 39.91 ± 2.35 / 57.39 ± 1.64 @@ -1020,23 +1020,6 @@ title: Icelandic NLU 🇮🇸 14.0.4 14.0.4 - - mistralai/Mistral-7B-v0.3 (few-shot) - 7248 - 33 - 32768 - True - 1,364 ± 343 / 266 ± 90 - 3.06 - 46.73 ± 3.51 / 38.01 ± 4.18 - 26.28 ± 4.82 / 46.42 ± 4.24 - 1.50 ± 1.18 / 36.24 ± 1.65 - 25.17 ± 3.82 / 50.14 ± 2.43 - 14.0.3 - 14.1.2 - 14.1.2 - 14.0.3 - microsoft/infoxlm-base 278 @@ -1054,6 +1037,23 @@ title: Icelandic NLU 🇮🇸 0.0.0 0.0.0 + + mistralai/Mistral-7B-v0.3 (few-shot) + 7248 + 33 + 32768 + True + 1,364 ± 343 / 266 ± 90 + 3.07 + 46.73 ± 3.51 / 38.01 ± 4.18 + 26.28 ± 4.82 / 46.42 ± 4.24 + 1.50 ± 1.18 / 36.24 ± 1.65 + 25.17 ± 3.82 / 50.14 ± 2.43 + 14.0.3 + 14.1.2 + 14.1.2 + 14.0.3 + KennethEnevoldsen/dfm-sentence-encoder-medium 124 @@ -1564,23 +1564,6 @@ title: Icelandic NLU 🇮🇸 12.7.0 12.7.0 - - KBLab/megatron-bert-large-swedish-cased-110k - 370 - 64 - 512 - True - 7,075 ± 1,093 / 2,057 ± 661 - 3.36 - 63.11 ± 1.31 / 65.36 ± 1.28 - 26.38 ± 1.89 / 48.24 ± 1.26 - 3.47 ± 1.38 / 48.04 ± 2.34 - 7.76 ± 0.57 / 36.28 ± 1.35 - 0.0.0 - 13.3.0 - 0.0.0 - 0.0.0 - NbAiLab/nb-llama-3.1-70B (few-shot) 70554 @@ -1615,6 +1598,23 @@ title: Icelandic NLU 🇮🇸 13.0.0 13.0.0 + + KBLab/megatron-bert-large-swedish-cased-110k + 370 + 64 + 512 + True + 7,075 ± 1,093 / 2,057 ± 661 + 3.37 + 63.11 ± 1.31 / 65.36 ± 1.28 + 26.38 ± 1.89 / 48.24 ± 1.26 + 3.47 ± 1.38 / 48.04 ± 2.34 + 7.76 ± 0.57 / 36.28 ± 1.35 + 0.0.0 + 13.3.0 + 0.0.0 + 0.0.0 + occiglot/occiglot-7b-eu5-instruct (few-shot) 7242 @@ -1978,7 +1978,7 @@ title: Icelandic NLU 🇮🇸 32 2047 True - 3,194 ± 687 / 650 ± 216 + 8,681 ± 1,650 / 2,177 ± 717 3.50 33.05 ± 4.29 / 29.75 ± 3.67 14.42 ± 3.18 / 37.82 ± 2.65 @@ -2574,7 +2574,7 @@ title: Icelandic NLU 🇮🇸 2048 True 4,608 ± 988 / 1,115 ± 354 - 3.87 + 3.86 1.42 ± 1.60 / 3.11 ± 1.85 4.18 ± 2.03 / 17.89 ± 1.60 0.75 ± 0.73 / 45.87 ± 2.20 @@ -2737,6 +2737,23 @@ title: Icelandic NLU 🇮🇸 13.0.0 13.0.0 + + openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot) + 7453 + 251 + 4096 + True + 1,438 ± 410 / 233 ± 79 + 3.93 + 26.58 ± 1.47 / 22.13 ± 2.38 + -0.79 ± 2.98 / 26.06 ± 1.66 + 0.63 ± 1.36 / 43.80 ± 3.59 + 15.14 ± 2.49 / 44.23 ± 2.12 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + ibm-granite/granite-3b-code-instruct-2k (few-shot) 3483 @@ -2771,23 +2788,6 @@ title: Icelandic NLU 🇮🇸 12.10.5 12.10.5 - - openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot) - 7453 - 251 - 4096 - True - 1,438 ± 410 / 233 ± 79 - 3.94 - 26.58 ± 1.47 / 22.13 ± 2.38 - -0.79 ± 2.98 / 26.06 ± 1.66 - 0.63 ± 1.36 / 43.80 ± 3.59 - 15.14 ± 2.49 / 44.23 ± 2.12 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - stabilityai/stablelm-2-1_6b (few-shot) 1645 @@ -2839,6 +2839,23 @@ title: Icelandic NLU 🇮🇸 0.0.0 0.0.0 + + fresh-xlm-roberta-base + 278 + 250 + 514 + True + 2,214 ± 94 / 1,494 ± 229 + 4.06 + 17.34 ± 1.13 / 16.43 ± 1.26 + 25.25 ± 3.12 / 44.40 ± 4.52 + -0.06 ± 0.99 / 36.73 ± 3.00 + 1.02 ± 0.30 / 21.61 ± 1.10 + 0.0.0 + 13.3.0 + 
0.0.0 + 0.0.0 + ibm-granite/granite-3.0-3b-a800m-base (few-shot) 3374 @@ -2873,23 +2890,6 @@ title: Icelandic NLU 🇮🇸 0.0.0 0.0.0 - - fresh-xlm-roberta-base - 278 - 250 - 514 - True - 2,214 ± 94 / 1,494 ± 229 - 4.07 - 17.34 ± 1.13 / 16.43 ± 1.26 - 25.25 ± 3.12 / 44.40 ± 4.52 - -0.06 ± 0.99 / 36.73 ± 3.00 - 1.02 ± 0.30 / 21.61 ± 1.10 - 0.0.0 - 13.3.0 - 0.0.0 - 0.0.0 - PleIAs/Pleias-1.2b-Preview (few-shot) 1195 @@ -3135,7 +3135,7 @@ title: Icelandic NLU 🇮🇸 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.34 + 4.33 17.73 ± 1.58 / 18.22 ± 1.53 2.38 ± 2.00 / 21.79 ± 2.03 -0.18 ± 1.21 / 37.94 ± 4.17 @@ -3196,6 +3196,23 @@ title: Icelandic NLU 🇮🇸 13.1.0 13.1.0 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 4.37 + 9.39 ± 3.31 / 9.28 ± 3.38 + 6.44 ± 2.00 / 22.24 ± 3.62 + -0.72 ± 1.67 / 43.21 ± 3.74 + 3.34 ± 0.98 / 21.41 ± 2.93 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + PleIAs/Pleias-Pico (few-shot) 353 @@ -3254,7 +3271,7 @@ title: Icelandic NLU 🇮🇸 4096 False 10,890 ± 2,686 / 2,186 ± 750 - 4.42 + 4.41 9.48 ± 1.48 / 10.10 ± 1.44 3.32 ± 3.76 / 21.42 ± 3.52 0.07 ± 1.06 / 43.54 ± 3.63 diff --git a/mainland-scandinavian-nlg.csv b/mainland-scandinavian-nlg.csv index 2922946d..5c4209b4 100644 --- a/mainland-scandinavian-nlg.csv +++ b/mainland-scandinavian-nlg.csv @@ -1,6 +1,6 @@ model_id,num_model_parameters,vocabulary_size,max_sequence_length,commercially_licensed,merge,speed,rank,da_rank,no_rank,sv_rank,dansk,angry_tweets,scala_da,scandiqa_da,nordjylland_news,danske_talemaader,danish_citizen_tests,hellaswag_da,norne_nb,norne_nn,norec,no_sammendrag,scala_nb,scala_nn,norquad,mmlu_no,hellaswag_no,suc3,swerec,scala_sv,scandiqa_sv,swedn,mmlu_sv,hellaswag_sv "gpt-4-0613 (few-shot, val)",-1,100,8192,True,False,597,1.21,1.19,1.24,1.19,64.94,59.97,71.56,56.43,66.76,94.93,93.98,81.64,81.16,75.75,72.72,65.92,77.3,57.18,47.5,68.77,88.3,76.86,79.19,80.93,53.81,67.83,72.53,85.67 -"gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,1.27,1.19,1.39,1.24,66.8,61.62,66.84,56.85,66.21,95.21,97.19,78.74,77.48,78.7,62.55,63.6,74.45,56.31,44.67,70.84,86.3,74.45,77.59,71.35,56.56,66.08,71.32,84.09 +"gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,1.28,1.19,1.4,1.24,66.8,61.62,66.84,56.85,66.21,95.21,97.19,78.74,77.48,78.7,62.55,63.6,74.45,56.31,44.67,70.84,86.3,74.45,77.59,71.35,56.56,66.08,71.32,84.09 "gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.3,1.24,1.38,1.28,71.15,49.42,64.59,57.35,66.03,96.41,97.68,85.96,79.07,81.56,66.66,63.25,64.53,54.7,43.51,73.81,89.91,76.66,77.16,68.99,57.96,66.0,70.7,86.3 meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,1.34,1.15,1.61,1.25,71.94,61.26,64.94,56.0,67.44,89.91,96.37,81.66,82.44,82.17,40.55,66.76,63.91,45.93,45.33,73.55,81.37,76.27,80.7,68.85,56.41,67.18,75.85,81.49 "meta-llama/Meta-Llama-3-70B (few-shot, val)",70554,128,8192,True,False,312,1.44,1.42,1.48,1.42,63.62,60.19,50.07,60.97,67.33,83.94,78.83,60.69,75.31,75.94,66.74,65.78,59.82,47.56,60.87,62.45,65.29,74.61,78.61,63.2,61.98,67.6,61.55,66.21 @@ -12,120 +12,120 @@ Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.61,1.46,1 ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.69,1.46,1.93,1.69,66.5,58.93,57.27,55.02,66.59,81.37,83.16,67.83,76.25,77.91,40.54,64.53,59.75,47.82,40.99,56.11,67.72,62.91,79.51,60.28,55.44,65.24,56.4,65.89 Qwen/QwQ-32B-Preview 
(few-shot),32764,152,32768,True,False,2258,1.74,1.6,2.01,1.6,64.66,53.42,53.93,55.55,64.54,72.75,78.49,74.11,75.68,75.89,38.41,62.79,56.42,39.34,44.35,61.53,67.36,69.73,78.76,57.57,56.43,65.53,64.02,67.11 "meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8192,True,False,1673,1.75,1.61,1.8,1.84,63.1,53.09,40.98,51.13,67.76,85.18,79.22,70.19,80.5,76.47,59.29,65.7,47.28,32.76,39.71,63.58,63.41,77.06,53.56,47.5,46.86,68.25,61.31,66.73 -google/gemma-2-9b-it (few-shot),9242,256,8193,True,False,2062,1.76,1.66,1.85,1.78,49.05,52.26,58.38,55.34,67.67,80.62,75.37,66.17,61.82,60.41,61.06,65.94,62.46,52.99,39.1,49.45,67.96,52.5,78.51,61.28,55.22,66.3,51.58,66.61 +google/gemma-2-9b-it (few-shot),9242,256,8193,True,False,2062,1.76,1.65,1.85,1.78,49.05,52.26,58.38,55.34,67.67,80.62,75.37,66.17,61.82,60.41,61.06,65.94,62.46,52.99,39.1,49.45,67.96,52.5,78.51,61.28,55.22,66.3,51.58,66.61 "gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4094,True,False,921,1.78,1.57,1.96,1.82,61.31,52.52,57.63,57.03,66.38,81.95,77.66,62.21,77.7,73.92,58.88,64.18,54.29,32.82,45.35,40.26,59.02,73.04,72.77,58.06,58.02,66.92,40.73,50.51 "152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32764,True,False,2126,1.84,1.69,2.09,1.74,56.96,55.11,42.64,54.58,66.8,83.94,73.32,60.52,66.75,66.81,60.58,64.64,47.53,17.14,41.92,51.01,58.23,62.96,75.25,53.28,56.42,67.6,53.56,59.7 "gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,1.86,1.74,2.13,1.7,35.79,53.69,62.98,51.96,64.56,87.34,98.11,67.56,60.16,48.74,39.62,62.75,71.38,42.94,36.04,68.89,59.02,51.31,73.54,66.39,52.22,65.32,69.31,63.92 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.89,1.71,1.93,2.03,59.96,56.91,67.13,17.52,65.32,91.7,88.38,71.08,72.74,69.17,67.45,63.0,74.27,54.83,3.67,59.93,76.01,62.45,77.69,68.93,12.11,66.04,55.8,63.61 +"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.89,1.73,1.92,2.03,59.96,56.91,67.13,17.52,65.32,91.7,88.38,71.08,72.74,69.17,67.45,63.0,74.27,54.83,3.67,59.93,76.01,62.45,77.69,68.93,12.11,66.04,55.8,63.61 "meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4096,True,False,1892,1.89,1.69,2.12,1.85,58.06,53.24,39.71,62.51,67.39,84.43,71.35,54.31,62.46,64.68,59.68,66.09,27.34,3.95,57.44,43.88,53.85,64.76,75.46,43.27,63.04,68.43,46.16,50.41 google/gemma-2-9b (few-shot),9242,256,8193,True,False,2038,1.97,1.96,2.03,1.91,44.16,38.84,43.42,60.11,67.46,81.68,77.77,50.04,54.08,54.33,62.54,65.67,50.88,41.01,46.25,50.81,53.37,50.43,80.55,50.86,59.35,65.63,52.06,49.8 "gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,1.97,1.71,2.22,1.99,57.52,49.73,57.56,51.79,64.66,79.84,83.21,63.33,60.43,55.59,39.82,62.77,54.84,33.8,36.55,56.96,68.7,52.47,73.55,52.27,48.95,65.59,54.3,54.13 timpal0l/sol (few-shot),10732,32,4096,False,False,3701,2.05,1.98,2.15,2.01,54.91,44.38,21.11,58.96,66.89,61.12,64.86,67.96,65.14,65.88,57.06,66.21,26.41,19.58,51.6,36.06,64.97,57.51,77.31,25.06,60.16,65.22,39.52,70.93 "claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.08,1.72,2.39,2.12,65.88,63.61,71.03,46.24,64.6,84.02,85.34,23.71,74.23,70.5,50.92,62.57,76.1,72.03,40.57,24.04,-0.8,70.22,77.7,74.34,49.32,65.36,28.34,13.68 nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.09,2.09,2.27,1.9,58.34,59.14,56.46,39.77,47.41,85.69,91.37,74.3,69.21,70.45,39.87,47.52,57.8,40.31,40.97,63.81,73.01,58.65,81.81,63.69,42.29,50.67,68.28,72.36 -upstage/SOLAR-10.7B-v1.0 
(few-shot),10732,32,4096,True,False,3780,2.09,1.96,2.26,2.06,58.03,46.63,15.09,62.15,66.81,64.79,68.59,65.29,68.11,68.19,55.33,65.51,10.15,7.51,55.33,35.57,62.76,59.65,77.48,16.94,62.65,65.19,39.82,68.87 -mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024001,True,False,7095,2.12,2.02,2.2,2.13,53.95,48.97,31.78,56.44,66.24,67.09,72.87,42.77,70.14,68.74,60.64,64.61,35.59,29.22,49.87,39.07,37.95,62.86,70.54,37.5,58.0,65.93,40.58,42.99 +upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,2.09,1.96,2.25,2.06,58.03,46.63,15.09,62.15,66.81,64.79,68.59,65.29,68.11,68.19,55.33,65.51,10.15,7.51,55.33,35.57,62.76,59.65,77.48,16.94,62.65,65.19,39.82,68.87 +mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024001,True,False,7095,2.12,2.02,2.21,2.13,53.95,48.97,31.78,56.44,66.24,67.09,72.87,42.77,70.14,68.74,60.64,64.61,35.59,29.22,49.87,39.07,37.95,62.86,70.54,37.5,58.0,65.93,40.58,42.99 mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.16,2.14,2.25,2.1,48.24,39.52,62.92,36.92,56.79,67.68,84.85,68.82,56.41,55.6,25.18,59.28,62.56,53.09,42.57,45.67,73.86,48.92,62.08,68.93,36.4,61.39,48.18,56.87 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.24,2.03,2.54,2.14,63.63,50.82,35.58,54.33,65.71,58.67,72.01,33.61,73.2,72.26,35.8,63.37,36.86,23.4,40.32,38.62,40.5,65.01,77.68,34.06,56.78,66.08,39.39,37.81 -"RJuro/munin-neuralbeagle-7b (few-shot, val)",7242,32,32768,False,True,2493,2.27,2.01,2.51,2.3,51.44,54.91,22.77,56.51,68.06,74.24,70.86,41.49,61.18,65.16,55.61,65.99,20.84,9.12,42.92,27.77,39.67,62.96,77.13,15.73,58.43,67.58,32.54,34.94 +"RJuro/munin-neuralbeagle-7b (few-shot, val)",7242,32,32768,False,True,2493,2.28,2.01,2.52,2.3,51.44,54.91,22.77,56.51,68.06,74.24,70.86,41.49,61.18,65.16,55.61,65.99,20.84,9.12,42.92,27.77,39.67,62.96,77.13,15.73,58.43,67.58,32.54,34.94 skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.28,2.09,2.5,2.25,49.17,51.51,32.04,58.52,66.83,65.77,77.56,30.34,62.52,61.55,52.09,64.7,21.99,16.84,47.3,39.99,32.16,54.14,78.27,32.49,58.95,65.86,43.62,28.84 mhenrichsen/danskgpt-chat-v2.1 (few-shot),-1,32,32768,True,False,5085,2.33,1.93,2.6,2.45,51.08,54.69,30.95,56.56,66.9,79.39,73.22,48.16,62.43,60.68,53.41,63.65,-1.16,0.3,49.15,38.47,40.13,54.37,75.98,17.98,55.07,64.42,32.81,36.24 -AI-Sweden-Models/Llama-3-8B-instruct (few-shot),8030,128,8192,False,False,1472,2.34,2.16,2.85,2.0,58.57,51.63,28.82,51.98,64.11,53.8,60.74,42.86,81.96,78.42,35.3,63.15,12.11,5.94,22.15,28.72,44.75,84.77,80.1,29.17,54.68,66.01,32.11,47.49 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.34,2.09,2.63,2.3,54.7,54.81,32.11,48.87,66.79,56.14,63.54,39.48,64.55,66.44,35.17,64.48,27.41,15.6,43.11,38.1,39.3,55.8,79.23,32.67,46.88,66.43,36.35,37.89 -"timpal0l/BeagleCatMunin2 (few-shot, val)",7242,32,32768,False,True,2477,2.34,2.13,2.56,2.33,51.53,47.95,14.1,58.28,68.04,67.37,66.75,42.21,61.17,65.44,58.69,66.08,15.03,5.95,42.42,27.31,41.63,60.87,73.72,6.78,58.75,68.06,33.71,41.45 -NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.35,2.32,2.45,2.27,49.18,49.76,41.28,12.83,67.44,77.55,74.67,36.42,58.05,59.65,57.94,65.07,51.36,42.84,14.72,43.18,37.58,57.66,80.04,45.21,52.73,59.91,42.25,28.88 -four-two-labs/orpo-llama-3-swe (few-shot),8030,128,8192,False,False,4974,2.36,2.26,2.54,2.27,46.75,51.73,24.73,59.97,65.21,55.72,61.59,25.43,61.63,61.3,48.85,63.67,24.15,21.33,53.66,33.52,26.04,60.93,79.74,26.02,59.84,64.99,36.35,27.22 -"meta-llama/Llama-2-70b-chat-hf 
(few-shot, val)",68977,32,4096,True,False,1979,2.36,2.14,2.61,2.33,52.22,50.66,23.57,53.82,67.13,69.26,46.4,39.72,60.21,62.99,55.12,65.11,27.12,6.82,38.5,32.3,34.43,55.91,64.52,23.85,58.88,67.57,37.6,31.78 -mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.37,2.11,2.69,2.32,52.62,50.07,37.37,54.87,66.46,69.0,79.86,24.68,63.92,62.15,46.68,63.04,33.38,19.99,31.87,38.91,20.29,60.92,79.78,34.88,50.35,62.82,43.74,19.86 -"timpal0l/BeagleCatMunin (few-shot, val)",7242,32,32768,False,True,2495,2.37,2.16,2.7,2.24,47.62,54.73,21.8,57.26,67.31,66.49,60.41,28.51,54.04,62.21,54.74,65.6,14.51,5.38,42.83,25.82,32.01,50.53,77.37,27.84,59.98,67.89,34.8,36.65 -"birgermoell/Munin-NeuralBeagle-NorskGPT (few-shot, val)",7242,32,32768,False,True,2903,2.38,2.27,2.48,2.4,51.85,44.02,1.22,57.69,67.69,68.45,65.51,42.09,63.33,68.84,58.28,65.94,18.65,10.72,44.39,26.61,46.64,63.85,73.72,-0.56,60.1,68.11,27.79,42.43 -"birgermoell/WestLake-Munin-Cat-NorskGPT (few-shot, val)",7242,32,32768,False,True,2856,2.38,2.27,2.48,2.4,51.85,44.02,1.22,57.69,67.69,68.45,65.51,42.09,63.33,68.84,58.28,65.94,18.65,10.72,44.39,26.61,46.64,63.85,73.72,-0.56,60.1,68.11,27.79,42.43 -"timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val)",8030,128,8192,False,True,5018,2.38,2.26,2.61,2.26,51.37,52.17,27.98,51.65,66.25,57.31,54.17,27.65,64.51,65.66,52.9,64.12,29.34,17.42,38.49,25.77,31.8,65.33,74.99,32.65,55.71,66.53,33.16,32.51 +AI-Sweden-Models/Llama-3-8B-instruct (few-shot),8030,128,8192,False,False,1472,2.34,2.16,2.86,2.0,58.57,51.63,28.82,51.98,64.11,53.8,60.74,42.86,81.96,78.42,35.3,63.15,12.11,5.94,22.15,28.72,44.75,84.77,80.1,29.17,54.68,66.01,32.11,47.49 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.35,2.09,2.66,2.3,54.7,54.81,32.11,48.87,66.79,56.14,63.54,39.48,64.55,66.44,35.17,64.48,27.41,15.6,43.11,35.02,37.61,55.8,79.23,32.67,46.88,66.43,36.35,37.89 +"timpal0l/BeagleCatMunin2 (few-shot, val)",7242,32,32768,False,True,2477,2.35,2.13,2.58,2.33,51.53,47.95,14.1,58.28,68.04,67.37,66.75,42.21,61.17,65.44,58.69,66.08,15.03,5.95,42.42,27.31,41.63,60.87,73.72,6.78,58.75,68.06,33.71,41.45 +NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.36,2.35,2.45,2.27,49.18,49.76,41.28,12.83,67.44,77.55,74.67,36.42,58.05,59.65,57.94,65.07,51.36,42.84,14.72,43.18,37.58,57.66,80.04,45.21,52.73,59.91,42.25,28.88 +four-two-labs/orpo-llama-3-swe (few-shot),8030,128,8192,False,False,4974,2.36,2.26,2.55,2.27,46.75,51.73,24.73,59.97,65.21,55.72,61.59,25.43,61.63,61.3,48.85,63.67,24.15,21.33,53.66,33.52,26.04,60.93,79.74,26.02,59.84,64.99,36.35,27.22 +"meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4096,True,False,1979,2.36,2.14,2.62,2.33,52.22,50.66,23.57,53.82,67.13,69.26,46.4,39.72,60.21,62.99,55.12,65.11,27.12,6.82,38.5,32.3,34.43,55.91,64.52,23.85,58.88,67.57,37.6,31.78 +"timpal0l/BeagleCatMunin (few-shot, val)",7242,32,32768,False,True,2495,2.36,2.16,2.69,2.24,47.62,54.73,21.8,57.26,67.31,66.49,60.41,28.51,54.04,62.21,54.74,65.6,14.51,5.38,42.83,25.82,32.01,50.53,77.37,27.84,59.98,67.89,34.8,36.65 +"timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val)",8030,128,8192,False,True,5018,2.37,2.26,2.6,2.26,51.37,52.17,27.98,51.65,66.25,57.31,54.17,27.65,64.51,65.66,52.9,64.12,29.34,17.42,38.49,25.77,31.8,65.33,74.99,32.65,55.71,66.53,33.16,32.51 +"birgermoell/Munin-NeuralBeagle-NorskGPT (few-shot, 
val)",7242,32,32768,False,True,2903,2.38,2.27,2.47,2.4,51.85,44.02,1.22,57.69,67.69,68.45,65.51,42.09,63.33,68.84,58.28,65.94,18.65,10.72,44.39,26.61,46.64,63.85,73.72,-0.56,60.1,68.11,27.79,42.43 +"birgermoell/WestLake-Munin-Cat-NorskGPT (few-shot, val)",7242,32,32768,False,True,2856,2.38,2.27,2.47,2.4,51.85,44.02,1.22,57.69,67.69,68.45,65.51,42.09,63.33,68.84,58.28,65.94,18.65,10.72,44.39,26.61,46.64,63.85,73.72,-0.56,60.1,68.11,27.79,42.43 +mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.38,2.11,2.7,2.32,52.62,50.07,37.37,54.87,66.46,69.0,79.86,24.68,63.92,62.15,46.68,63.04,33.38,19.99,31.87,38.91,20.29,60.92,79.78,34.88,50.35,62.82,43.74,19.86 +"mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.38,2.15,2.63,2.37,53.02,51.29,19.73,51.69,67.33,65.38,62.78,39.07,62.47,66.69,54.04,65.74,16.75,13.0,34.48,28.39,35.19,61.25,76.03,16.28,50.96,68.35,32.3,38.78 utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.38,2.11,2.67,2.35,34.0,53.97,32.21,57.1,67.55,80.75,74.77,36.86,40.91,42.91,52.62,65.77,9.7,11.98,47.36,36.97,37.64,40.59,76.02,33.98,56.98,66.33,40.09,36.27 -"mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.39,2.15,2.64,2.37,53.02,51.29,19.73,51.69,67.33,65.38,62.78,39.07,62.47,66.69,54.04,65.74,16.75,13.0,34.48,28.39,35.19,61.25,76.03,16.28,50.96,68.35,32.3,38.78 -Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.4,2.4,2.36,2.44,51.94,51.97,29.99,38.99,64.49,35.95,59.76,34.63,66.22,64.14,55.48,65.32,26.13,17.32,49.75,29.72,46.78,56.28,77.51,23.25,47.09,65.58,31.52,39.95 -mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.4,2.16,2.69,2.36,51.2,50.95,33.44,46.85,64.66,67.12,76.78,39.93,55.02,57.37,36.76,61.59,30.73,18.96,41.01,39.07,40.48,46.15,80.33,32.89,46.51,66.04,42.98,35.33 +Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.4,2.4,2.37,2.44,51.94,51.97,29.99,38.99,64.49,35.95,59.76,34.63,66.22,64.14,55.48,65.32,26.13,17.32,49.75,29.72,46.78,56.28,77.51,23.25,47.09,65.58,31.52,39.95 NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.41,2.16,2.59,2.49,51.95,52.11,44.47,43.32,64.64,77.33,79.05,23.81,58.53,60.26,59.48,54.54,51.85,41.89,25.62,43.35,26.91,57.01,80.12,43.04,30.44,63.55,41.92,19.13 +mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.41,2.16,2.7,2.36,51.2,50.95,33.44,46.85,64.66,67.12,76.78,39.93,55.02,57.37,36.76,61.59,30.73,18.96,41.01,39.07,40.48,46.15,80.33,32.89,46.51,66.04,42.98,35.33 "birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2890,2.42,2.16,2.7,2.4,50.4,52.3,21.3,58.17,66.25,65.32,64.76,33.7,53.96,63.45,52.7,65.23,14.87,2.48,41.43,27.42,36.05,52.96,76.99,14.27,59.92,67.62,27.95,36.11 -CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.44,2.3,2.66,2.36,51.32,52.0,18.48,52.43,66.18,41.32,52.24,37.67,66.55,63.63,38.61,64.48,15.8,12.3,43.26,36.48,35.85,57.38,78.43,14.52,53.14,65.69,37.32,38.28 +CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.44,2.3,2.65,2.36,51.32,52.0,18.48,52.43,66.18,41.32,52.24,37.67,66.55,63.63,38.61,64.48,15.8,12.3,43.26,36.48,35.85,57.38,78.43,14.52,53.14,65.69,37.32,38.28 "RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val)",7242,32,32768,False,True,3008,2.44,2.19,2.76,2.38,50.83,43.41,19.72,57.87,66.77,72.92,68.11,40.62,53.68,61.92,47.78,64.23,0.91,1.24,47.76,28.59,42.57,59.36,72.04,22.38,57.96,65.13,29.81,35.59 
+"birgermoell/Rapid-Cycling (few-shot, val)",7242,32,32768,False,True,2346,2.44,2.19,2.73,2.41,49.99,51.25,20.66,56.82,65.58,67.48,63.69,32.11,55.93,63.85,50.41,65.1,15.74,2.23,39.81,26.34,34.85,53.66,77.72,16.22,59.75,67.57,27.24,32.04 "merge-crew/da-sv-task-arithmetic (few-shot, val)",7242,32,32768,True,True,2500,2.44,2.19,2.78,2.34,46.06,51.51,27.68,57.78,66.62,68.64,66.17,23.95,49.69,61.78,55.87,64.94,2.99,-1.29,44.62,28.26,25.83,47.28,76.62,33.23,60.0,66.68,29.95,31.12 -"birgermoell/Rapid-Cycling (few-shot, val)",7242,32,32768,False,True,2346,2.45,2.19,2.74,2.41,49.99,51.25,20.66,56.82,65.58,67.48,63.69,32.11,55.93,63.85,50.41,65.1,15.74,2.23,39.81,26.34,34.85,53.66,77.72,16.22,59.75,67.57,27.24,32.04 "merge-crew/da-sv-slerp (few-shot, val)",7242,32,32768,True,True,2467,2.45,2.22,2.78,2.34,45.94,51.75,28.04,57.65,66.65,68.88,64.81,23.63,49.67,61.11,56.07,64.97,3.81,-1.29,44.98,28.63,25.43,46.57,76.53,33.43,59.87,66.76,28.89,30.36 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.45,2.28,2.65,2.41,57.74,48.43,27.12,46.76,66.36,57.87,50.42,29.17,74.47,72.93,34.44,63.98,27.77,20.35,42.9,33.44,30.91,69.67,59.93,27.63,49.84,66.6,33.54,30.32 -senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.45,2.27,2.62,2.46,52.61,49.81,19.64,48.03,66.67,57.65,51.99,44.44,64.37,62.77,50.6,65.09,18.09,12.25,38.34,27.33,41.59,58.9,67.74,16.52,49.41,66.09,31.76,45.84 -"birgermoell/Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2887,2.46,2.34,2.72,2.33,47.71,48.21,19.55,56.46,65.46,61.29,60.29,28.33,56.44,66.56,53.24,64.96,11.96,2.5,39.21,26.64,31.14,55.29,78.29,18.45,58.42,67.54,29.44,37.45 +senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.45,2.27,2.63,2.46,52.61,49.81,19.64,48.03,66.67,57.65,51.99,44.44,64.37,62.77,50.6,65.09,18.09,12.25,38.34,27.33,41.59,58.9,67.74,16.52,49.41,66.09,31.76,45.84 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.46,2.28,2.69,2.41,57.74,48.43,27.12,46.76,66.36,57.87,50.42,29.17,66.56,68.29,34.47,63.8,28.22,18.21,47.34,30.78,31.49,69.67,59.93,27.63,49.84,66.6,33.54,30.32 +"birgermoell/Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2887,2.47,2.34,2.75,2.33,47.71,48.21,19.55,56.46,65.46,61.29,60.29,28.33,56.44,66.56,53.24,64.96,11.96,2.5,39.21,26.64,31.14,55.29,78.29,18.45,58.42,67.54,29.44,37.45 google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.5,2.41,2.77,2.33,19.59,46.55,32.64,59.4,66.63,74.32,65.58,24.74,26.43,32.66,41.82,64.02,25.82,20.16,52.68,39.96,27.82,43.68,77.72,36.25,58.62,64.44,39.94,25.96 "AI-Sweden-Models/tyr (few-shot, val)",7242,32,32768,False,True,6079,2.51,2.33,2.84,2.37,47.01,50.6,13.73,56.35,66.82,57.53,67.04,22.16,58.6,63.15,51.85,64.57,0.66,0.53,43.22,26.09,23.43,56.21,78.3,14.35,61.08,67.96,31.74,30.12 -mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.51,2.35,2.73,2.45,55.49,49.18,7.4,57.72,65.1,45.83,59.45,35.46,67.24,66.08,31.41,62.89,28.72,20.55,40.6,30.39,38.46,54.76,73.32,16.17,57.94,65.95,30.08,39.51 +"merge-crew/da-sv-dare-ties-density-0.9 (few-shot, val)",7242,32,32768,True,True,2443,2.51,2.38,2.76,2.4,45.61,53.73,17.08,56.67,66.14,60.58,57.89,16.45,48.24,61.5,49.4,64.56,24.12,13.2,47.93,26.21,17.0,46.61,76.38,34.16,58.77,66.77,29.77,25.38 +mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.51,2.35,2.74,2.45,55.49,49.18,7.4,57.72,65.1,45.83,59.45,35.46,67.24,66.08,31.41,62.89,28.72,20.55,40.6,30.39,38.46,54.76,73.32,16.17,57.94,65.95,30.08,39.51 
nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.51,2.34,2.73,2.45,49.85,49.52,32.35,52.54,65.13,52.71,63.47,16.3,64.15,62.16,55.29,60.17,32.3,22.82,32.62,35.37,15.39,58.75,79.59,33.09,47.28,62.78,36.58,18.78 -danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.52,2.27,2.77,2.52,39.12,36.47,26.76,58.75,67.89,91.32,79.92,24.76,50.43,54.2,39.21,65.46,20.51,11.66,51.57,28.97,25.41,47.1,73.05,30.29,57.39,64.69,27.4,21.08 -"merge-crew/da-sv-dare-ties-density-0.9 (few-shot, val)",7242,32,32768,True,True,2443,2.52,2.38,2.77,2.4,45.61,53.73,17.08,56.67,66.14,60.58,57.89,16.45,48.24,61.5,49.4,64.56,24.12,13.2,47.93,26.21,17.0,46.61,76.38,34.16,58.77,66.77,29.77,25.38 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.52,2.39,2.71,2.47,49.46,51.16,23.01,49.75,65.26,51.64,59.28,24.3,61.48,61.58,32.94,63.38,21.2,19.65,53.35,33.02,24.93,59.92,80.91,26.39,47.69,63.94,33.39,20.21 -"mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.52,2.32,2.74,2.51,52.72,49.11,16.09,46.28,66.62,60.03,59.83,40.4,61.9,66.92,48.8,64.72,19.53,9.83,30.27,28.18,36.2,60.53,67.03,15.1,42.46,67.94,27.51,42.29 +danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.52,2.27,2.78,2.52,39.12,36.47,26.76,58.75,67.89,91.32,79.92,24.76,50.43,54.2,39.21,65.46,20.51,11.66,51.57,28.97,25.41,47.1,73.05,30.29,57.39,64.69,27.4,21.08 +"mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.52,2.32,2.73,2.51,52.72,49.11,16.09,46.28,66.62,60.03,59.83,40.4,61.9,66.92,48.8,64.72,19.53,9.83,30.27,28.18,36.2,60.53,67.03,15.1,42.46,67.94,27.51,42.29 timpal0l/njord-alpha (few-shot),7242,32,32768,True,False,5431,2.53,2.31,2.79,2.5,38.25,39.37,29.76,57.02,67.57,83.45,70.43,16.43,50.47,51.97,48.03,65.48,22.65,17.1,44.72,25.82,21.35,48.19,79.95,32.85,57.39,65.95,25.32,14.55 "KennethEnevoldsen/munin_mistral-7b (few-shot, val)",7242,32,32768,False,True,2543,2.55,2.31,2.76,2.58,46.7,47.52,8.04,60.05,67.18,70.49,66.28,25.13,51.82,62.55,56.37,63.74,6.04,-0.02,48.85,28.43,20.49,52.34,77.66,6.0,60.16,65.54,31.83,20.55 meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.55,2.34,2.85,2.46,50.92,47.86,29.19,48.38,64.89,51.88,62.31,27.37,65.17,60.22,34.02,62.16,32.48,18.38,33.06,32.4,24.89,62.19,80.31,30.29,42.78,64.14,35.1,23.18 "merge-crew/da-sv-ties (few-shot, val)",7242,32,32768,True,True,2457,2.57,2.42,2.83,2.45,45.39,51.95,13.25,58.51,66.33,57.16,60.06,13.62,47.61,60.57,44.46,64.59,23.99,11.6,47.02,27.13,15.65,48.36,76.57,20.94,59.07,66.59,31.44,26.04 -AI-Sweden-Models/Llama-3-8B (few-shot),8030,128,8192,True,False,4141,2.58,2.33,2.83,2.57,36.72,46.48,26.1,58.0,67.23,82.84,75.27,14.55,44.53,47.02,41.84,65.29,19.97,15.61,50.91,30.85,18.61,36.45,81.12,26.8,58.16,66.09,29.01,15.92 +AI-Sweden-Models/Llama-3-8B (few-shot),8030,128,8192,True,False,4141,2.58,2.33,2.84,2.57,36.72,46.48,26.1,58.0,67.23,82.84,75.27,14.55,44.53,47.02,41.84,65.29,19.97,15.61,50.91,30.85,18.61,36.45,81.12,26.8,58.16,66.09,29.01,15.92 bineric/NorskGPT-Llama3-8b (few-shot),8030,128,8192,False,False,3382,2.58,2.34,3.03,2.36,51.4,42.13,3.91,57.81,66.7,65.15,57.36,44.55,64.34,59.5,35.6,61.95,31.69,24.58,0.1,32.03,41.45,63.19,76.06,5.34,56.7,66.25,36.23,43.6 -bineric/NorskGPT-Mistral-7b (few-shot),7242,32,32768,False,False,1440,2.58,2.41,2.89,2.45,50.76,40.41,0.0,57.26,66.89,64.32,53.18,43.42,62.98,60.12,35.47,64.52,15.15,9.49,27.06,29.91,37.75,58.4,74.3,0.0,59.16,65.36,35.01,43.72 ibm-granite/granite-3.0-8b-base 
(few-shot),8171,49,4097,True,False,2515,2.58,2.44,2.77,2.53,44.58,47.16,19.2,58.41,65.64,32.05,47.42,28.73,49.94,52.17,53.27,63.01,17.22,12.01,45.04,24.31,30.34,44.8,75.92,24.84,56.71,63.65,26.71,30.43 -Mabeck/Heidrun-Mistral-7B-chat (few-shot),7242,32,4096,False,False,1419,2.61,2.65,2.69,2.49,51.93,49.86,20.25,25.84,66.28,42.73,60.78,16.39,61.41,59.49,49.19,64.22,15.17,10.78,48.99,27.64,25.74,55.06,77.5,17.47,58.67,64.18,31.04,23.57 +bineric/NorskGPT-Mistral-7b (few-shot),7242,32,32768,False,False,1440,2.59,2.41,2.91,2.45,50.76,40.41,0.0,57.26,66.89,64.32,53.18,43.42,62.98,60.12,35.47,64.52,15.15,9.49,27.06,29.91,37.75,58.4,74.3,0.0,59.16,65.36,35.01,43.72 +Mabeck/Heidrun-Mistral-7B-chat (few-shot),7242,32,4096,False,False,1419,2.61,2.64,2.7,2.49,51.93,49.86,20.25,25.84,66.28,42.73,60.78,16.39,61.41,59.49,49.19,64.22,15.17,10.78,48.99,27.64,25.74,55.06,77.5,17.47,58.67,64.18,31.04,23.57 ThatsGroes/munin-SkoleGPTOpenOrca-7b-16bit (few-shot),7242,32,32768,False,False,3006,2.61,2.28,2.9,2.64,45.37,39.63,21.77,58.28,67.91,78.71,63.74,28.58,51.99,52.74,50.39,64.2,0.99,1.27,47.95,25.74,26.36,44.64,77.98,16.57,57.31,63.23,28.15,23.58 -"merge-crew/da-sv-dare-ties-density-0.6 (few-shot, val)",7242,32,32768,True,True,2515,2.62,2.48,2.87,2.5,46.03,49.59,12.72,57.03,65.24,59.88,59.51,16.86,47.26,59.35,54.93,64.25,9.0,5.26,45.95,21.89,15.32,45.12,78.74,19.74,60.15,66.41,31.24,22.3 -timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.62,2.52,2.92,2.42,42.43,47.82,16.51,56.95,65.43,50.76,50.82,14.47,48.97,51.52,49.05,63.32,14.37,9.96,44.07,25.07,15.56,44.14,80.14,34.23,57.07,65.15,33.24,25.5 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.61,2.39,2.96,2.47,49.46,51.16,23.01,49.75,65.26,51.64,59.28,24.3,62.89,56.18,33.07,61.95,30.73,20.57,30.77,30.95,21.98,59.92,80.91,26.39,47.69,63.94,33.39,20.21 +"merge-crew/da-sv-dare-ties-density-0.6 (few-shot, val)",7242,32,32768,True,True,2515,2.62,2.48,2.87,2.51,46.03,49.59,12.72,57.03,65.24,59.88,59.51,16.86,47.26,59.35,54.93,64.25,9.0,5.26,45.95,21.89,15.32,45.12,78.74,19.74,60.15,66.41,31.24,22.3 +timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.62,2.52,2.91,2.42,42.43,47.82,16.51,56.95,65.43,50.76,50.82,14.47,48.97,51.52,49.05,63.32,14.37,9.96,44.07,25.07,15.56,44.14,80.14,34.23,57.07,65.15,33.24,25.5 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.63,2.39,2.91,2.6,53.44,49.17,20.55,51.7,65.47,37.41,62.46,26.25,61.54,60.94,35.73,63.77,21.33,13.2,32.36,27.87,30.99,47.15,80.24,11.35,49.93,65.3,29.64,31.96 -timpal0l/Llama-3-8B-flashback-v1 (few-shot),8030,128,8192,True,False,3004,2.64,2.54,2.91,2.48,46.59,50.25,14.46,56.86,61.98,46.91,50.98,16.33,59.09,60.02,47.58,58.88,10.52,6.67,49.89,29.17,17.46,57.01,81.97,31.16,53.99,63.7,29.77,15.86 +timpal0l/Llama-3-8B-flashback-v1 (few-shot),8030,128,8192,True,False,3004,2.64,2.54,2.9,2.48,46.59,50.25,14.46,56.86,61.98,46.91,50.98,16.33,59.09,60.02,47.58,58.88,10.52,6.67,49.89,29.17,17.46,57.01,81.97,31.16,53.99,63.7,29.77,15.86 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.66,2.54,2.84,2.59,44.92,49.31,10.14,57.34,66.02,33.71,43.55,21.34,53.79,56.13,51.36,62.4,6.83,8.09,48.01,24.55,26.71,44.94,76.78,16.96,56.83,65.09,26.57,24.62 -mhenrichsen/hestenettetLM (few-shot),7242,32,32768,True,False,1151,2.66,2.53,2.91,2.55,44.9,42.61,8.65,59.62,66.48,52.83,57.96,18.11,52.52,55.6,48.23,63.53,8.53,6.65,46.89,27.67,14.2,53.0,79.7,4.32,59.03,64.89,35.48,20.54 -mistralai/Mistral-7B-v0.3 
(few-shot),7248,33,32768,True,False,1364,2.66,2.43,2.98,2.57,43.6,45.92,15.43,59.13,66.33,53.81,61.06,20.64,50.56,52.65,44.61,63.13,12.1,9.3,45.15,28.31,13.59,49.18,79.08,11.06,58.98,64.79,34.51,20.84 +mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.66,2.43,2.99,2.57,43.6,45.92,15.43,59.13,66.33,53.81,61.06,20.64,50.56,52.65,44.61,63.13,12.1,9.3,45.15,28.31,13.59,49.18,79.08,11.06,58.98,64.79,34.51,20.84 Mabeck/Heidrun-Mistral-7B-base (few-shot),7242,32,32768,True,False,3823,2.67,2.43,3.01,2.58,40.14,39.38,21.85,58.07,67.06,60.78,61.29,16.26,50.1,54.81,48.64,62.29,10.31,1.11,42.2,27.39,11.76,48.43,79.43,17.37,57.05,63.81,31.72,15.69 -alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.67,2.45,2.98,2.57,43.65,45.86,15.19,59.14,66.3,53.67,54.52,20.51,50.63,52.69,44.05,63.11,11.6,9.26,45.23,28.19,13.65,48.96,78.9,10.82,58.91,64.78,34.52,20.96 -bineric/NorskGPT-Llama-13B-v0.1 (few-shot),-1,32,4096,False,False,2856,2.67,2.53,2.81,2.67,44.17,44.28,3.11,55.59,66.63,59.51,50.89,25.32,56.72,57.62,48.86,64.86,9.87,6.9,41.27,24.51,31.41,49.26,79.05,0.22,56.78,65.99,25.56,28.26 -mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.67,2.53,2.94,2.55,45.42,43.16,8.79,59.43,66.47,53.26,58.26,18.53,52.0,55.12,47.25,63.49,8.66,6.8,46.86,27.78,10.88,53.34,80.0,4.61,58.99,64.87,35.52,19.67 +alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.67,2.45,2.99,2.57,43.65,45.86,15.19,59.14,66.3,53.67,54.52,20.51,50.63,52.69,44.05,63.11,11.6,9.26,45.23,28.19,13.65,48.96,78.9,10.82,58.91,64.78,34.52,20.96 +bineric/NorskGPT-Llama-13B-v0.1 (few-shot),-1,32,4096,False,False,2856,2.67,2.53,2.82,2.67,44.17,44.28,3.11,55.59,66.63,59.51,50.89,25.32,56.72,57.62,48.86,64.86,9.87,6.9,41.27,24.51,31.41,49.26,79.05,0.22,56.78,65.99,25.56,28.26 +mhenrichsen/hestenettetLM (few-shot),7242,32,32768,True,False,1151,2.67,2.53,2.91,2.56,44.9,42.61,8.65,59.62,66.48,52.83,57.96,18.11,52.52,55.6,48.23,63.53,8.53,6.65,46.89,27.67,14.2,53.0,79.7,4.32,59.03,64.89,35.48,20.54 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.68,2.49,2.96,2.6,44.0,45.41,16.17,57.06,66.88,45.21,51.06,16.5,57.21,59.62,38.93,65.12,8.65,5.92,42.32,23.88,22.33,49.9,77.19,14.67,57.12,66.25,24.4,19.3 +mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.68,2.53,2.95,2.56,45.42,43.16,8.79,59.43,66.47,53.26,58.26,18.53,52.0,55.12,47.25,63.49,8.66,6.8,46.86,27.78,10.88,53.34,80.0,4.61,58.99,64.87,35.52,19.67 CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.71,2.53,3.01,2.59,47.08,47.16,8.41,58.83,65.03,36.64,39.24,27.29,60.94,59.61,35.73,62.45,6.18,4.0,46.52,20.14,27.5,60.04,76.21,7.54,58.6,63.0,20.97,28.96 danish-foundation-models/munin-7b-alpha (few-shot),7242,32,32768,True,False,6116,2.71,2.34,3.13,2.66,40.6,36.89,26.41,57.81,67.27,77.63,67.83,25.16,48.89,51.95,20.54,63.67,4.39,1.2,47.16,29.07,19.15,42.23,78.8,15.47,56.75,62.78,30.86,19.11 -RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.74,2.54,2.97,2.71,43.17,43.4,11.08,56.81,67.46,52.94,41.65,17.57,58.61,60.4,41.36,65.33,6.52,3.95,38.93,23.32,22.3,50.85,74.17,7.51,57.32,65.2,23.92,17.67 +RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.74,2.54,2.98,2.71,43.17,43.4,11.08,56.81,67.46,52.94,41.65,17.57,58.61,60.4,41.36,65.33,6.52,3.95,38.93,23.32,22.3,50.85,74.17,7.51,57.32,65.2,23.92,17.67 mistralai/Mistral-7B-Instruct-v0.2 
(few-shot),7242,32,32768,False,False,2370,2.74,2.5,3.03,2.7,44.89,48.09,19.06,51.56,66.84,51.6,35.85,22.21,53.42,54.34,38.79,64.43,17.06,11.0,35.74,20.37,21.16,47.92,62.9,19.95,52.51,66.11,25.6,21.75 -meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,2.77,2.68,3.07,2.55,41.28,23.01,23.5,60.29,66.28,50.3,56.24,15.43,51.12,55.35,23.75,64.56,14.0,7.61,49.24,23.6,16.55,54.52,78.45,21.55,59.71,64.59,25.51,14.97 -google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,2.78,2.59,3.03,2.71,28.22,47.11,19.99,48.0,66.73,52.85,52.74,29.94,35.56,37.7,46.84,64.58,17.15,14.38,29.75,23.02,33.13,35.61,75.84,15.62,47.76,65.16,27.0,31.43 -bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,2.81,2.69,2.88,2.85,41.63,47.73,0.0,54.25,66.02,47.16,38.28,22.27,56.18,56.96,50.94,64.21,8.19,5.55,41.35,21.27,26.81,53.95,60.91,0.32,55.28,63.73,20.96,25.76 +meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,2.76,2.68,3.06,2.55,41.28,23.01,23.5,60.29,66.28,50.3,56.24,15.43,51.12,55.35,23.75,64.56,14.0,7.61,49.24,23.6,16.55,54.52,78.45,21.55,59.71,64.59,25.51,14.97 +google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,2.77,2.59,3.02,2.71,28.22,47.11,19.99,48.0,66.73,52.85,52.74,29.94,35.56,37.7,46.84,64.58,17.15,14.38,29.75,23.02,33.13,35.61,75.84,15.62,47.76,65.16,27.0,31.43 +bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,2.8,2.69,2.87,2.85,41.63,47.73,0.0,54.25,66.02,47.16,38.28,22.27,56.18,56.96,50.94,64.21,8.19,5.55,41.35,21.27,26.81,53.95,60.91,0.32,55.28,63.73,20.96,25.76 occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.82,2.69,3.04,2.74,40.19,42.31,1.14,57.89,66.68,44.3,48.76,15.44,45.5,45.96,44.46,63.95,0.0,0.0,52.19,20.61,16.18,47.67,71.73,7.9,57.78,65.07,25.52,14.06 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,2.84,2.65,3.19,2.68,41.12,42.77,11.52,51.14,65.19,49.78,45.88,21.89,49.66,51.98,44.13,60.5,0.67,1.11,28.62,26.82,20.98,43.74,76.98,16.01,48.38,64.98,29.44,22.42 -NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.85,2.82,2.92,2.82,50.83,53.23,23.02,0.0,57.74,77.42,82.47,30.39,65.75,70.12,41.9,53.29,47.88,35.66,0.03,46.91,33.14,69.54,79.55,28.27,0.02,55.51,48.02,24.46 +NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.85,2.82,2.91,2.82,50.83,53.23,23.02,0.0,57.74,77.42,82.47,30.39,65.75,70.12,41.9,53.29,47.88,35.66,0.03,46.91,33.14,69.54,79.55,28.27,0.02,55.51,48.02,24.46 mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,2.87,2.69,3.1,2.82,37.93,44.49,14.09,51.38,65.8,45.07,35.36,14.85,50.08,51.27,43.65,62.39,14.09,8.28,37.23,20.44,15.87,45.01,73.33,11.59,52.12,63.1,24.03,15.37 neph1/bellman-7b-mistral-instruct-v0.2 (few-shot),7242,32,32768,False,False,2518,2.88,2.61,3.2,2.84,46.11,47.58,18.41,52.78,65.65,41.77,35.86,11.59,57.01,56.77,38.81,62.13,14.16,9.29,32.75,17.08,10.52,54.38,55.84,16.05,53.22,64.9,22.36,12.52 -occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.88,2.75,3.13,2.76,37.93,44.62,0.28,58.05,66.05,38.54,45.89,12.38,45.28,46.0,44.95,63.26,0.0,0.0,43.88,20.87,13.1,49.02,76.56,2.18,58.98,64.42,23.68,14.05 +occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.88,2.75,3.12,2.76,37.93,44.62,0.28,58.05,66.05,38.54,45.89,12.38,45.28,46.0,44.95,63.26,0.0,0.0,43.88,20.87,13.1,49.02,76.56,2.18,58.98,64.42,23.68,14.05 meta-llama/Llama-3.2-3B 
(few-shot),3213,128,131073,True,False,3713,2.9,2.79,3.17,2.73,41.13,38.9,9.6,56.85,63.35,45.46,46.77,12.19,49.57,52.13,39.96,58.99,3.2,3.72,45.54,21.88,14.24,51.06,77.76,5.88,57.43,62.73,27.05,13.3 -nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.9,2.83,3.07,2.8,49.01,47.95,32.89,0.0,62.7,51.21,62.3,33.79,63.7,62.53,34.35,61.42,31.53,22.71,0.06,35.46,36.47,48.51,78.68,29.18,0.0,65.35,40.16,39.81 -neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,2.91,2.77,3.2,2.76,27.6,37.08,10.84,58.67,63.45,38.31,46.65,28.92,35.54,36.61,32.18,60.89,0.0,0.0,43.47,24.47,33.51,33.21,76.57,10.51,58.15,63.82,26.73,21.33 +nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.9,2.83,3.06,2.8,49.01,47.95,32.89,0.0,62.7,51.21,62.3,33.79,63.7,62.53,34.35,61.42,31.53,22.71,0.06,35.46,36.47,48.51,78.68,29.18,0.0,65.35,40.16,39.81 +neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,2.91,2.77,3.21,2.76,27.6,37.08,10.84,58.67,63.45,38.31,46.65,28.92,35.54,36.61,32.18,60.89,0.0,0.0,43.47,24.47,33.51,33.21,76.57,10.51,58.15,63.82,26.73,21.33 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.94,2.4,3.4,3.03,59.48,56.46,20.57,38.23,64.38,53.54,69.09,21.03,69.39,62.76,3.97,62.53,31.65,5.86,36.65,16.62,2.63,57.06,59.89,9.3,39.97,65.29,16.34,0.0 -meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.98,2.76,3.27,2.91,35.44,44.88,9.74,55.04,66.15,32.17,35.74,11.32,44.99,49.09,41.56,63.59,3.04,4.03,33.77,14.81,12.69,39.72,66.18,6.74,54.05,65.92,17.73,12.85 +meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.98,2.76,3.27,2.92,35.44,44.88,9.74,55.04,66.15,32.17,35.74,11.32,44.99,49.09,41.56,63.59,3.04,4.03,33.77,14.81,12.69,39.72,66.18,6.74,54.05,65.92,17.73,12.85 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.99,2.88,3.18,2.91,48.44,39.07,9.72,51.18,63.93,33.11,18.96,9.03,68.4,65.15,42.0,61.27,5.2,3.32,37.51,12.42,8.32,59.77,74.45,3.97,50.18,62.61,14.34,7.4 google/gemma-7b-it (few-shot),8538,256,8192,False,False,1792,3.0,2.84,3.06,3.09,43.83,29.21,12.96,49.76,65.36,41.3,31.26,15.02,59.77,60.98,28.14,60.86,14.01,10.15,51.08,19.07,16.52,59.26,28.63,11.43,46.67,63.88,17.95,13.55 -01-ai/Yi-1.5-6B (few-shot),6061,64,4097,True,False,2867,3.01,3.05,3.22,2.75,35.21,12.73,4.75,55.95,64.28,46.17,36.46,18.01,46.02,48.72,27.86,60.92,2.41,2.5,44.7,23.93,22.39,45.55,70.71,4.83,55.25,63.18,26.05,27.09 -ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4097,True,False,10194,3.01,2.88,3.19,2.96,41.79,41.86,11.86,51.97,64.86,29.55,26.2,3.64,52.68,53.17,39.87,62.24,12.08,7.18,36.0,15.78,9.98,45.23,72.76,11.25,52.22,61.56,18.14,6.77 +01-ai/Yi-1.5-6B (few-shot),6061,64,4097,True,False,2867,3.01,3.05,3.22,2.76,35.21,12.73,4.75,55.95,64.28,46.17,36.46,18.01,46.02,48.72,27.86,60.92,2.41,2.5,44.7,23.93,22.39,45.55,70.71,4.83,55.25,63.18,26.05,27.09 +ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4097,True,False,10194,3.01,2.88,3.2,2.96,41.79,41.86,11.86,51.97,64.86,29.55,26.2,3.64,52.68,53.17,39.87,62.24,12.08,7.18,36.0,15.78,9.98,45.23,72.76,11.25,52.22,61.56,18.14,6.77 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,3.01,2.83,3.25,2.96,41.37,42.6,6.52,50.57,64.55,38.64,42.12,13.66,56.33,54.68,37.18,61.44,6.76,6.79,30.11,15.54,17.55,46.15,67.17,5.3,51.12,59.2,21.33,16.12 timpal0l/Mistral-7B-v0.1-flashback-v2-instruct 
(few-shot),7242,32,32768,False,False,5172,3.01,2.83,3.53,2.67,37.02,40.65,7.48,52.71,64.46,47.26,49.54,9.02,50.34,52.06,32.19,58.71,-0.22,0.0,20.57,22.27,11.71,46.74,77.06,14.0,56.74,62.56,30.87,15.79 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,3.02,2.83,3.26,2.96,41.37,42.6,6.52,50.57,64.55,38.64,42.12,13.66,56.33,54.68,37.18,61.44,6.76,6.79,30.11,15.54,17.55,46.15,67.17,5.3,51.12,59.2,21.33,16.12 emillykkejensen/Phi-3-mini-4k-instruct-dansk (few-shot),3821,32,4096,False,False,1360,3.03,2.78,3.3,3.0,39.96,44.93,4.01,55.01,65.29,35.28,41.62,11.81,56.41,53.95,42.27,60.58,0.0,0.21,29.35,18.57,13.36,47.81,68.43,3.63,53.03,56.14,23.29,12.06 meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,3.03,2.94,3.31,2.84,31.77,43.91,0.31,58.44,65.5,20.18,35.69,7.93,42.13,43.8,41.74,62.3,0.0,0.02,44.19,14.48,6.49,44.11,79.05,7.34,57.49,64.63,15.65,8.74 Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,3.04,2.84,3.31,2.96,35.96,42.04,8.65,53.68,63.22,37.79,29.62,16.87,44.83,46.29,32.7,60.53,3.57,1.61,42.55,17.0,16.18,40.19,64.08,5.43,53.21,61.9,20.95,16.59 ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,3.06,2.97,3.24,2.97,45.9,37.11,11.7,50.11,63.86,32.44,7.46,5.62,66.91,62.82,40.71,60.59,9.5,6.74,32.83,11.35,6.21,52.85,73.93,8.27,48.49,60.98,13.69,5.68 -"merge-crew/da-sv-dare-ties-density-0.3 (few-shot, val)",7242,32,32768,True,True,2461,3.06,2.88,3.36,2.95,30.16,48.49,5.52,52.44,64.24,43.57,35.6,6.76,35.98,47.39,38.98,61.99,11.54,5.2,37.54,10.4,2.52,32.37,75.33,12.73,53.05,64.74,15.6,9.81 +"merge-crew/da-sv-dare-ties-density-0.3 (few-shot, val)",7242,32,32768,True,True,2461,3.07,2.88,3.37,2.95,30.16,48.49,5.52,52.44,64.24,43.57,35.6,6.76,35.98,47.39,38.98,61.99,11.54,5.2,37.54,10.4,2.52,32.37,75.33,12.73,53.05,64.74,15.6,9.81 NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,3.08,2.84,3.31,3.09,21.47,48.39,12.46,52.51,66.53,36.65,49.97,5.64,21.09,26.31,49.0,63.87,7.15,7.98,47.7,5.83,5.39,23.88,80.26,13.5,55.02,64.78,6.62,2.66 -AI-Sweden-Models/gpt-sw3-40b (few-shot),39927,64,2048,True,False,409,3.11,2.85,3.49,3.0,26.57,47.81,11.13,53.78,66.21,33.48,29.86,6.67,24.07,26.67,31.05,61.58,10.8,8.89,48.78,6.67,6.25,32.0,80.44,10.73,53.8,65.16,8.35,5.74 -tollefj/nordavind-7b-instruct-warm (few-shot),7248,33,2048,False,False,6450,3.13,2.92,3.38,3.08,38.39,49.44,7.5,51.24,66.09,3.53,12.86,1.29,38.82,43.28,38.05,64.04,8.45,7.5,40.47,2.6,3.83,47.24,77.91,5.55,51.41,61.11,1.49,3.97 +AI-Sweden-Models/gpt-sw3-40b (few-shot),39927,64,2048,True,False,409,3.11,2.85,3.48,3.0,26.57,47.81,11.13,53.78,66.21,33.48,29.86,6.67,24.07,26.67,31.05,61.58,10.8,8.89,48.78,6.67,6.25,32.0,80.44,10.73,53.8,65.16,8.35,5.74 +tollefj/nordavind-7b-instruct-warm (few-shot),7248,33,2048,False,False,6450,3.13,2.92,3.39,3.08,38.39,49.44,7.5,51.24,66.09,3.53,12.86,1.29,38.82,43.28,38.05,64.04,8.45,7.5,40.47,2.6,3.83,47.24,77.91,5.55,51.41,61.11,1.49,3.97 norallm/normistral-7b-warm-instruct (few-shot),-1,33,2048,True,False,6194,3.14,2.96,3.35,3.12,39.83,47.48,4.55,49.23,66.17,10.83,14.66,2.71,46.49,51.46,37.98,64.52,7.86,7.23,33.31,5.22,9.32,51.45,63.64,5.8,48.95,62.18,4.88,4.63 NbAiLab/nb-llama-3.1-8B (few-shot),8030,128,131072,True,False,1297,3.15,3.04,3.34,3.06,44.83,37.14,10.13,8.09,62.82,76.03,75.6,14.76,51.85,54.79,31.84,57.35,36.3,32.19,0.71,31.23,11.21,53.65,80.41,5.81,10.43,62.48,33.96,15.36 -allenai/OLMo-1.7-7B-hf 
(few-shot),6888,50,4096,True,False,3371,3.15,3.11,3.42,2.93,33.8,31.57,2.76,54.2,64.19,17.75,28.24,4.5,42.78,42.85,36.68,59.58,2.39,1.91,39.16,13.41,7.83,41.25,76.6,6.37,54.87,62.9,16.18,8.52 +allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,3.15,3.1,3.42,2.93,33.8,31.57,2.76,54.2,64.19,17.75,28.24,4.5,42.78,42.85,36.68,59.58,2.39,1.91,39.16,13.41,7.83,41.25,76.6,6.37,54.87,62.9,16.18,8.52 ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4097,True,False,10187,3.15,3.07,3.36,3.02,32.34,29.5,3.89,53.67,64.48,24.4,31.93,10.0,43.0,45.08,35.36,62.0,2.79,1.95,37.33,15.76,12.98,36.54,68.85,2.6,54.58,61.77,16.19,14.06 -TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,3.16,3.0,3.44,3.05,34.25,45.67,10.62,50.77,65.67,2.31,10.57,0.64,42.77,45.69,37.79,61.05,8.77,8.47,44.24,-1.34,-0.94,42.87,79.18,8.65,51.56,64.66,2.37,0.17 microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,3.16,3.12,3.3,3.06,4.51,40.85,5.43,51.76,64.64,37.45,36.39,17.42,52.18,50.53,33.3,60.69,2.63,4.0,37.08,17.34,17.43,42.36,51.53,3.11,51.11,59.28,22.99,20.19 -google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.17,3.01,3.51,3.0,17.29,34.94,6.39,54.94,64.82,48.07,45.03,9.1,20.47,24.18,32.61,60.17,3.22,3.91,41.16,19.03,7.35,30.45,76.36,6.06,55.19,63.12,20.8,6.24 -AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,3.19,3.15,3.36,3.07,27.41,30.23,11.34,52.8,64.47,11.04,22.71,3.03,30.82,39.56,34.5,63.1,15.17,12.46,42.81,4.51,5.27,31.86,79.2,12.26,53.58,64.14,3.15,2.77 -NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,3.19,3.01,3.42,3.15,16.72,45.89,11.25,53.17,66.51,14.84,27.95,2.41,31.45,33.85,36.06,63.06,8.34,6.84,48.31,3.28,1.87,24.98,79.36,5.75,54.74,64.6,3.83,4.4 +TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,3.17,3.0,3.45,3.05,34.25,45.67,10.62,50.77,65.67,2.31,10.57,0.64,42.77,45.69,37.79,61.05,8.77,8.47,44.24,-1.34,-0.94,42.87,79.18,8.65,51.56,64.66,2.37,0.17 +google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.18,3.02,3.51,3.0,17.29,34.94,6.39,54.94,64.82,48.07,45.03,9.1,20.47,24.18,32.61,60.17,3.22,3.91,41.16,19.03,7.35,30.45,76.36,6.06,55.19,63.12,20.8,6.24 +AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,3.2,3.15,3.37,3.07,27.41,30.23,11.34,52.8,64.47,11.04,22.71,3.03,30.82,39.56,34.5,63.1,15.17,12.46,42.81,4.51,5.27,31.86,79.2,12.26,53.58,64.14,3.15,2.77 +NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,3.2,3.03,3.42,3.15,16.72,45.89,11.25,53.17,66.51,14.84,27.95,2.41,31.45,33.85,36.06,63.06,8.34,6.84,48.31,3.28,1.87,24.98,79.36,5.75,54.74,64.6,3.83,4.4 ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.26,3.14,3.54,3.11,37.37,31.44,5.27,48.41,63.82,18.25,19.54,2.64,44.89,48.08,32.29,59.77,7.49,4.65,26.37,11.54,3.42,40.68,68.96,4.77,49.73,60.93,13.55,5.27 LumiOpen/Viking-33B@1000B (few-shot),33119,131,4099,True,False,2080,3.27,3.08,3.57,3.15,34.22,45.05,9.4,54.92,62.78,0.43,4.81,1.77,40.4,44.45,40.79,56.55,5.91,2.98,37.75,1.16,-0.29,42.35,77.68,8.08,54.57,58.3,1.73,-0.32 Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.29,2.94,3.45,3.49,32.28,39.62,5.38,54.16,62.74,37.49,29.21,15.58,32.12,36.86,36.97,57.64,5.27,1.4,40.0,16.5,13.27,37.26,5.2,1.85,54.15,58.24,22.04,14.76 @@ -134,66 +134,67 @@ AI-Sweden-Models/gpt-sw3-6.7b-v2 (few-shot),7111,64,2048,True,False,2351,3.32,3. 
openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.32,3.12,3.67,3.17,34.66,21.93,1.5,52.36,64.84,24.39,33.37,13.98,37.36,42.83,16.02,61.93,-0.08,2.29,31.6,8.67,9.8,35.02,51.8,6.15,50.85,65.68,10.11,8.76 AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot),7111,64,2048,True,False,1473,3.38,3.37,3.46,3.32,15.35,2.85,10.99,50.51,66.38,30.9,16.81,11.08,24.67,29.03,34.39,64.07,2.42,5.11,42.52,6.89,12.81,14.58,56.6,10.92,50.18,64.89,6.16,10.9 MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.38,3.38,3.64,3.11,28.18,29.32,2.9,56.48,53.81,27.86,34.62,4.73,36.96,39.38,32.67,51.44,2.18,5.33,45.23,9.35,4.85,41.49,75.64,0.66,57.48,55.94,10.56,5.03 -LumiOpen/Viking-13B (few-shot),14030,131,4097,True,False,840,3.39,3.14,3.79,3.23,28.6,48.71,2.3,53.85,64.05,0.55,0.0,0.36,26.76,35.38,29.22,56.69,2.58,2.79,34.41,-0.56,-0.43,31.55,78.66,5.69,52.93,60.05,1.32,0.35 +LumiOpen/Viking-13B (few-shot),14030,131,4097,True,False,840,3.39,3.14,3.8,3.23,28.6,48.71,2.3,53.85,64.05,0.55,0.0,0.36,26.76,35.38,29.22,56.69,2.58,2.79,34.41,-0.56,-0.43,31.55,78.66,5.69,52.93,60.05,1.32,0.35 meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131073,False,False,7436,3.4,3.23,3.74,3.23,35.45,36.94,1.12,44.61,61.33,27.14,18.57,3.16,44.66,47.78,27.43,57.67,0.07,1.14,18.0,11.67,3.53,41.6,71.86,3.72,43.57,56.69,14.64,3.1 -google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.42,3.19,3.76,3.32,19.97,40.21,2.27,50.55,63.07,15.04,30.63,4.9,15.53,19.78,32.89,57.65,1.18,0.0,33.33,11.27,5.1,14.67,75.45,3.82,51.73,59.72,10.98,4.24 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.43,3.27,3.63,3.4,37.21,31.54,6.3,44.86,61.56,17.92,10.79,1.7,53.78,55.14,26.21,57.11,3.9,2.42,24.86,10.36,5.85,50.1,65.67,4.55,42.83,45.16,7.58,3.79 openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.43,3.19,3.7,3.41,29.49,13.77,0.0,51.53,66.31,24.59,39.09,15.6,34.78,39.0,10.69,62.73,6.17,5.9,31.25,7.97,10.39,37.17,20.2,6.13,46.66,65.28,10.89,9.2 HPLT/gpt-33b-nordic-prerelease (few-shot),33119,131,4099,True,False,501,3.44,3.25,3.77,3.31,25.35,44.7,1.43,52.29,62.23,-1.01,1.33,-0.19,31.38,37.84,38.88,55.01,3.41,3.11,30.39,-1.56,0.51,33.61,76.75,1.66,50.68,56.37,-0.31,-0.04 -ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.45,3.45,3.63,3.27,31.8,6.85,0.97,49.83,63.43,15.97,17.19,3.07,40.08,43.96,31.9,59.98,-0.07,1.27,23.32,11.78,5.48,36.01,57.18,1.52,51.04,58.57,13.42,7.33 +google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.44,3.23,3.76,3.32,19.97,40.21,2.27,50.55,63.07,15.04,30.63,4.9,15.53,19.78,32.89,57.65,1.18,0.0,33.33,11.27,5.1,14.67,75.45,3.82,51.73,59.72,10.98,4.24 +ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.45,3.44,3.63,3.28,31.8,6.85,0.97,49.83,63.43,15.97,17.19,3.07,40.08,43.96,31.9,59.98,-0.07,1.27,23.32,11.78,5.48,36.01,57.18,1.52,51.04,58.57,13.42,7.33 utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.45,3.3,3.73,3.31,19.61,37.92,2.81,50.05,63.91,2.54,17.85,0.52,31.43,36.92,30.63,59.45,0.98,1.67,33.24,1.85,-1.18,27.41,72.24,0.13,49.77,63.23,3.39,0.54 -stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.47,3.34,3.82,3.25,28.45,39.09,1.43,51.67,57.67,11.94,18.18,2.67,41.64,42.37,33.71,49.22,-0.19,-0.01,30.14,6.67,3.5,38.0,75.15,1.04,53.11,55.63,8.72,3.19 +stabilityai/stablelm-2-1_6b 
(few-shot),1645,100,4096,True,False,7259,3.47,3.33,3.82,3.25,28.45,39.09,1.43,51.67,57.67,11.94,18.18,2.67,41.64,42.37,33.71,49.22,-0.19,-0.01,30.14,6.67,3.5,38.0,75.15,1.04,53.11,55.63,8.72,3.19 AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot),1445,64,2048,True,False,4544,3.49,3.47,3.61,3.38,14.73,27.14,2.65,46.38,65.48,0.32,7.0,0.13,33.08,38.28,35.58,63.11,0.82,1.43,36.06,-0.68,-0.32,19.04,73.34,2.9,47.45,63.33,0.65,-0.18 ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.5,3.27,3.9,3.33,24.93,31.65,0.06,51.47,62.67,16.13,24.21,1.27,32.21,36.62,16.98,55.91,1.57,0.97,26.28,8.41,2.47,33.34,72.0,0.25,52.53,52.86,11.71,0.81 ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.5,3.37,3.74,3.39,23.87,31.21,2.04,47.36,62.33,17.68,17.54,1.72,37.73,40.07,21.5,59.05,0.86,2.01,27.03,9.41,7.04,29.08,65.51,0.5,46.52,56.02,8.0,5.76 -NorwAI/NorwAI-Mistral-7B-instruct (few-shot),7537,68,4096,False,False,3027,3.52,3.37,3.75,3.43,13.78,42.16,3.52,20.02,65.03,40.74,43.43,4.5,27.49,32.33,47.78,62.75,3.92,4.27,2.46,8.41,2.92,20.97,77.76,2.35,28.65,63.75,9.17,3.98 -ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.52,3.16,4.0,3.4,38.62,35.47,5.07,45.21,62.5,13.46,15.31,6.0,53.93,54.04,23.83,50.59,3.91,1.55,2.37,8.68,6.19,51.76,70.61,6.24,44.67,41.31,7.41,5.42 +NorwAI/NorwAI-Mistral-7B-instruct (few-shot),7537,68,4096,False,False,3027,3.51,3.35,3.75,3.43,13.78,42.16,3.52,20.02,65.03,40.74,43.43,4.5,27.49,32.33,47.78,62.75,3.92,4.27,2.46,8.41,2.92,20.97,77.76,2.35,28.65,63.75,9.17,3.98 +ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.52,3.15,4.0,3.4,38.62,35.47,5.07,45.21,62.5,13.46,15.31,6.0,53.93,54.04,23.83,50.59,3.91,1.55,2.37,8.68,6.19,51.76,70.61,6.24,44.67,41.31,7.41,5.42 +LumiOpen/Viking-7B (few-shot),7550,131,4096,True,False,1431,3.53,3.36,3.88,3.36,23.98,38.74,1.04,50.17,61.96,-0.06,-1.04,0.73,22.37,29.9,35.86,53.25,1.03,2.92,34.39,-1.16,-0.55,30.64,72.02,1.08,48.72,57.93,1.14,1.13 HPLT/gpt-13b-nordic-prerelease (few-shot),14030,131,4099,True,False,3520,3.54,3.3,3.96,3.36,28.72,37.19,2.96,49.53,61.62,1.17,11.38,-0.16,28.94,33.83,27.32,54.05,1.46,-0.59,25.62,0.32,0.92,32.19,72.26,2.39,48.92,57.46,-0.49,0.5 -LumiOpen/Viking-7B (few-shot),7550,131,4096,True,False,1431,3.54,3.37,3.88,3.36,23.98,38.74,1.04,50.17,61.96,-0.06,-1.04,0.73,22.37,29.9,35.86,53.25,1.03,2.92,34.39,-1.16,-0.55,30.64,72.02,1.08,48.72,57.93,1.14,1.13 AI-Sweden-Models/gpt-sw3-6.7b (few-shot),7111,64,2048,True,False,2285,3.58,3.44,3.81,3.5,18.23,22.71,5.03,49.11,64.58,1.11,8.76,1.7,22.35,21.98,18.23,60.38,1.68,2.49,41.8,2.13,0.98,18.83,53.68,3.49,49.81,61.05,1.22,0.6 -HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.6,3.44,4.0,3.37,29.44,18.49,1.73,44.39,61.76,22.03,12.61,2.06,37.6,38.38,24.05,48.55,3.56,2.61,13.58,9.52,3.62,37.37,64.46,4.49,43.92,54.5,8.61,4.51 -HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.63,3.59,3.98,3.33,24.47,9.93,1.22,42.09,61.62,19.65,19.01,1.34,26.7,28.23,23.25,56.31,-0.47,0.26,13.4,11.12,2.53,35.96,68.31,3.61,43.26,57.04,10.86,2.53 -meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,3.64,3.49,3.98,3.46,19.82,35.97,2.14,46.59,58.65,2.65,9.51,-0.88,30.54,31.34,29.5,53.31,-0.13,0.02,19.59,2.49,2.53,29.89,74.33,1.06,46.89,52.06,0.93,0.09 +HuggingFaceTB/SmolLM2-1.7B-Instruct 
(few-shot),1711,49,8192,True,False,15971,3.61,3.44,4.01,3.37,29.44,18.49,1.73,44.39,61.76,22.03,12.61,2.06,37.6,38.38,24.05,48.55,3.56,2.61,13.58,9.52,3.62,37.37,64.46,4.49,43.92,54.5,8.61,4.51 +HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.64,3.59,3.99,3.33,24.47,9.93,1.22,42.09,61.62,19.65,19.01,1.34,26.7,28.23,23.25,56.31,-0.47,0.26,13.4,11.12,2.53,35.96,68.31,3.61,43.26,57.04,10.86,2.53 google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.65,3.4,3.75,3.81,24.44,34.03,2.25,42.12,62.41,15.16,12.67,2.67,39.78,43.58,22.01,55.43,2.76,1.45,32.42,7.68,1.06,33.51,43.97,0.53,39.39,40.55,11.06,1.03 +meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,3.65,3.52,3.98,3.46,19.82,35.97,2.14,46.59,58.65,2.65,9.51,-0.88,30.54,31.34,29.5,53.31,-0.13,0.02,19.59,2.49,2.53,29.89,74.33,1.06,46.89,52.06,0.93,0.09 AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,3.66,3.58,3.89,3.52,8.8,28.65,2.84,45.34,62.17,-1.31,3.02,-0.33,13.49,14.74,27.28,58.46,3.09,1.86,34.91,-0.01,0.25,6.08,71.38,1.17,45.55,60.11,2.2,0.67 mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.66,3.39,3.91,3.67,22.31,34.05,0.7,41.82,65.27,6.27,6.25,2.11,28.74,30.34,27.49,60.01,-2.17,0.26,19.1,3.21,0.18,27.31,45.94,-0.97,35.57,55.79,0.14,0.52 NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,3.68,3.45,4.08,3.52,21.87,24.82,2.89,44.86,63.2,-0.22,7.84,0.81,31.65,31.54,10.64,60.02,1.81,1.72,16.32,0.61,-1.4,32.54,43.55,1.93,44.8,62.67,0.13,-0.18 AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot),471,64,2048,True,False,5855,3.71,3.56,3.89,3.68,11.28,34.94,2.08,36.59,63.38,-0.09,-0.76,0.28,24.38,31.28,30.88,60.73,-0.3,0.45,23.99,-1.01,-0.5,14.84,59.0,0.06,34.37,61.28,0.48,0.33 Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.71,3.6,4.05,3.48,18.0,26.58,0.63,41.66,57.19,22.17,12.85,7.01,26.99,25.74,19.85,55.08,1.96,-0.01,16.33,7.79,5.61,20.94,52.54,0.34,43.55,61.19,10.74,4.83 -HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,3.74,3.52,4.19,3.52,21.98,37.77,1.26,46.03,58.21,0.0,-0.87,0.0,20.25,28.99,17.44,49.59,3.2,2.61,21.5,0.86,0.0,27.07,61.96,2.65,46.16,55.11,0.32,-0.0 +HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,3.75,3.52,4.19,3.53,21.98,37.77,1.26,46.03,58.21,0.0,-0.87,0.0,20.25,28.99,17.44,49.59,3.2,2.61,21.5,0.86,0.0,27.07,61.96,2.65,46.16,55.11,0.32,-0.0 Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,3.76,3.63,4.09,3.56,9.83,29.03,0.56,46.43,56.43,14.86,17.56,4.78,12.1,13.42,22.82,54.48,2.7,2.21,16.31,9.57,6.02,18.01,51.91,1.49,44.83,54.82,11.54,7.19 -ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.76,3.61,4.11,3.56,30.63,22.35,1.95,37.3,59.66,2.78,11.15,-1.22,36.99,37.27,19.55,56.69,1.95,2.31,7.33,-0.29,0.99,33.8,58.78,0.72,35.45,56.7,2.09,-0.46 -NbAiLab/nb-gpt-j-6B-alpaca (few-shot),6055,50,1024,False,False,2607,3.78,3.57,3.9,3.88,12.95,27.68,1.65,38.6,63.32,4.49,12.81,-0.68,23.82,26.04,32.6,58.08,0.34,2.26,21.33,2.13,1.87,13.28,60.17,1.52,37.23,46.68,-0.03,0.02 -state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,3.78,3.68,4.06,3.59,17.58,10.47,1.23,42.56,62.56,2.18,4.94,1.84,26.9,34.59,31.06,53.77,0.21,-0.17,10.35,-0.16,1.32,23.25,71.7,-0.82,40.48,54.39,-0.43,1.98 -AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.86,3.7,3.96,3.92,16.13,27.61,1.96,34.79,59.05,1.0,4.85,0.28,27.37,31.22,34.21,54.28,0.92,1.25,18.52,0.33,0.11,23.77,34.29,1.57,33.7,51.36,-0.96,0.3 
-ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.87,3.75,4.16,3.71,16.82,17.52,1.53,40.21,60.13,0.29,0.55,-0.21,31.16,29.73,17.59,55.43,1.07,1.59,6.92,0.05,-0.1,23.26,55.06,1.81,35.49,54.8,-1.51,0.41 +ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.76,3.61,4.11,3.55,30.63,22.35,1.95,37.3,59.66,2.78,11.15,-1.22,36.99,37.27,19.55,56.69,1.95,2.31,7.33,-0.29,0.99,33.8,58.78,0.72,35.45,56.7,2.09,-0.46 +state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,3.78,3.69,4.06,3.59,17.58,10.47,1.23,42.56,62.56,2.18,4.94,1.84,26.9,34.59,31.06,53.77,0.21,-0.17,10.35,-0.16,1.32,23.25,71.7,-0.82,40.48,54.39,-0.43,1.98 +NbAiLab/nb-gpt-j-6B-alpaca (few-shot),6055,50,1024,False,False,2607,3.79,3.57,3.91,3.88,12.95,27.68,1.65,38.6,63.32,4.49,12.81,-0.68,23.82,26.04,32.6,58.08,0.34,2.26,21.33,2.13,1.87,13.28,60.17,1.52,37.23,46.68,-0.03,0.02 +AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.87,3.71,3.97,3.92,16.13,27.61,1.96,34.79,59.05,1.0,4.85,0.28,27.37,31.22,34.21,54.28,0.92,1.25,18.52,0.33,0.11,23.77,34.29,1.57,33.7,51.36,-0.96,0.3 +ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.88,3.77,4.17,3.71,16.82,17.52,1.53,40.21,60.13,0.29,0.55,-0.21,31.16,29.73,17.59,55.43,1.07,1.59,6.92,0.05,-0.1,23.26,55.06,1.81,35.49,54.8,-1.51,0.41 allenai/OLMo-7B (few-shot),6888,50,2051,True,False,5403,3.9,3.71,4.35,3.65,26.76,30.76,0.55,45.65,50.86,0.26,6.94,-0.11,34.42,35.17,21.46,45.34,0.34,0.26,0.12,2.61,0.96,37.36,72.08,-0.86,45.16,41.03,-0.83,-0.62 -PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.94,3.86,4.21,3.75,15.93,13.01,0.05,36.85,58.6,-0.09,2.94,-0.35,28.82,27.81,18.74,53.53,-0.46,-0.84,12.66,-1.29,1.29,21.42,45.75,-0.25,32.71,57.21,1.57,0.36 -mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,3.96,3.73,4.19,3.95,14.13,26.31,-0.54,32.12,62.61,-2.76,5.08,-0.7,27.37,27.59,18.09,56.77,-0.19,-0.8,5.84,-0.5,0.07,23.92,31.93,0.46,30.81,52.68,-0.85,-1.24 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.99,3.66,4.23,4.08,28.3,28.95,0.2,36.39,56.6,-0.19,11.52,0.06,38.96,40.42,19.42,49.86,-0.13,0.77,4.7,-0.21,-0.56,36.29,39.68,0.96,32.64,28.38,1.35,-0.19 -RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,4.01,4.07,4.19,3.78,10.12,10.65,-0.66,26.08,56.92,0.1,4.29,-0.88,21.04,18.71,12.22,53.49,-1.18,0.36,26.86,0.21,-0.3,22.38,31.11,0.09,44.36,55.44,1.12,-0.91 -Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.04,3.95,4.34,3.83,19.01,8.88,0.66,32.78,55.57,7.21,16.56,0.62,34.46,33.41,6.31,49.88,-1.59,0.61,5.95,2.81,2.92,28.96,26.58,-1.88,34.59,53.36,6.52,1.91 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.94,3.88,4.21,3.74,15.93,13.01,0.05,36.85,58.6,-0.09,2.94,-0.35,28.82,27.81,18.74,53.53,-0.46,-0.84,12.66,-1.29,1.29,21.42,45.75,-0.25,32.71,57.21,1.57,0.36 +mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,3.95,3.73,4.18,3.95,14.13,26.31,-0.54,32.12,62.61,-2.76,5.08,-0.7,27.37,27.59,18.09,56.77,-0.19,-0.8,5.84,-0.5,0.07,23.92,31.93,0.46,30.81,52.68,-0.85,-1.24 +PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.99,3.66,4.24,4.08,28.3,28.95,0.2,36.39,56.6,-0.19,11.52,0.06,38.96,40.42,19.42,49.86,-0.13,0.77,4.7,-0.21,-0.56,36.29,39.68,0.96,32.64,28.38,1.35,-0.19 +tiiuae/Falcon3-1B-Instruct 
(few-shot),1669,131,8192,True,False,9270,4.0,3.9,4.24,3.87,20.03,15.96,0.86,28.98,56.75,2.15,2.51,0.88,29.25,25.45,11.28,55.58,1.52,0.52,8.47,2.62,-0.06,26.41,25.99,1.64,21.39,58.09,3.51,1.05 +RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,4.01,4.06,4.2,3.78,10.12,10.65,-0.66,26.08,56.92,0.1,4.29,-0.88,21.04,18.71,12.22,53.49,-1.18,0.36,26.86,0.21,-0.3,22.38,31.11,0.09,44.36,55.44,1.12,-0.91 +Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.04,3.94,4.34,3.83,19.01,8.88,0.66,32.78,55.57,7.21,16.56,0.62,34.46,33.41,6.31,49.88,-1.59,0.61,5.95,2.81,2.92,28.96,26.58,-1.88,34.59,53.36,6.52,1.91 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2051,True,False,5484,4.07,3.92,4.53,3.77,7.52,18.3,3.23,46.35,53.01,2.17,0.22,-0.65,9.06,17.16,25.52,40.94,0.68,0.17,0.46,2.43,2.35,20.49,70.04,2.28,45.85,39.53,0.69,0.12 -AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4.1,3.98,4.25,4.06,13.98,6.37,0.41,20.46,60.87,0.53,4.72,-0.07,27.66,30.88,5.13,58.91,0.0,0.0,7.55,-0.68,0.32,23.05,12.47,0.08,20.43,59.8,0.72,0.11 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.14,3.69,4.32,4.4,16.17,29.12,-0.47,34.8,59.04,-0.22,6.94,0.52,27.47,23.82,22.22,49.84,-2.06,-0.77,2.48,0.39,-1.07,14.09,23.71,1.74,32.0,28.3,0.94,-0.48 -NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,4.17,4.01,4.52,3.97,9.2,32.94,1.59,23.1,50.75,1.23,13.87,-1.16,10.6,22.63,19.76,44.3,2.8,0.17,3.99,0.29,0.57,4.49,73.13,2.5,22.14,47.1,0.88,-1.21 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.28,4.1,4.49,4.25,12.68,3.61,1.79,28.12,56.85,-0.03,6.03,0.2,26.6,23.7,6.21,48.59,-0.39,0.21,4.65,-1.13,-0.51,18.22,11.52,1.72,27.27,45.57,0.69,0.68 -Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.29,3.85,5.14,3.89,17.38,10.72,1.32,34.58,55.87,4.56,22.41,1.71,29.52,31.27,11.49,9.92,0.29,-0.12,7.8,0.29,0.49,18.57,40.23,0.21,29.49,53.29,2.59,-0.84 +AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4.09,3.99,4.24,4.05,13.98,6.37,0.41,20.46,60.87,0.53,4.72,-0.07,27.66,30.88,5.13,58.91,0.0,0.0,7.55,-0.68,0.32,23.05,12.47,0.08,20.43,59.8,0.72,0.11 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.14,3.71,4.32,4.4,16.17,29.12,-0.47,34.8,59.04,-0.22,6.94,0.52,27.47,23.82,22.22,49.84,-2.06,-0.77,2.48,0.39,-1.07,14.09,23.71,1.74,32.0,28.3,0.94,-0.48 +NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,4.16,4.0,4.51,3.97,9.2,32.94,1.59,23.1,50.75,1.23,13.87,-1.16,10.6,22.63,19.76,44.3,2.8,0.17,3.99,0.29,0.57,4.49,73.13,2.5,22.14,47.1,0.88,-1.21 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.28,4.09,4.5,4.25,12.68,3.61,1.79,28.12,56.85,-0.03,6.03,0.2,26.6,23.7,6.21,48.59,-0.39,0.21,4.65,-1.13,-0.51,18.22,11.52,1.72,27.27,45.57,0.69,0.68 +allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,4.29,4.2,4.61,4.07,13.39,17.94,-2.02,23.65,48.87,-0.33,0.05,-0.08,30.79,31.12,9.95,40.45,-0.95,-0.04,0.0,0.32,0.12,29.39,38.95,-1.35,17.85,43.75,-0.22,0.75 NbAiLab/nb-llama-3.2-3B (few-shot),3213,128,131072,True,False,1880,4.3,4.37,4.52,4.0,0.06,8.71,2.9,1.4,52.26,20.78,34.59,1.18,2.73,0.26,15.1,45.63,13.58,7.78,0.15,11.57,1.19,1.37,72.06,8.44,0.45,51.36,10.37,2.74 -allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,4.3,4.21,4.61,4.07,13.39,17.94,-2.02,23.65,48.87,-0.33,0.05,-0.08,30.79,31.12,9.95,40.45,-0.95,-0.04,0.0,0.32,0.12,29.39,38.95,-1.35,17.85,43.75,-0.22,0.75 -NorwAI/NorwAI-Mistral-7B-pretrain 
(few-shot),7537,68,4096,True,False,3024,4.35,4.22,4.53,4.31,12.82,3.55,0.68,19.85,55.58,-0.11,-2.13,0.88,12.77,10.51,8.7,49.76,0.0,0.82,1.85,-2.42,1.23,9.75,17.76,1.22,14.98,48.74,-0.62,0.99 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.37,4.17,4.5,4.43,8.97,2.66,1.65,24.92,55.39,0.28,3.55,-0.41,20.37,21.27,7.6,49.27,1.31,0.51,4.8,-0.9,-1.0,13.64,9.34,2.2,26.06,37.49,-0.0,0.78 -AI-Sweden-Models/gpt-sw3-126m (few-shot),186,64,2048,True,False,8958,4.39,4.3,4.54,4.32,3.43,9.18,-0.22,16.64,52.34,-0.58,5.18,-1.28,13.55,9.38,7.78,51.68,-1.46,-2.97,2.32,0.39,-0.8,5.66,8.15,-0.81,16.4,51.48,-0.49,1.17 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.39,4.44,4.4,4.33,13.72,3.79,-0.45,14.69,47.44,-0.58,10.99,-0.51,24.37,24.69,8.84,53.61,-1.2,-0.5,0.16,-0.81,-0.71,19.15,-3.03,0.06,14.18,51.51,0.02,0.04 +Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.3,3.86,5.14,3.89,17.38,10.72,1.32,34.58,55.87,4.56,22.41,1.71,29.52,31.27,11.49,9.92,0.29,-0.12,7.8,0.29,0.49,18.57,40.23,0.21,29.49,53.29,2.59,-0.84 +NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.35,4.2,4.53,4.31,12.82,3.55,0.68,19.85,55.58,-0.11,-2.13,0.88,12.77,10.51,8.7,49.76,0.0,0.82,1.85,-2.42,1.23,9.75,17.76,1.22,14.98,48.74,-0.62,0.99 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.38,4.2,4.51,4.42,8.97,2.66,1.65,24.92,55.39,0.28,3.55,-0.41,20.37,21.27,7.6,49.27,1.31,0.51,4.8,-0.9,-1.0,13.64,9.34,2.2,26.06,37.49,-0.0,0.78 +AI-Sweden-Models/gpt-sw3-126m (few-shot),186,64,2048,True,False,8958,4.39,4.32,4.54,4.32,3.43,9.18,-0.22,16.64,52.34,-0.58,5.18,-1.28,13.55,9.38,7.78,51.68,-1.46,-2.97,2.32,0.39,-0.8,5.66,8.15,-0.81,16.4,51.48,-0.49,1.17 +HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.39,4.43,4.4,4.33,13.72,3.79,-0.45,14.69,47.44,-0.58,10.99,-0.51,24.37,24.69,8.84,53.61,-1.2,-0.5,0.16,-0.81,-0.71,19.15,-3.03,0.06,14.18,51.51,0.02,0.04 RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.39,4.29,4.58,4.31,0.0,13.0,0.0,0.0,61.25,0.0,0.0,0.04,0.0,0.0,0.95,59.32,0.0,0.0,0.0,0.18,0.3,0.0,34.63,0.0,0.0,59.04,-0.25,0.08 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.4,4.21,4.48,4.5,12.11,2.61,0.25,14.02,56.53,1.05,10.81,-0.5,20.89,19.62,2.78,53.93,-0.98,0.93,0.15,-0.48,-0.53,17.09,7.41,0.47,11.73,38.3,0.3,0.06 -RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.43,4.37,4.58,4.33,0.0,4.81,0.0,0.0,61.06,0.0,0.0,0.15,0.0,0.0,1.27,59.1,0.0,0.0,0.0,0.83,0.09,0.0,28.62,0.0,0.0,58.16,0.47,0.71 -PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.46,4.22,4.63,4.53,10.59,13.31,0.52,16.61,53.52,0.89,4.3,-0.01,25.02,21.59,8.05,43.81,-0.15,-0.97,0.37,-0.31,-0.61,16.28,17.38,-0.45,17.78,27.12,-1.38,0.94 +HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.4,4.23,4.48,4.5,12.11,2.61,0.25,14.02,56.53,1.05,10.81,-0.5,20.89,19.62,2.78,53.93,-0.98,0.93,0.15,-0.48,-0.53,17.09,7.41,0.47,11.73,38.3,0.3,0.06 +RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.42,4.37,4.58,4.32,0.0,4.81,0.0,0.0,61.06,0.0,0.0,0.15,0.0,0.0,1.27,59.1,0.0,0.0,0.0,0.83,0.09,0.0,28.62,0.0,0.0,58.16,0.47,0.71 +PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.45,4.21,4.63,4.52,10.59,13.31,0.52,16.61,53.52,0.89,4.3,-0.01,25.02,21.59,8.05,43.81,-0.15,-0.97,0.37,-0.31,-0.61,16.28,17.38,-0.45,17.78,27.12,-1.38,0.94 NbAiLab/nb-gpt-j-6B-v2 
(few-shot),6051,50,1024,False,False,2556,4.48,4.25,4.58,4.61,0.24,27.8,0.56,6.84,53.76,-1.83,0.99,-0.48,5.29,6.77,20.84,44.23,0.45,0.48,2.43,0.17,-0.49,0.31,27.42,0.07,17.82,27.09,-0.67,0.86 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.48,4.32,4.55,4.57,13.84,9.47,-0.36,22.1,45.28,-0.27,2.54,-0.66,26.59,26.78,7.91,45.83,0.28,0.04,0.65,0.57,-0.66,22.09,14.15,-0.04,21.6,27.08,-0.65,0.48 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.48,4.33,4.55,4.57,13.84,9.47,-0.36,22.1,45.28,-0.27,2.54,-0.66,26.59,26.78,7.91,45.83,0.28,0.04,0.65,0.57,-0.66,22.09,14.15,-0.04,21.6,27.08,-0.65,0.48 NbAiLab/nb-gpt-j-6B@sharded (few-shot),-1,50,1024,True,False,2630,4.65,4.43,4.77,4.76,0.36,11.0,-0.11,5.15,51.83,-0.96,3.51,0.73,0.22,0.24,20.64,38.55,-0.99,-0.15,0.53,0.63,-0.09,0.01,33.5,-0.02,4.79,26.97,-0.11,0.56 NorGLM/NorGPT-369M (few-shot),-1,64,1024,True,False,19896,4.72,4.54,4.78,4.83,1.13,2.06,-0.36,0.32,54.0,-2.57,3.26,-0.62,3.14,3.0,3.41,45.57,0.22,0.27,0.0,-0.45,-0.27,1.47,5.5,-2.19,0.1,37.4,-0.53,0.25 peter-sk/gpt-neox-da (few-shot),1515,50,1024,True,False,6025,4.79,4.67,4.92,4.78,0.64,-0.52,-0.02,0.48,50.23,0.9,4.1,0.04,0.29,0.25,-1.43,41.89,-0.42,1.11,0.0,0.69,0.55,0.26,4.75,-0.6,0.06,41.84,-0.41,0.52 ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.87,4.88,4.91,4.82,0.0,0.0,0.0,0.02,42.82,4.55,-1.17,-1.26,0.0,0.0,0.0,41.98,0.0,0.0,0.0,1.14,0.64,0.0,0.0,0.0,0.0,40.82,1.19,1.55 -Sigurdur/icebreaker (few-shot),110,32,1024,False,False,48619,4.96,5.03,4.97,4.89,0.0,0.0,0.0,0.0,37.85,0.26,-0.75,-0.88,0.0,0.0,0.0,39.58,0.0,0.0,0.0,0.18,-0.09,0.0,-3.6,0.0,0.0,39.68,-0.2,-0.25 -ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.98,4.96,5.07,4.91,0.65,2.61,-0.73,1.99,35.64,-0.61,6.94,-1.12,0.08,0.0,4.76,31.66,0.67,-0.88,0.0,0.72,-0.2,0.0,0.0,0.49,6.24,31.89,-0.37,0.36 +Sigurdur/icebreaker (few-shot),110,32,1024,False,False,48619,4.96,5.03,4.97,4.88,0.0,0.0,0.0,0.0,37.85,0.26,-0.75,-0.88,0.0,0.0,0.0,39.58,0.0,0.0,0.0,0.18,-0.09,0.0,-3.6,0.0,0.0,39.68,-0.2,-0.25 +ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.98,4.95,5.07,4.91,0.65,2.61,-0.73,1.99,35.64,-0.61,6.94,-1.12,0.08,0.0,4.76,31.66,0.67,-0.88,0.0,0.72,-0.2,0.0,0.0,0.49,6.24,31.89,-0.37,0.36 diff --git a/mainland-scandinavian-nlg.md b/mainland-scandinavian-nlg.md index 1b6a7be2..ecbc9968 100644 --- a/mainland-scandinavian-nlg.md +++ b/mainland-scandinavian-nlg.md @@ -3,7 +3,7 @@ layout: leaderboard title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 --- -
Last updated: 10/01/2025 12:31:18 CET
+Last updated: 11/01/2025 11:04:28 CET
@@ -145,9 +145,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 128000 True 576 ± 221 / 81 ± 28 - 1.27 + 1.28 1.19 - 1.39 + 1.40 1.24 66.80 ± 3.01 / 45.69 ± 2.85 61.62 ± 2.17 / 73.99 ± 1.48 @@ -866,7 +866,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 2,062 ± 397 / 589 ± 178 1.76 - 1.66 + 1.65 1.85 1.78 49.05 ± 2.41 / 30.91 ± 2.53 @@ -1106,8 +1106,8 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 784 ± 310 / 95 ± 28 1.89 - 1.71 - 1.93 + 1.73 + 1.92 2.03 59.96 ± 1.64 / 41.55 ± 2.90 56.91 ± 2.34 / 71.25 ± 1.60 @@ -1527,7 +1527,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 3,780 ± 906 / 799 ± 261 2.09 1.96 - 2.26 + 2.25 2.06 58.03 ± 2.18 / 38.25 ± 2.31 46.63 ± 2.35 / 59.02 ± 3.07 @@ -1587,7 +1587,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 7,095 ± 2,193 / 1,063 ± 344 2.12 2.02 - 2.20 + 2.21 2.13 53.95 ± 2.29 / 34.84 ± 2.23 48.97 ± 1.81 / 64.19 ± 1.99 @@ -1765,9 +1765,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 32768 False 2,493 ± 466 / 773 ± 243 - 2.27 + 2.28 2.01 - 2.51 + 2.52 2.30 51.44 ± 3.28 / 41.38 ± 2.79 54.91 ± 2.59 / 67.84 ± 2.53 @@ -1947,7 +1947,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 1,472 ± 376 / 284 ± 96 2.34 2.16 - 2.85 + 2.86 2.00 58.57 ± 1.03 / 41.07 ± 2.81 51.63 ± 1.00 / 67.43 ± 0.78 @@ -2004,10 +2004,10 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 128 131072 True - 1,005 ± 330 / 196 ± 74 - 2.34 + 1,473 ± 377 / 283 ± 96 + 2.35 2.09 - 2.63 + 2.66 2.30 54.70 ± 1.69 / 38.11 ± 2.31 54.81 ± 1.51 / 67.88 ± 1.39 @@ -2024,8 +2024,8 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 27.41 ± 1.97 / 54.94 ± 2.06 15.60 ± 2.05 / 46.51 ± 2.41 43.11 ± 2.22 / 69.74 ± 1.60 - 38.10 ± 0.57 / 52.64 ± 0.47 - 39.30 ± 1.01 / 54.03 ± 0.82 + 35.02 ± 1.24 / 51.13 ± 0.94 + 37.61 ± 0.88 / 53.12 ± 0.65 55.80 ± 2.68 / 34.65 ± 1.98 79.23 ± 0.48 / 76.86 ± 0.80 32.67 ± 2.18 / 63.89 ± 1.49 @@ -2048,8 +2048,8 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 14.1.2 14.1.2 14.1.2 - 13.0.0 - 13.0.0 + 14.1.2 + 14.1.2 14.0.4 14.0.4 14.0.4 @@ -2065,9 +2065,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 32768 False 2,477 ± 459 / 767 ± 241 - 2.34 + 2.35 2.13 - 2.56 + 2.58 2.33 51.53 ± 2.82 / 40.66 ± 2.30 47.95 ± 3.02 / 55.70 ± 3.32 @@ -2125,8 +2125,8 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 32768 True 9,015 ± 2,966 / 1,121 ± 510 - 2.35 - 2.32 + 2.36 + 2.35 2.45 2.27 49.18 ± 2.27 / 32.04 ± 1.45 @@ -2187,7 +2187,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 4,974 ± 1,208 / 1,032 ± 342 2.36 2.26 - 2.54 + 2.55 2.27 46.75 ± 2.79 / 29.40 ± 2.22 51.73 ± 1.40 / 66.43 ± 1.93 @@ -2247,7 +2247,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 1,979 ± 621 / 320 ± 105 2.36 2.14 - 2.61 + 2.62 2.33 52.22 ± 2.07 / 38.82 ± 1.90 50.66 ± 1.88 / 62.04 ± 2.83 @@ -2298,66 +2298,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.7.0 12.7.0 - - mistralai/Mixtral-8x7B-v0.1 (few-shot) - 46703 - 32 - 32768 - True - 2,363 ± 794 / 311 ± 105 - 2.37 - 2.11 - 2.69 - 2.32 - 52.62 ± 1.84 / 38.57 ± 2.99 - 50.07 ± 1.62 / 63.30 ± 1.97 - 37.37 ± 2.49 / 67.81 ± 1.44 - 54.87 ± 3.63 / 60.32 ± 3.48 - 66.46 ± 0.25 / 21.44 ± 0.33 - 69.00 ± 0.96 / 76.69 ± 0.73 - 79.86 ± 1.46 / 86.58 ± 1.01 - 24.68 ± 4.80 / 42.06 ± 3.69 - 63.92 ± 1.45 / 51.71 ± 3.49 - 62.15 ± 1.56 / 49.74 ± 3.23 - 46.68 ± 3.59 / 59.99 ± 4.44 - 63.04 ± 0.86 / 15.52 ± 1.02 - 33.38 ± 2.21 / 64.72 ± 1.46 - 19.99 ± 2.53 / 58.41 ± 1.97 - 31.87 ± 4.76 / 53.96 ± 5.86 - 38.91 ± 1.15 / 53.89 ± 0.87 - 20.29 ± 3.40 / 38.96 ± 2.34 - 60.92 ± 1.45 / 47.51 ± 4.83 - 79.78 ± 0.72 / 77.07 ± 1.23 - 34.88 ± 2.27 / 64.32 ± 3.30 - 50.35 ± 4.89 / 56.29 ± 5.07 - 62.82 ± 0.39 / 17.12 ± 0.65 - 43.74 ± 0.88 / 57.66 ± 0.66 - 19.86 ± 1.92 / 37.84 
± 1.96 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - timpal0l/BeagleCatMunin (few-shot, val) 7242 @@ -2365,9 +2305,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 32768 False 2,495 ± 458 / 775 ± 244 - 2.37 + 2.36 2.16 - 2.70 + 2.69 2.24 47.62 ± 3.01 / 36.77 ± 2.96 54.73 ± 3.20 / 68.74 ± 2.21 @@ -2418,6 +2358,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 9.3.2 9.3.2 + + timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val) + 8030 + 128 + 8192 + False + 5,018 ± 1,216 / 996 ± 324 + 2.37 + 2.26 + 2.60 + 2.26 + 51.37 ± 2.38 / 37.56 ± 2.50 + 52.17 ± 3.44 / 67.14 ± 2.61 + 27.98 ± 7.51 / 60.57 ± 5.72 + 51.65 ± 0.84 / 59.94 ± 0.78 + 66.25 ± 1.15 / 19.93 ± 1.18 + 57.31 ± 2.81 / 68.05 ± 2.05 + 54.17 ± 2.95 / 68.20 ± 1.83 + 27.65 ± 3.28 / 44.53 ± 2.63 + 64.51 ± 3.28 / 51.06 ± 4.78 + 65.66 ± 3.82 / 53.90 ± 4.32 + 52.90 ± 4.31 / 65.38 ± 3.73 + 64.12 ± 0.39 / 15.41 ± 0.84 + 29.34 ± 4.34 / 59.36 ± 4.64 + 17.42 ± 4.38 / 52.01 ± 3.50 + 38.49 ± 4.41 / 67.16 ± 3.41 + 25.77 ± 3.46 / 43.40 ± 2.66 + 31.80 ± 2.89 / 46.80 ± 2.26 + 65.33 ± 2.38 / 46.88 ± 3.97 + 74.99 ± 3.45 / 76.76 ± 1.80 + 32.65 ± 5.08 / 61.25 ± 4.41 + 55.71 ± 1.34 / 64.54 ± 1.00 + 66.53 ± 0.29 / 19.24 ± 0.57 + 33.16 ± 2.11 / 49.26 ± 1.55 + 32.51 ± 2.97 / 48.24 ± 2.10 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + birgermoell/Munin-NeuralBeagle-NorskGPT (few-shot, val) 7242 @@ -2427,7 +2427,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2,903 ± 407 / 1,157 ± 350 2.38 2.27 - 2.48 + 2.47 2.40 51.85 ± 3.08 / 40.02 ± 2.48 44.02 ± 2.44 / 47.74 ± 1.98 @@ -2487,7 +2487,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2,856 ± 391 / 1,142 ± 342 2.38 2.27 - 2.48 + 2.47 2.40 51.85 ± 3.08 / 40.02 ± 2.48 44.02 ± 2.44 / 47.74 ± 1.98 @@ -2538,65 +2538,125 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 9.3.1 9.3.1 + + mistralai/Mixtral-8x7B-v0.1 (few-shot) + 46703 + 32 + 32768 + True + 2,363 ± 794 / 311 ± 105 + 2.38 + 2.11 + 2.70 + 2.32 + 52.62 ± 1.84 / 38.57 ± 2.99 + 50.07 ± 1.62 / 63.30 ± 1.97 + 37.37 ± 2.49 / 67.81 ± 1.44 + 54.87 ± 3.63 / 60.32 ± 3.48 + 66.46 ± 0.25 / 21.44 ± 0.33 + 69.00 ± 0.96 / 76.69 ± 0.73 + 79.86 ± 1.46 / 86.58 ± 1.01 + 24.68 ± 4.80 / 42.06 ± 3.69 + 63.92 ± 1.45 / 51.71 ± 3.49 + 62.15 ± 1.56 / 49.74 ± 3.23 + 46.68 ± 3.59 / 59.99 ± 4.44 + 63.04 ± 0.86 / 15.52 ± 1.02 + 33.38 ± 2.21 / 64.72 ± 1.46 + 19.99 ± 2.53 / 58.41 ± 1.97 + 31.87 ± 4.76 / 53.96 ± 5.86 + 38.91 ± 1.15 / 53.89 ± 0.87 + 20.29 ± 3.40 / 38.96 ± 2.34 + 60.92 ± 1.45 / 47.51 ± 4.83 + 79.78 ± 0.72 / 77.07 ± 1.23 + 34.88 ± 2.27 / 64.32 ± 3.30 + 50.35 ± 4.89 / 56.29 ± 5.07 + 62.82 ± 0.39 / 17.12 ± 0.65 + 43.74 ± 0.88 / 57.66 ± 0.66 + 19.86 ± 1.92 / 37.84 ± 1.96 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + - timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val) - 8030 - 128 + mlabonne/NeuralBeagle14-7B (few-shot, val) + 7242 + 32 8192 False - 5,018 ± 1,216 / 996 ± 324 + 2,549 ± 472 / 784 ± 245 2.38 - 2.26 - 2.61 - 2.26 - 51.37 ± 2.38 / 37.56 ± 2.50 - 52.17 ± 3.44 / 67.14 ± 2.61 - 27.98 ± 7.51 / 60.57 ± 5.72 - 51.65 ± 0.84 / 59.94 ± 0.78 - 66.25 ± 1.15 / 19.93 ± 1.18 - 57.31 ± 2.81 / 
68.05 ± 2.05 - 54.17 ± 2.95 / 68.20 ± 1.83 - 27.65 ± 3.28 / 44.53 ± 2.63 - 64.51 ± 3.28 / 51.06 ± 4.78 - 65.66 ± 3.82 / 53.90 ± 4.32 - 52.90 ± 4.31 / 65.38 ± 3.73 - 64.12 ± 0.39 / 15.41 ± 0.84 - 29.34 ± 4.34 / 59.36 ± 4.64 - 17.42 ± 4.38 / 52.01 ± 3.50 - 38.49 ± 4.41 / 67.16 ± 3.41 - 25.77 ± 3.46 / 43.40 ± 2.66 - 31.80 ± 2.89 / 46.80 ± 2.26 - 65.33 ± 2.38 / 46.88 ± 3.97 - 74.99 ± 3.45 / 76.76 ± 1.80 - 32.65 ± 5.08 / 61.25 ± 4.41 - 55.71 ± 1.34 / 64.54 ± 1.00 - 66.53 ± 0.29 / 19.24 ± 0.57 - 33.16 ± 2.11 / 49.26 ± 1.55 - 32.51 ± 2.97 / 48.24 ± 2.10 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 + 2.15 + 2.63 + 2.37 + 53.02 ± 2.85 / 41.35 ± 3.42 + 51.29 ± 3.42 / 66.57 ± 2.46 + 19.73 ± 4.11 / 57.07 ± 3.09 + 51.69 ± 2.29 / 61.26 ± 1.32 + 67.33 ± 1.40 / 22.77 ± 1.39 + 65.38 ± 2.59 / 73.95 ± 1.84 + 62.78 ± 4.04 / 74.61 ± 2.90 + 39.07 ± 2.57 / 53.83 ± 2.06 + 62.47 ± 2.56 / 57.71 ± 3.02 + 66.69 ± 2.91 / 58.83 ± 3.70 + 54.04 ± 2.91 / 66.46 ± 2.59 + 65.74 ± 0.37 / 19.13 ± 0.54 + 16.75 ± 4.54 / 49.11 ± 4.45 + 13.00 ± 4.46 / 49.33 ± 2.69 + 34.48 ± 2.13 / 65.43 ± 2.07 + 28.39 ± 1.76 / 45.59 ± 1.28 + 35.19 ± 3.28 / 50.12 ± 3.13 + 61.25 ± 3.35 / 50.76 ± 5.94 + 76.03 ± 2.11 / 78.25 ± 1.95 + 16.28 ± 4.81 / 49.04 ± 3.60 + 50.96 ± 2.34 / 60.05 ± 1.18 + 68.35 ± 0.32 / 24.05 ± 0.66 + 32.30 ± 2.48 / 48.98 ± 1.96 + 38.78 ± 5.70 / 52.89 ± 4.91 + 9.3.2 + 9.3.2 + 9.3.2 + 12.5.2 + 12.6.1 + 10.0.1 + 10.0.1 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 12.5.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 12.5.2 + 9.3.2 + 9.3.2 + 9.3.2 utter-project/EuroLLM-9B-Instruct (few-shot) @@ -2658,66 +2718,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 13.1.0 13.1.0 - - mlabonne/NeuralBeagle14-7B (few-shot, val) - 7242 - 32 - 8192 - False - 2,549 ± 472 / 784 ± 245 - 2.39 - 2.15 - 2.64 - 2.37 - 53.02 ± 2.85 / 41.35 ± 3.42 - 51.29 ± 3.42 / 66.57 ± 2.46 - 19.73 ± 4.11 / 57.07 ± 3.09 - 51.69 ± 2.29 / 61.26 ± 1.32 - 67.33 ± 1.40 / 22.77 ± 1.39 - 65.38 ± 2.59 / 73.95 ± 1.84 - 62.78 ± 4.04 / 74.61 ± 2.90 - 39.07 ± 2.57 / 53.83 ± 2.06 - 62.47 ± 2.56 / 57.71 ± 3.02 - 66.69 ± 2.91 / 58.83 ± 3.70 - 54.04 ± 2.91 / 66.46 ± 2.59 - 65.74 ± 0.37 / 19.13 ± 0.54 - 16.75 ± 4.54 / 49.11 ± 4.45 - 13.00 ± 4.46 / 49.33 ± 2.69 - 34.48 ± 2.13 / 65.43 ± 2.07 - 28.39 ± 1.76 / 45.59 ± 1.28 - 35.19 ± 3.28 / 50.12 ± 3.13 - 61.25 ± 3.35 / 50.76 ± 5.94 - 76.03 ± 2.11 / 78.25 ± 1.95 - 16.28 ± 4.81 / 49.04 ± 3.60 - 50.96 ± 2.34 / 60.05 ± 1.18 - 68.35 ± 0.32 / 24.05 ± 0.66 - 32.30 ± 2.48 / 48.98 ± 1.96 - 38.78 ± 5.70 / 52.89 ± 4.91 - 9.3.2 - 9.3.2 - 9.3.2 - 12.5.2 - 12.6.1 - 10.0.1 - 10.0.1 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 12.5.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 12.5.2 - 9.3.2 - 9.3.2 - 9.3.2 - Nexusflow/Starling-LM-7B-beta (few-shot) 7242 @@ -2727,7 +2727,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 4,136 ± 1,282 / 668 ± 326 2.40 2.40 - 2.36 + 2.37 2.44 51.94 ± 2.00 / 27.59 ± 1.79 51.97 ± 1.36 / 68.62 ± 0.93 @@ -2778,66 +2778,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 14.0.4 14.0.4 - - mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot) - 46703 - 32 - 32768 - True - 5,535 ± 1,837 / 760 ± 256 - 2.40 - 2.16 - 2.69 - 2.36 - 51.20 ± 3.31 / 30.53 ± 2.46 - 50.95 ± 1.15 / 66.13 ± 0.87 - 33.44 ± 1.67 / 64.34 ± 1.32 - 46.85 ± 1.67 / 61.54 ± 1.02 - 64.66 ± 0.25 / 14.48 ± 0.73 - 67.12 ± 1.23 / 75.29 ± 0.88 - 76.78 ± 1.29 / 84.47 ± 0.92 - 39.93 ± 1.65 / 54.24 ± 1.32 - 
55.02 ± 2.74 / 36.31 ± 1.97 - 57.37 ± 2.34 / 38.25 ± 2.37 - 36.76 ± 0.69 / 38.81 ± 0.52 - 61.59 ± 0.17 / 7.66 ± 0.45 - 30.73 ± 1.86 / 63.33 ± 2.10 - 18.96 ± 2.45 / 52.64 ± 4.21 - 41.01 ± 2.00 / 70.24 ± 1.75 - 39.07 ± 1.18 / 53.90 ± 0.87 - 40.48 ± 1.57 / 54.73 ± 1.23 - 46.15 ± 1.63 / 25.08 ± 1.69 - 80.33 ± 0.51 / 79.04 ± 0.76 - 32.89 ± 2.06 / 64.41 ± 1.78 - 46.51 ± 1.69 / 61.35 ± 1.02 - 66.04 ± 0.10 / 17.27 ± 0.37 - 42.98 ± 0.91 / 56.81 ± 0.67 - 35.33 ± 1.71 / 49.88 ± 1.41 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - NorwAI/NorwAI-Mixtral-8x7B (few-shot) 46998 @@ -2898,6 +2838,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 14.0.4 14.0.4 + + mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot) + 46703 + 32 + 32768 + True + 5,535 ± 1,837 / 760 ± 256 + 2.41 + 2.16 + 2.70 + 2.36 + 51.20 ± 3.31 / 30.53 ± 2.46 + 50.95 ± 1.15 / 66.13 ± 0.87 + 33.44 ± 1.67 / 64.34 ± 1.32 + 46.85 ± 1.67 / 61.54 ± 1.02 + 64.66 ± 0.25 / 14.48 ± 0.73 + 67.12 ± 1.23 / 75.29 ± 0.88 + 76.78 ± 1.29 / 84.47 ± 0.92 + 39.93 ± 1.65 / 54.24 ± 1.32 + 55.02 ± 2.74 / 36.31 ± 1.97 + 57.37 ± 2.34 / 38.25 ± 2.37 + 36.76 ± 0.69 / 38.81 ± 0.52 + 61.59 ± 0.17 / 7.66 ± 0.45 + 30.73 ± 1.86 / 63.33 ± 2.10 + 18.96 ± 2.45 / 52.64 ± 4.21 + 41.01 ± 2.00 / 70.24 ± 1.75 + 39.07 ± 1.18 / 53.90 ± 0.87 + 40.48 ± 1.57 / 54.73 ± 1.23 + 46.15 ± 1.63 / 25.08 ± 1.69 + 80.33 ± 0.51 / 79.04 ± 0.76 + 32.89 ± 2.06 / 64.41 ± 1.78 + 46.51 ± 1.69 / 61.35 ± 1.02 + 66.04 ± 0.10 / 17.27 ± 0.37 + 42.98 ± 0.91 / 56.81 ± 0.67 + 35.33 ± 1.71 / 49.88 ± 1.41 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val) 7242 @@ -2967,7 +2967,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2,686 ± 685 / 491 ± 164 2.44 2.30 - 2.66 + 2.65 2.36 51.32 ± 3.82 / 25.54 ± 2.10 52.00 ± 1.67 / 66.25 ± 1.77 @@ -3078,6 +3078,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 9.3.2 9.3.2 + + birgermoell/Rapid-Cycling (few-shot, val) + 7242 + 32 + 32768 + False + 2,346 ± 450 / 666 ± 249 + 2.44 + 2.19 + 2.73 + 2.41 + 49.99 ± 2.62 / 38.37 ± 3.04 + 51.25 ± 2.70 / 62.67 ± 2.82 + 20.66 ± 5.69 / 49.98 ± 4.94 + 56.82 ± 1.75 / 62.40 ± 1.40 + 65.58 ± 1.30 / 17.82 ± 1.12 + 67.48 ± 3.13 / 75.62 ± 2.33 + 63.69 ± 2.43 / 75.31 ± 1.76 + 32.11 ± 3.47 / 48.55 ± 2.67 + 55.93 ± 2.70 / 50.51 ± 3.15 + 63.85 ± 2.45 / 53.11 ± 4.11 + 50.41 ± 5.49 / 64.49 ± 4.37 + 65.10 ± 0.51 / 18.12 ± 0.74 + 15.74 ± 4.15 / 41.16 ± 2.21 + 2.23 ± 4.69 / 34.70 ± 1.39 + 39.81 ± 2.81 / 65.65 ± 2.64 + 26.34 ± 1.48 / 44.69 ± 1.13 + 34.85 ± 4.33 / 50.23 ± 3.39 + 53.66 ± 3.57 / 41.97 ± 4.83 + 77.72 ± 2.51 / 78.40 ± 2.65 + 16.22 ± 4.46 / 43.17 ± 3.88 + 59.75 ± 1.13 / 64.72 ± 1.04 + 67.57 ± 0.48 / 23.65 ± 0.72 + 27.24 ± 2.07 / 45.51 ± 1.53 + 32.04 ± 4.21 / 48.67 ± 3.11 + 9.3.2 + 9.3.2 + 9.3.2 + 12.5.2 + 12.6.1 + 10.0.1 + 10.0.1 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 12.5.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 12.5.2 + 9.3.2 + 9.3.2 + 9.3.2 + merge-crew/da-sv-task-arithmetic (few-shot, val) 7242 @@ -3138,66 +3198,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 10.0.1 10.0.1 - - birgermoell/Rapid-Cycling (few-shot, val) - 7242 - 32 - 32768 - False - 2,346 ± 450 / 666 ± 249 - 2.45 - 2.19 - 2.74 - 2.41 - 49.99 ± 2.62 / 38.37 ± 3.04 - 51.25 ± 2.70 / 
62.67 ± 2.82 - 20.66 ± 5.69 / 49.98 ± 4.94 - 56.82 ± 1.75 / 62.40 ± 1.40 - 65.58 ± 1.30 / 17.82 ± 1.12 - 67.48 ± 3.13 / 75.62 ± 2.33 - 63.69 ± 2.43 / 75.31 ± 1.76 - 32.11 ± 3.47 / 48.55 ± 2.67 - 55.93 ± 2.70 / 50.51 ± 3.15 - 63.85 ± 2.45 / 53.11 ± 4.11 - 50.41 ± 5.49 / 64.49 ± 4.37 - 65.10 ± 0.51 / 18.12 ± 0.74 - 15.74 ± 4.15 / 41.16 ± 2.21 - 2.23 ± 4.69 / 34.70 ± 1.39 - 39.81 ± 2.81 / 65.65 ± 2.64 - 26.34 ± 1.48 / 44.69 ± 1.13 - 34.85 ± 4.33 / 50.23 ± 3.39 - 53.66 ± 3.57 / 41.97 ± 4.83 - 77.72 ± 2.51 / 78.40 ± 2.65 - 16.22 ± 4.46 / 43.17 ± 3.88 - 59.75 ± 1.13 / 64.72 ± 1.04 - 67.57 ± 0.48 / 23.65 ± 0.72 - 27.24 ± 2.07 / 45.51 ± 1.53 - 32.04 ± 4.21 / 48.67 ± 3.11 - 9.3.2 - 9.3.2 - 9.3.2 - 12.5.2 - 12.6.1 - 10.0.1 - 10.0.1 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 12.5.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 12.5.2 - 9.3.2 - 9.3.2 - 9.3.2 - merge-crew/da-sv-slerp (few-shot, val) 7242 @@ -3258,66 +3258,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 10.0.1 10.0.1 - - meta-llama/Meta-Llama-3-8B-Instruct (few-shot) - 8030 - 128 - 8192 - True - 1,007 ± 316 / 162 ± 45 - 2.45 - 2.28 - 2.65 - 2.41 - 57.74 ± 2.06 / 40.66 ± 2.58 - 48.43 ± 3.31 / 62.09 ± 3.62 - 27.12 ± 2.83 / 60.40 ± 2.70 - 46.76 ± 1.20 / 59.77 ± 0.51 - 66.36 ± 0.47 / 19.75 ± 0.84 - 57.87 ± 1.67 / 67.43 ± 1.34 - 50.42 ± 3.32 / 65.43 ± 2.41 - 29.17 ± 2.24 / 44.59 ± 2.00 - 74.47 ± 1.47 / 65.57 ± 2.39 - 72.93 ± 1.00 / 65.44 ± 2.55 - 34.44 ± 0.42 / 37.94 ± 0.39 - 63.98 ± 0.50 / 14.75 ± 0.79 - 27.77 ± 1.63 / 61.75 ± 1.77 - 20.35 ± 1.92 / 57.74 ± 2.28 - 42.90 ± 3.57 / 69.90 ± 3.17 - 33.44 ± 0.67 / 48.76 ± 0.58 - 30.91 ± 1.88 / 45.85 ± 1.93 - 69.67 ± 1.30 / 52.94 ± 4.01 - 59.93 ± 4.70 / 67.54 ± 3.04 - 27.63 ± 3.19 / 60.85 ± 3.29 - 49.84 ± 1.61 / 60.85 ± 0.93 - 66.60 ± 0.07 / 19.13 ± 0.31 - 33.54 ± 1.40 / 49.20 ± 1.13 - 30.32 ± 2.27 / 45.96 ± 1.87 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 14.0.4 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - senseable/WestLake-7B-v2 (few-shot) 7242 @@ -3327,7 +3267,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 5,993 ± 1,028 / 1,742 ± 561 2.45 2.27 - 2.62 + 2.63 2.46 52.61 ± 1.77 / 33.64 ± 2.67 49.81 ± 1.43 / 66.32 ± 1.25 @@ -3378,6 +3318,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.6.1 12.6.1 + + meta-llama/Meta-Llama-3-8B-Instruct (few-shot) + 8030 + 128 + 8192 + True + 1,483 ± 377 / 287 ± 97 + 2.46 + 2.28 + 2.69 + 2.41 + 57.74 ± 2.06 / 40.66 ± 2.58 + 48.43 ± 3.31 / 62.09 ± 3.62 + 27.12 ± 2.83 / 60.40 ± 2.70 + 46.76 ± 1.20 / 59.77 ± 0.51 + 66.36 ± 0.47 / 19.75 ± 0.84 + 57.87 ± 1.67 / 67.43 ± 1.34 + 50.42 ± 3.32 / 65.43 ± 2.41 + 29.17 ± 2.24 / 44.59 ± 2.00 + 66.56 ± 1.70 / 58.47 ± 2.40 + 68.29 ± 1.47 / 61.22 ± 2.25 + 34.47 ± 0.44 / 37.96 ± 0.40 + 63.80 ± 0.09 / 13.37 ± 0.15 + 28.22 ± 1.37 / 59.51 ± 1.44 + 18.21 ± 2.21 / 52.84 ± 2.30 + 47.34 ± 1.99 / 73.23 ± 1.29 + 30.78 ± 0.81 / 47.33 ± 0.75 + 31.49 ± 1.29 / 48.15 ± 0.99 + 69.67 ± 1.30 / 52.94 ± 4.01 + 59.93 ± 4.70 / 67.54 ± 3.04 + 27.63 ± 3.19 / 60.85 ± 3.29 + 49.84 ± 1.61 / 60.85 ± 0.93 + 66.60 ± 0.07 / 19.13 ± 0.31 + 33.54 ± 1.40 / 49.20 ± 1.13 + 30.32 ± 2.27 / 45.96 ± 1.87 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + birgermoell/Flashback-Bellman (few-shot, val) 7242 @@ -3385,9 +3385,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 32768 False 2,887 
± 403 / 1,144 ± 345 - 2.46 + 2.47 2.34 - 2.72 + 2.75 2.33 47.71 ± 3.50 / 35.65 ± 3.07 48.21 ± 3.58 / 60.08 ± 3.41 @@ -3558,6 +3558,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.3.2 12.3.2 + + merge-crew/da-sv-dare-ties-density-0.9 (few-shot, val) + 7242 + 32 + 32768 + True + 2,443 ± 458 / 750 ± 240 + 2.51 + 2.38 + 2.76 + 2.40 + 45.61 ± 3.06 / 35.04 ± 2.94 + 53.73 ± 3.06 / 67.51 ± 2.16 + 17.08 ± 5.36 / 52.62 ± 5.62 + 56.67 ± 1.19 / 61.18 ± 1.07 + 66.14 ± 1.40 / 20.98 ± 1.16 + 60.58 ± 2.62 / 69.80 ± 2.03 + 57.89 ± 3.66 / 70.00 ± 2.59 + 16.45 ± 1.87 / 35.47 ± 1.77 + 48.24 ± 3.18 / 42.53 ± 3.52 + 61.50 ± 1.54 / 50.90 ± 4.58 + 49.40 ± 3.40 / 60.71 ± 3.33 + 64.56 ± 0.80 / 17.62 ± 1.06 + 24.12 ± 3.24 / 59.38 ± 2.25 + 13.20 ± 3.16 / 54.42 ± 3.04 + 47.93 ± 3.46 / 69.52 ± 3.06 + 26.21 ± 2.32 / 42.54 ± 1.71 + 17.00 ± 2.59 / 33.52 ± 2.18 + 46.61 ± 3.11 / 34.10 ± 4.61 + 76.38 ± 2.01 / 78.30 ± 2.42 + 34.16 ± 4.39 / 60.06 ± 4.67 + 58.77 ± 1.76 / 63.50 ± 1.47 + 66.77 ± 0.46 / 22.42 ± 0.84 + 29.77 ± 2.44 / 46.25 ± 1.64 + 25.38 ± 3.56 / 39.34 ± 3.89 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 12.6.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + 10.0.1 + mistralai/Ministral-8B-Instruct-2410 (few-shot) 8020 @@ -3567,7 +3627,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 1,302 ± 323 / 253 ± 86 2.51 2.35 - 2.73 + 2.74 2.45 55.49 ± 2.05 / 34.11 ± 1.79 49.18 ± 1.89 / 65.27 ± 1.69 @@ -3687,7 +3747,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 6,113 ± 1,044 / 1,790 ± 579 2.52 2.27 - 2.77 + 2.78 2.52 39.12 ± 4.28 / 28.74 ± 2.75 36.47 ± 4.90 / 50.72 ± 6.21 @@ -3738,126 +3798,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.4.0 12.4.0 - - merge-crew/da-sv-dare-ties-density-0.9 (few-shot, val) - 7242 - 32 - 32768 - True - 2,443 ± 458 / 750 ± 240 - 2.52 - 2.38 - 2.77 - 2.40 - 45.61 ± 3.06 / 35.04 ± 2.94 - 53.73 ± 3.06 / 67.51 ± 2.16 - 17.08 ± 5.36 / 52.62 ± 5.62 - 56.67 ± 1.19 / 61.18 ± 1.07 - 66.14 ± 1.40 / 20.98 ± 1.16 - 60.58 ± 2.62 / 69.80 ± 2.03 - 57.89 ± 3.66 / 70.00 ± 2.59 - 16.45 ± 1.87 / 35.47 ± 1.77 - 48.24 ± 3.18 / 42.53 ± 3.52 - 61.50 ± 1.54 / 50.90 ± 4.58 - 49.40 ± 3.40 / 60.71 ± 3.33 - 64.56 ± 0.80 / 17.62 ± 1.06 - 24.12 ± 3.24 / 59.38 ± 2.25 - 13.20 ± 3.16 / 54.42 ± 3.04 - 47.93 ± 3.46 / 69.52 ± 3.06 - 26.21 ± 2.32 / 42.54 ± 1.71 - 17.00 ± 2.59 / 33.52 ± 2.18 - 46.61 ± 3.11 / 34.10 ± 4.61 - 76.38 ± 2.01 / 78.30 ± 2.42 - 34.16 ± 4.39 / 60.06 ± 4.67 - 58.77 ± 1.76 / 63.50 ± 1.47 - 66.77 ± 0.46 / 22.42 ± 0.84 - 29.77 ± 2.44 / 46.25 ± 1.64 - 25.38 ± 3.56 / 39.34 ± 3.89 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 12.6.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - 10.0.1 - - - meta-llama/Meta-Llama-3-8B (few-shot) - 8030 - 128 - 8192 - True - 1,335 ± 338 / 260 ± 88 - 2.52 - 2.39 - 2.71 - 2.47 - 49.46 ± 1.88 / 32.11 ± 2.41 - 51.16 ± 2.15 / 67.00 ± 1.51 - 23.01 ± 3.93 / 49.99 ± 4.63 - 49.75 ± 5.10 / 56.13 ± 4.89 - 65.26 ± 0.48 / 18.81 ± 0.72 - 51.64 ± 2.09 / 63.34 ± 1.71 - 59.28 ± 1.85 / 72.62 ± 1.29 - 24.30 ± 3.14 / 42.50 ± 2.67 - 61.48 ± 1.83 / 47.65 ± 2.94 - 61.58 ± 2.21 / 50.10 ± 2.68 - 32.94 ± 0.86 / 37.52 ± 0.43 - 63.38 ± 1.15 / 15.74 ± 1.68 - 21.20 ± 6.57 / 52.29 ± 7.43 - 19.65 ± 4.32 / 56.66 ± 4.40 - 53.35 ± 4.33 / 74.98 ± 3.70 - 33.02 ± 1.35 / 49.25 ± 1.04 - 24.93 ± 3.13 / 42.47 ± 2.74 - 59.92 ± 2.46 / 40.98 ± 4.90 - 80.91 ± 0.41 / 78.09 ± 1.22 - 26.39 ± 3.47 / 52.38 ± 4.49 - 47.69 ± 
6.29 / 54.30 ± 6.65 - 63.94 ± 0.48 / 17.92 ± 0.37 - 33.39 ± 0.95 / 49.21 ± 0.77 - 20.21 ± 1.46 / 39.29 ± 1.54 - 14.0.3 - 14.1.2 - 14.1.2 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 12.6.1 - 12.6.1 - 14.1.2 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 14.0.4 - 14.1.2 - 14.1.2 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - mlabonne/AlphaMonarch-7B (few-shot, val) 7242 @@ -3867,7 +3807,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 5,340 ± 1,262 / 1,157 ± 375 2.52 2.32 - 2.74 + 2.73 2.51 52.72 ± 2.21 / 39.49 ± 3.47 49.11 ± 3.91 / 64.78 ± 2.61 @@ -4167,7 +4107,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 4,141 ± 994 / 905 ± 299 2.58 2.33 - 2.83 + 2.84 2.57 36.72 ± 3.33 / 27.73 ± 2.57 46.48 ± 1.17 / 55.20 ± 2.51 @@ -4278,66 +4218,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.7.0 12.7.0 - - bineric/NorskGPT-Mistral-7b (few-shot) - 7242 - 32 - 32768 - False - 1,440 ± 352 / 293 ± 99 - 2.58 - 2.41 - 2.89 - 2.45 - 50.76 ± 1.60 / 32.89 ± 2.11 - 40.41 ± 0.79 / 44.17 ± 0.56 - 0.00 ± 0.00 / 33.41 ± 0.23 - 57.26 ± 0.79 / 63.80 ± 0.52 - 66.89 ± 0.71 / 21.43 ± 0.57 - 64.32 ± 1.99 / 73.00 ± 1.51 - 53.18 ± 2.37 / 67.48 ± 1.70 - 43.42 ± 1.05 / 57.50 ± 0.80 - 62.98 ± 1.81 / 46.61 ± 3.01 - 60.12 ± 1.27 / 43.49 ± 2.87 - 35.47 ± 0.80 / 38.01 ± 0.57 - 64.52 ± 0.21 / 16.96 ± 0.28 - 15.15 ± 1.99 / 45.37 ± 3.71 - 9.49 ± 1.78 / 43.92 ± 3.35 - 27.06 ± 2.88 / 49.25 ± 4.63 - 29.91 ± 1.13 / 47.03 ± 0.81 - 37.75 ± 1.52 / 52.97 ± 1.19 - 58.40 ± 2.62 / 40.55 ± 3.65 - 74.30 ± 1.26 / 60.35 ± 0.41 - 0.00 ± 0.00 / 33.37 ± 0.27 - 59.16 ± 1.23 / 65.78 ± 0.72 - 65.36 ± 0.14 / 18.81 ± 0.17 - 35.01 ± 0.99 / 51.07 ± 0.70 - 43.72 ± 0.69 / 57.66 ± 0.50 - 9.3.1 - 9.3.1 - 9.3.1 - 12.5.1 - 12.6.1 - 10.0.1 - 10.0.1 - 9.3.1 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 9.3.1 - 9.3.1 - 9.3.1 - 12.5.1 - 11.0.0 - 9.3.1 - 9.3.1 - ibm-granite/granite-3.0-8b-base (few-shot) 8171 @@ -4398,6 +4278,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 13.0.0 13.0.0 + + bineric/NorskGPT-Mistral-7b (few-shot) + 7242 + 32 + 32768 + False + 1,440 ± 352 / 293 ± 99 + 2.59 + 2.41 + 2.91 + 2.45 + 50.76 ± 1.60 / 32.89 ± 2.11 + 40.41 ± 0.79 / 44.17 ± 0.56 + 0.00 ± 0.00 / 33.41 ± 0.23 + 57.26 ± 0.79 / 63.80 ± 0.52 + 66.89 ± 0.71 / 21.43 ± 0.57 + 64.32 ± 1.99 / 73.00 ± 1.51 + 53.18 ± 2.37 / 67.48 ± 1.70 + 43.42 ± 1.05 / 57.50 ± 0.80 + 62.98 ± 1.81 / 46.61 ± 3.01 + 60.12 ± 1.27 / 43.49 ± 2.87 + 35.47 ± 0.80 / 38.01 ± 0.57 + 64.52 ± 0.21 / 16.96 ± 0.28 + 15.15 ± 1.99 / 45.37 ± 3.71 + 9.49 ± 1.78 / 43.92 ± 3.35 + 27.06 ± 2.88 / 49.25 ± 4.63 + 29.91 ± 1.13 / 47.03 ± 0.81 + 37.75 ± 1.52 / 52.97 ± 1.19 + 58.40 ± 2.62 / 40.55 ± 3.65 + 74.30 ± 1.26 / 60.35 ± 0.41 + 0.00 ± 0.00 / 33.37 ± 0.27 + 59.16 ± 1.23 / 65.78 ± 0.72 + 65.36 ± 0.14 / 18.81 ± 0.17 + 35.01 ± 0.99 / 51.07 ± 0.70 + 43.72 ± 0.69 / 57.66 ± 0.50 + 9.3.1 + 9.3.1 + 9.3.1 + 12.5.1 + 12.6.1 + 10.0.1 + 10.0.1 + 9.3.1 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 9.3.1 + 9.3.1 + 9.3.1 + 12.5.1 + 11.0.0 + 9.3.1 + 9.3.1 + Mabeck/Heidrun-Mistral-7B-chat (few-shot) 7242 @@ -4406,8 +4346,8 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 False 1,419 ± 349 / 286 ± 97 2.61 - 2.65 - 2.69 + 2.64 + 2.70 2.49 51.93 ± 1.66 / 34.88 ± 2.57 49.86 ± 1.51 / 65.11 ± 1.52 @@ -4518,6 +4458,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 11.0.0 11.0.0 + + meta-llama/Meta-Llama-3-8B (few-shot) + 8030 + 128 + 8192 + True + 1,477 ± 376 / 285 ± 97 + 2.61 + 2.39 + 2.96 + 2.47 + 49.46 ± 1.88 / 32.11 ± 2.41 + 51.16 ± 2.15 / 67.00 ± 1.51 + 23.01 ± 3.93 / 49.99 ± 4.63 + 49.75 ± 
5.10 / 56.13 ± 4.89 + 65.26 ± 0.48 / 18.81 ± 0.72 + 51.64 ± 2.09 / 63.34 ± 1.71 + 59.28 ± 1.85 / 72.62 ± 1.29 + 24.30 ± 3.14 / 42.50 ± 2.67 + 62.89 ± 2.01 / 50.34 ± 1.92 + 56.18 ± 3.26 / 47.30 ± 2.62 + 33.07 ± 0.73 / 37.60 ± 0.49 + 61.95 ± 0.69 / 14.09 ± 0.61 + 30.73 ± 3.19 / 64.06 ± 2.41 + 20.57 ± 3.62 / 54.22 ± 5.15 + 30.77 ± 4.33 / 54.66 ± 4.41 + 30.95 ± 1.33 / 47.50 ± 0.99 + 21.98 ± 2.12 / 41.07 ± 1.52 + 59.92 ± 2.46 / 40.98 ± 4.90 + 80.91 ± 0.41 / 78.09 ± 1.22 + 26.39 ± 3.47 / 52.38 ± 4.49 + 47.69 ± 6.29 / 54.30 ± 6.65 + 63.94 ± 0.48 / 17.92 ± 0.37 + 33.39 ± 0.95 / 49.21 ± 0.77 + 20.21 ± 1.46 / 39.29 ± 1.54 + 14.0.3 + 14.1.2 + 14.1.2 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + merge-crew/da-sv-dare-ties-density-0.6 (few-shot, val) 7242 @@ -4528,7 +4528,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2.62 2.48 2.87 - 2.50 + 2.51 46.03 ± 3.93 / 34.23 ± 2.86 49.59 ± 3.26 / 63.45 ± 2.61 12.72 ± 3.51 / 46.56 ± 5.33 @@ -4587,7 +4587,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 5,054 ± 1,200 / 1,056 ± 339 2.62 2.52 - 2.92 + 2.91 2.42 42.43 ± 3.36 / 29.30 ± 2.53 47.82 ± 2.00 / 63.19 ± 2.09 @@ -4707,7 +4707,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 3,004 ± 771 / 534 ± 179 2.64 2.54 - 2.91 + 2.90 2.48 46.59 ± 4.16 / 31.16 ± 3.95 50.25 ± 1.25 / 66.18 ± 1.01 @@ -4818,66 +4818,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 13.0.0 13.0.0 - - mhenrichsen/hestenettetLM (few-shot) - 7242 - 32 - 32768 - True - 1,151 ± 294 / 227 ± 76 - 2.66 - 2.53 - 2.91 - 2.55 - 44.90 ± 3.15 / 31.91 ± 2.65 - 42.61 ± 1.79 / 53.47 ± 3.00 - 8.65 ± 3.44 / 38.18 ± 4.21 - 59.62 ± 1.12 / 64.70 ± 0.75 - 66.48 ± 0.99 / 22.13 ± 1.09 - 52.83 ± 1.98 / 64.27 ± 1.66 - 57.96 ± 2.76 / 71.31 ± 1.89 - 18.11 ± 3.15 / 37.65 ± 2.44 - 52.52 ± 1.85 / 43.46 ± 2.21 - 55.60 ± 3.22 / 45.25 ± 4.20 - 48.23 ± 3.31 / 65.51 ± 3.01 - 63.53 ± 1.47 / 16.54 ± 1.59 - 8.53 ± 3.72 / 38.61 ± 3.22 - 6.65 ± 1.40 / 39.32 ± 2.51 - 46.89 ± 3.29 / 70.96 ± 2.84 - 27.67 ± 0.91 / 45.77 ± 0.66 - 14.20 ± 3.45 / 34.89 ± 2.57 - 53.00 ± 2.53 / 39.09 ± 3.72 - 79.70 ± 0.65 / 79.45 ± 0.68 - 4.32 ± 2.19 / 34.43 ± 0.87 - 59.03 ± 1.03 / 64.74 ± 0.84 - 64.89 ± 0.28 / 19.31 ± 0.40 - 35.48 ± 0.99 / 51.54 ± 0.72 - 20.54 ± 2.14 / 39.66 ± 1.80 - 12.5.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.6.1 - 12.3.2 - 12.3.2 - 12.3.2 - 12.5.2 - 12.5.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.5.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 - mistralai/Mistral-7B-v0.3 (few-shot) 7248 @@ -4887,7 +4827,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 1,364 ± 343 / 266 ± 90 2.66 2.43 - 2.98 + 2.99 2.57 43.60 ± 2.94 / 32.17 ± 2.26 45.92 ± 1.50 / 61.91 ± 1.50 @@ -5007,7 +4947,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 1,841 ± 297 / 651 ± 193 2.67 2.45 - 2.98 + 2.99 2.57 43.65 ± 2.87 / 32.21 ± 2.13 45.86 ± 1.63 / 61.89 ± 1.57 @@ -5067,7 +5007,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2,856 ± 645 / 709 ± 243 2.67 2.53 - 2.81 + 2.82 2.67 44.17 ± 3.14 / 31.63 ± 3.03 44.28 ± 1.94 / 55.59 ± 3.06 @@ -5119,64 +5059,64 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.10.4 - mistralai/Mistral-7B-v0.1 (few-shot) + mhenrichsen/hestenettetLM (few-shot) 7242 32 32768 True - 1,446 ± 354 / 295 ± 100 + 1,151 ± 294 / 227 ± 76 2.67 2.53 - 2.94 - 2.55 - 45.42 ± 2.88 / 32.66 ± 2.49 - 43.16 ± 1.69 / 54.53 ± 2.83 - 8.79 ± 3.23 / 38.38 ± 4.22 - 59.43 ± 1.04 / 64.55 ± 0.68 - 66.47 ± 1.00 / 22.11 ± 1.08 - 53.26 ± 1.94 / 64.50 ± 1.68 - 58.26 ± 2.62 / 71.56 ± 
1.79 - 18.53 ± 2.03 / 37.79 ± 1.68 - 52.00 ± 1.91 / 43.55 ± 2.21 - 55.12 ± 3.14 / 45.34 ± 4.15 - 47.25 ± 4.11 / 64.53 ± 3.71 - 63.49 ± 1.49 / 16.48 ± 1.62 - 8.66 ± 4.12 / 38.87 ± 3.40 - 6.80 ± 1.59 / 39.72 ± 2.50 - 46.86 ± 3.27 / 70.86 ± 2.79 - 27.78 ± 1.08 / 45.76 ± 0.79 - 10.88 ± 3.63 / 32.43 ± 2.67 - 53.34 ± 2.55 / 40.48 ± 3.66 - 80.00 ± 0.70 / 79.80 ± 0.66 - 4.61 ± 2.18 / 34.51 ± 0.86 - 58.99 ± 1.05 / 64.65 ± 0.83 - 64.87 ± 0.31 / 19.30 ± 0.43 - 35.52 ± 1.01 / 51.52 ± 0.73 - 19.67 ± 2.31 / 38.98 ± 1.98 - 0.0.0 - 0.0.0 - 0.0.0 - 12.5.1 + 2.91 + 2.56 + 44.90 ± 3.15 / 31.91 ± 2.65 + 42.61 ± 1.79 / 53.47 ± 3.00 + 8.65 ± 3.44 / 38.18 ± 4.21 + 59.62 ± 1.12 / 64.70 ± 0.75 + 66.48 ± 0.99 / 22.13 ± 1.09 + 52.83 ± 1.98 / 64.27 ± 1.66 + 57.96 ± 2.76 / 71.31 ± 1.89 + 18.11 ± 3.15 / 37.65 ± 2.44 + 52.52 ± 1.85 / 43.46 ± 2.21 + 55.60 ± 3.22 / 45.25 ± 4.20 + 48.23 ± 3.31 / 65.51 ± 3.01 + 63.53 ± 1.47 / 16.54 ± 1.59 + 8.53 ± 3.72 / 38.61 ± 3.22 + 6.65 ± 1.40 / 39.32 ± 2.51 + 46.89 ± 3.29 / 70.96 ± 2.84 + 27.67 ± 0.91 / 45.77 ± 0.66 + 14.20 ± 3.45 / 34.89 ± 2.57 + 53.00 ± 2.53 / 39.09 ± 3.72 + 79.70 ± 0.65 / 79.45 ± 0.68 + 4.32 ± 2.19 / 34.43 ± 0.87 + 59.03 ± 1.03 / 64.74 ± 0.84 + 64.89 ± 0.28 / 19.31 ± 0.40 + 35.48 ± 0.99 / 51.54 ± 0.72 + 20.54 ± 2.14 / 39.66 ± 1.80 + 12.5.2 + 12.3.2 + 12.3.2 + 12.3.2 12.6.1 - 10.0.1 - 10.0.1 - 9.1.2 - 0.0.0 - 0.0.0 - 0.0.0 - 11.0.0 - 0.0.0 - 0.0.0 - 12.5.1 - 9.1.2 - 9.1.2 - 0.0.0 - 0.0.0 - 0.0.0 - 12.5.1 - 11.0.0 - 9.1.2 - 9.1.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.5.2 + 12.5.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.5.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 meta-llama/Llama-2-13b-chat-hf (few-shot) @@ -5238,6 +5178,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.10.4 12.10.4 + + mistralai/Mistral-7B-v0.1 (few-shot) + 7242 + 32 + 32768 + True + 1,446 ± 354 / 295 ± 100 + 2.68 + 2.53 + 2.95 + 2.56 + 45.42 ± 2.88 / 32.66 ± 2.49 + 43.16 ± 1.69 / 54.53 ± 2.83 + 8.79 ± 3.23 / 38.38 ± 4.22 + 59.43 ± 1.04 / 64.55 ± 0.68 + 66.47 ± 1.00 / 22.11 ± 1.08 + 53.26 ± 1.94 / 64.50 ± 1.68 + 58.26 ± 2.62 / 71.56 ± 1.79 + 18.53 ± 2.03 / 37.79 ± 1.68 + 52.00 ± 1.91 / 43.55 ± 2.21 + 55.12 ± 3.14 / 45.34 ± 4.15 + 47.25 ± 4.11 / 64.53 ± 3.71 + 63.49 ± 1.49 / 16.48 ± 1.62 + 8.66 ± 4.12 / 38.87 ± 3.40 + 6.80 ± 1.59 / 39.72 ± 2.50 + 46.86 ± 3.27 / 70.86 ± 2.79 + 27.78 ± 1.08 / 45.76 ± 0.79 + 10.88 ± 3.63 / 32.43 ± 2.67 + 53.34 ± 2.55 / 40.48 ± 3.66 + 80.00 ± 0.70 / 79.80 ± 0.66 + 4.61 ± 2.18 / 34.51 ± 0.86 + 58.99 ± 1.05 / 64.65 ± 0.83 + 64.87 ± 0.31 / 19.30 ± 0.43 + 35.52 ± 1.01 / 51.52 ± 0.73 + 19.67 ± 2.31 / 38.98 ± 1.98 + 0.0.0 + 0.0.0 + 0.0.0 + 12.5.1 + 12.6.1 + 10.0.1 + 10.0.1 + 9.1.2 + 0.0.0 + 0.0.0 + 0.0.0 + 11.0.0 + 0.0.0 + 0.0.0 + 12.5.1 + 9.1.2 + 9.1.2 + 0.0.0 + 0.0.0 + 0.0.0 + 12.5.1 + 11.0.0 + 9.1.2 + 9.1.2 + CohereForAI/aya-23-8B (few-shot) 8028 @@ -5367,7 +5367,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 3,254 ± 1,068 / 484 ± 173 2.74 2.54 - 2.97 + 2.98 2.71 43.17 ± 2.78 / 31.37 ± 2.95 43.40 ± 2.20 / 57.24 ± 3.52 @@ -5485,9 +5485,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 4096 True 2,898 ± 637 / 736 ± 236 - 2.77 + 2.76 2.68 - 3.07 + 3.06 2.55 41.28 ± 3.92 / 31.98 ± 3.26 23.01 ± 3.87 / 36.55 ± 6.42 @@ -5545,9 +5545,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 8193 True 5,374 ± 1,233 / 1,193 ± 377 - 2.78 + 2.77 2.59 - 3.03 + 3.02 2.71 28.22 ± 1.66 / 19.95 ± 1.55 47.11 ± 1.36 / 63.36 ± 1.39 @@ -5605,9 +5605,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 4096 False 5,384 ± 879 / 1,746 ± 553 - 2.81 + 2.80 2.69 - 2.88 + 2.87 2.85 41.63 ± 2.33 / 
28.51 ± 2.43 47.73 ± 1.52 / 60.64 ± 2.33 @@ -5787,7 +5787,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 1,220 ± 411 / 158 ± 53 2.85 2.82 - 2.92 + 2.91 2.82 50.83 ± 1.31 / 41.56 ± 1.86 53.23 ± 2.63 / 64.70 ± 3.82 @@ -5967,7 +5967,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2,219 ± 427 / 717 ± 224 2.88 2.75 - 3.13 + 3.12 2.76 37.93 ± 3.09 / 29.50 ± 2.18 44.62 ± 1.98 / 62.62 ± 1.54 @@ -6087,7 +6087,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 3,161 ± 676 / 1,247 ± 481 2.90 2.83 - 3.07 + 3.06 2.80 49.01 ± 1.94 / 29.13 ± 2.09 47.95 ± 1.37 / 64.82 ± 0.89 @@ -6147,7 +6147,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2,996 ± 817 / 284 ± 96 2.91 2.77 - 3.20 + 3.21 2.76 27.60 ± 3.21 / 25.68 ± 2.70 37.08 ± 2.51 / 54.37 ± 2.72 @@ -6268,7 +6268,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2.98 2.76 3.27 - 2.91 + 2.92 35.44 ± 3.00 / 24.63 ± 1.65 44.88 ± 1.45 / 62.35 ± 1.33 9.74 ± 1.96 / 47.42 ± 4.19 @@ -6448,7 +6448,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 3.01 3.05 3.22 - 2.75 + 2.76 35.21 ± 2.52 / 23.65 ± 2.52 12.73 ± 2.87 / 22.69 ± 1.80 4.75 ± 2.45 / 35.71 ± 3.01 @@ -6507,7 +6507,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 10,194 ± 2,403 / 2,193 ± 731 3.01 2.88 - 3.19 + 3.20 2.96 41.79 ± 2.11 / 32.67 ± 2.33 41.86 ± 1.28 / 61.22 ± 0.99 @@ -6558,6 +6558,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 13.0.0 13.0.0 + + microsoft/Phi-3-mini-4k-instruct (few-shot) + 3821 + 32 + 4096 + True + 8,681 ± 1,650 / 2,177 ± 717 + 3.01 + 2.83 + 3.25 + 2.96 + 41.37 ± 2.50 / 24.64 ± 2.50 + 42.60 ± 1.06 / 61.52 ± 0.75 + 6.52 ± 1.34 / 45.01 ± 2.64 + 50.57 ± 1.03 / 57.75 ± 0.64 + 64.55 ± 0.78 / 19.26 ± 0.48 + 38.64 ± 1.52 / 53.92 ± 1.16 + 42.12 ± 2.16 / 61.23 ± 1.46 + 13.66 ± 1.32 / 35.16 ± 1.02 + 56.33 ± 1.63 / 36.68 ± 3.27 + 54.68 ± 1.29 / 37.85 ± 3.79 + 37.18 ± 1.30 / 55.44 ± 1.46 + 61.44 ± 0.71 / 13.62 ± 0.60 + 6.76 ± 2.81 / 41.69 ± 2.82 + 6.79 ± 1.51 / 45.45 ± 3.51 + 30.11 ± 2.09 / 52.56 ± 2.38 + 15.54 ± 0.89 / 36.69 ± 0.67 + 17.55 ± 0.88 / 37.93 ± 0.71 + 46.15 ± 2.77 / 24.28 ± 3.74 + 67.17 ± 1.93 / 70.99 ± 1.64 + 5.30 ± 1.62 / 47.01 ± 3.23 + 51.12 ± 1.02 / 57.49 ± 0.81 + 59.20 ± 0.99 / 15.57 ± 0.62 + 21.33 ± 1.03 / 41.04 ± 0.78 + 16.12 ± 1.15 / 36.99 ± 0.85 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.4 + 12.10.4 + 12.10.4 + 12.10.4 + 12.10.4 + 12.10.4 + 12.10.4 + 12.10.4 + 12.10.4 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot) 7242 @@ -6618,66 +6678,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.3.2 12.3.2 - - microsoft/Phi-3-mini-4k-instruct (few-shot) - 3821 - 32 - 4096 - True - 3,194 ± 687 / 650 ± 216 - 3.02 - 2.83 - 3.26 - 2.96 - 41.37 ± 2.50 / 24.64 ± 2.50 - 42.60 ± 1.06 / 61.52 ± 0.75 - 6.52 ± 1.34 / 45.01 ± 2.64 - 50.57 ± 1.03 / 57.75 ± 0.64 - 64.55 ± 0.78 / 19.26 ± 0.48 - 38.64 ± 1.52 / 53.92 ± 1.16 - 42.12 ± 2.16 / 61.23 ± 1.46 - 13.66 ± 1.32 / 35.16 ± 1.02 - 56.33 ± 1.63 / 36.68 ± 3.27 - 54.68 ± 1.29 / 37.85 ± 3.79 - 37.18 ± 1.30 / 55.44 ± 1.46 - 61.44 ± 0.71 / 13.62 ± 0.60 - 6.76 ± 2.81 / 41.69 ± 2.82 - 6.79 ± 1.51 / 45.45 ± 3.51 - 30.11 ± 2.09 / 52.56 ± 2.38 - 15.54 ± 0.89 / 36.69 ± 0.67 - 17.55 ± 0.88 / 37.93 ± 0.71 - 46.15 ± 2.77 / 24.28 ± 3.74 - 67.17 ± 1.93 / 70.99 ± 1.64 - 5.30 ± 1.62 / 47.01 ± 3.23 - 51.12 ± 1.02 / 57.49 ± 0.81 - 59.20 ± 0.99 / 15.57 ± 0.62 - 21.33 ± 1.03 / 41.04 ± 0.78 - 16.12 ± 1.15 / 36.99 ± 0.85 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.4 - 12.10.4 - 12.10.4 - 12.10.4 - 12.10.4 - 12.10.4 - 12.10.4 - 
12.10.4 - 12.10.4 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - emillykkejensen/Phi-3-mini-4k-instruct-dansk (few-shot) 3821 @@ -6925,9 +6925,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 32768 True 2,461 ± 476 / 773 ± 248 - 3.06 + 3.07 2.88 - 3.36 + 3.37 2.95 30.16 ± 4.47 / 25.03 ± 3.01 48.49 ± 2.41 / 63.06 ± 1.91 @@ -7047,7 +7047,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 409 ± 53 / 182 ± 54 3.11 2.85 - 3.49 + 3.48 3.00 26.57 ± 5.22 / 17.35 ± 2.79 47.81 ± 1.32 / 64.26 ± 1.28 @@ -7107,7 +7107,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 6,450 ± 961 / 2,082 ± 658 3.13 2.92 - 3.38 + 3.39 3.08 38.39 ± 3.57 / 24.87 ± 2.52 49.44 ± 1.03 / 66.00 ± 0.88 @@ -7286,7 +7286,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 3,371 ± 876 / 561 ± 184 3.15 - 3.11 + 3.10 3.42 2.93 33.80 ± 2.66 / 25.32 ± 3.06 @@ -7352,87 +7352,27 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 32.34 ± 3.77 / 24.48 ± 3.17 29.50 ± 3.63 / 42.61 ± 4.86 3.89 ± 1.49 / 37.29 ± 3.65 - 53.67 ± 0.84 / 59.15 ± 0.69 - 64.48 ± 0.83 / 19.25 ± 0.78 - 24.40 ± 2.55 / 42.44 ± 1.87 - 31.93 ± 1.90 / 54.43 ± 1.22 - 10.00 ± 1.73 / 32.31 ± 0.87 - 43.00 ± 2.81 / 35.39 ± 2.28 - 45.08 ± 0.83 / 38.16 ± 1.91 - 35.36 ± 2.31 / 54.88 ± 2.08 - 62.00 ± 0.62 / 14.22 ± 0.76 - 2.79 ± 1.92 / 41.90 ± 4.62 - 1.95 ± 2.02 / 38.91 ± 3.27 - 37.33 ± 3.11 / 59.74 ± 2.74 - 15.76 ± 1.18 / 36.68 ± 0.90 - 12.98 ± 1.32 / 34.07 ± 1.34 - 36.54 ± 2.70 / 28.79 ± 3.85 - 68.85 ± 5.19 / 70.02 ± 3.95 - 2.60 ± 2.58 / 40.21 ± 4.08 - 54.58 ± 0.89 / 59.78 ± 0.77 - 61.77 ± 1.85 / 17.01 ± 0.69 - 16.19 ± 1.01 / 37.23 ± 0.78 - 14.06 ± 1.52 / 34.80 ± 1.35 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - - - TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) - 7800 - 100 - 4096 - True - 6,197 ± 1,118 / 1,730 ± 577 - 3.16 - 3.00 - 3.44 - 3.05 - 34.25 ± 2.28 / 30.39 ± 2.14 - 45.67 ± 2.41 / 58.41 ± 3.04 - 10.62 ± 2.37 / 53.20 ± 3.73 - 50.77 ± 2.48 / 56.92 ± 2.42 - 65.67 ± 1.05 / 19.97 ± 0.93 - 2.31 ± 1.92 / 24.65 ± 1.65 - 10.57 ± 2.47 / 40.33 ± 1.67 - 0.64 ± 1.55 / 25.40 ± 0.77 - 42.77 ± 2.42 / 40.31 ± 2.34 - 45.69 ± 3.69 / 43.11 ± 3.62 - 37.79 ± 2.38 / 56.41 ± 2.91 - 61.05 ± 1.44 / 13.39 ± 1.24 - 8.77 ± 1.96 / 49.11 ± 4.22 - 8.47 ± 3.16 / 49.65 ± 4.39 - 44.24 ± 4.15 / 65.11 ± 3.79 - -1.34 ± 1.57 / 22.60 ± 0.91 - -0.94 ± 1.03 / 24.17 ± 0.64 - 42.87 ± 3.17 / 40.34 ± 2.52 - 79.18 ± 0.46 / 76.66 ± 1.44 - 8.65 ± 1.60 / 46.95 ± 3.15 - 51.56 ± 0.79 / 57.58 ± 0.79 - 64.66 ± 0.41 / 18.04 ± 0.38 - 2.37 ± 1.04 / 24.51 ± 0.95 - 0.17 ± 0.83 / 25.26 ± 0.69 + 53.67 ± 0.84 / 59.15 ± 0.69 + 64.48 ± 0.83 / 19.25 ± 0.78 + 24.40 ± 2.55 / 42.44 ± 1.87 + 31.93 ± 1.90 / 54.43 ± 1.22 + 10.00 ± 1.73 / 32.31 ± 0.87 + 43.00 ± 2.81 / 35.39 ± 2.28 + 45.08 ± 0.83 / 38.16 ± 1.91 + 35.36 ± 2.31 / 54.88 ± 2.08 + 62.00 ± 0.62 / 14.22 ± 0.76 + 2.79 ± 1.92 / 41.90 ± 4.62 + 1.95 ± 2.02 / 38.91 ± 3.27 + 37.33 ± 3.11 / 59.74 ± 2.74 + 15.76 ± 1.18 / 36.68 ± 0.90 + 12.98 ± 1.32 / 34.07 ± 1.34 + 36.54 ± 2.70 / 28.79 ± 3.85 + 68.85 ± 5.19 / 70.02 ± 3.95 + 2.60 ± 2.58 / 40.21 ± 4.08 + 54.58 ± 0.89 / 59.78 ± 0.77 + 61.77 ± 1.85 / 17.01 ± 0.69 + 16.19 ± 1.01 / 37.23 ± 0.78 + 14.06 ± 1.52 / 34.80 ± 1.35 13.0.0 13.0.0 13.0.0 @@ -7518,6 +7458,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.10.0 12.10.0 + + TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) + 7800 + 100 + 4096 + True + 6,197 ± 1,118 / 1,730 ± 577 + 3.17 + 3.00 + 3.45 + 3.05 + 34.25 ± 
2.28 / 30.39 ± 2.14 + 45.67 ± 2.41 / 58.41 ± 3.04 + 10.62 ± 2.37 / 53.20 ± 3.73 + 50.77 ± 2.48 / 56.92 ± 2.42 + 65.67 ± 1.05 / 19.97 ± 0.93 + 2.31 ± 1.92 / 24.65 ± 1.65 + 10.57 ± 2.47 / 40.33 ± 1.67 + 0.64 ± 1.55 / 25.40 ± 0.77 + 42.77 ± 2.42 / 40.31 ± 2.34 + 45.69 ± 3.69 / 43.11 ± 3.62 + 37.79 ± 2.38 / 56.41 ± 2.91 + 61.05 ± 1.44 / 13.39 ± 1.24 + 8.77 ± 1.96 / 49.11 ± 4.22 + 8.47 ± 3.16 / 49.65 ± 4.39 + 44.24 ± 4.15 / 65.11 ± 3.79 + -1.34 ± 1.57 / 22.60 ± 0.91 + -0.94 ± 1.03 / 24.17 ± 0.64 + 42.87 ± 3.17 / 40.34 ± 2.52 + 79.18 ± 0.46 / 76.66 ± 1.44 + 8.65 ± 1.60 / 46.95 ± 3.15 + 51.56 ± 0.79 / 57.58 ± 0.79 + 64.66 ± 0.41 / 18.04 ± 0.38 + 2.37 ± 1.04 / 24.51 ± 0.95 + 0.17 ± 0.83 / 25.26 ± 0.69 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + google/gemma-2-2b (few-shot) 2614 @@ -7525,8 +7525,8 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 8193 True 5,235 ± 1,226 / 1,154 ± 366 - 3.17 - 3.01 + 3.18 + 3.02 3.51 3.00 17.29 ± 2.84 / 13.87 ± 2.03 @@ -7585,9 +7585,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2048 True 1,875 ± 673 / 261 ± 91 - 3.19 + 3.20 3.15 - 3.36 + 3.37 3.07 27.41 ± 3.48 / 19.03 ± 1.76 30.23 ± 3.43 / 41.05 ± 4.38 @@ -7645,8 +7645,8 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 4096 True 4,438 ± 1,128 / 1,028 ± 346 - 3.19 - 3.01 + 3.20 + 3.03 3.42 3.15 16.72 ± 2.23 / 15.96 ± 2.08 @@ -8187,7 +8187,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 840 ± 79 / 400 ± 124 3.39 3.14 - 3.79 + 3.80 3.23 28.60 ± 4.69 / 20.29 ± 3.37 48.71 ± 1.27 / 60.90 ± 2.95 @@ -8298,66 +8298,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 13.0.0 13.0.0 - - google/gemma-2b (few-shot) - 2506 - 256 - 8192 - True - 6,087 ± 1,046 / 1,902 ± 563 - 3.42 - 3.19 - 3.76 - 3.32 - 19.97 ± 3.91 / 16.51 ± 3.20 - 40.21 ± 1.00 / 46.73 ± 1.82 - 2.27 ± 2.39 / 38.71 ± 4.03 - 50.55 ± 1.22 / 56.27 ± 1.09 - 63.07 ± 1.07 / 16.98 ± 0.98 - 15.04 ± 1.21 / 35.51 ± 0.80 - 30.63 ± 3.36 / 50.02 ± 2.13 - 4.90 ± 0.95 / 28.18 ± 0.88 - 15.53 ± 5.69 / 15.49 ± 5.08 - 19.78 ± 4.54 / 18.86 ± 4.22 - 32.89 ± 1.65 / 42.58 ± 3.16 - 57.65 ± 2.16 / 10.13 ± 1.30 - 1.18 ± 1.00 / 33.34 ± 0.26 - 0.00 ± 0.00 / 32.79 ± 0.34 - 33.33 ± 3.73 / 53.15 ± 4.42 - 11.27 ± 1.41 / 32.73 ± 1.25 - 5.10 ± 1.44 / 28.63 ± 1.05 - 14.67 ± 4.71 / 14.85 ± 3.77 - 75.45 ± 1.10 / 64.08 ± 1.47 - 3.82 ± 1.23 / 44.81 ± 3.55 - 51.73 ± 0.88 / 57.35 ± 0.82 - 59.72 ± 1.46 / 15.26 ± 0.64 - 10.98 ± 0.98 / 31.92 ± 0.80 - 4.24 ± 0.47 / 27.53 ± 0.44 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 - 12.6.1 - 12.1.0 - 12.1.0 - 12.1.0 - 12.5.2 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - 12.5.2 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - 12.1.0 - ibm-granite/granite-3b-code-instruct-2k (few-shot) 3483 @@ -8538,6 +8478,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.10.0 12.10.0 + + google/gemma-2b (few-shot) + 2506 + 256 + 8192 + True + 6,087 ± 1,046 / 1,902 ± 563 + 3.44 + 3.23 + 3.76 + 3.32 + 19.97 ± 3.91 / 16.51 ± 3.20 + 40.21 ± 1.00 / 46.73 ± 1.82 + 2.27 ± 2.39 / 38.71 ± 4.03 + 50.55 ± 1.22 / 56.27 ± 1.09 + 63.07 ± 1.07 / 16.98 ± 0.98 + 15.04 ± 1.21 / 35.51 ± 0.80 + 30.63 ± 3.36 / 50.02 ± 2.13 + 4.90 ± 0.95 / 28.18 ± 0.88 + 15.53 ± 5.69 / 15.49 ± 5.08 + 19.78 ± 4.54 / 18.86 ± 4.22 + 32.89 ± 1.65 / 42.58 ± 3.16 + 57.65 ± 2.16 / 10.13 ± 1.30 + 1.18 ± 1.00 / 33.34 ± 0.26 + 0.00 ± 0.00 / 32.79 ± 0.34 + 33.33 ± 3.73 / 53.15 ± 4.42 + 11.27 ± 1.41 / 32.73 ± 1.25 + 5.10 ± 1.44 / 28.63 ± 1.05 + 14.67 ± 4.71 / 14.85 ± 3.77 
+ 75.45 ± 1.10 / 64.08 ± 1.47 + 3.82 ± 1.23 / 44.81 ± 3.55 + 51.73 ± 0.88 / 57.35 ± 0.82 + 59.72 ± 1.46 / 15.26 ± 0.64 + 10.98 ± 0.98 / 31.92 ± 0.80 + 4.24 ± 0.47 / 27.53 ± 0.44 + 12.5.2 + 12.1.0 + 12.1.0 + 12.1.0 + 12.6.1 + 12.1.0 + 12.1.0 + 12.1.0 + 12.5.2 + 12.5.2 + 12.1.0 + 12.1.0 + 12.1.0 + 12.1.0 + 12.1.0 + 12.1.0 + 12.1.0 + 12.5.2 + 12.1.0 + 12.1.0 + 12.1.0 + 12.1.0 + 12.1.0 + 12.1.0 + ibm-granite/granite-3.0-3b-a800m-base (few-shot) 3374 @@ -8546,9 +8546,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 10,504 ± 3,028 / 1,678 ± 559 3.45 - 3.45 + 3.44 3.63 - 3.27 + 3.28 31.80 ± 2.87 / 23.06 ± 2.09 6.85 ± 2.25 / 19.42 ± 0.91 0.97 ± 1.10 / 36.48 ± 3.91 @@ -8666,7 +8666,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 7,259 ± 2,120 / 1,240 ± 432 3.47 - 3.34 + 3.33 3.82 3.25 28.45 ± 1.61 / 22.90 ± 1.63 @@ -8905,8 +8905,8 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 4096 False 3,027 ± 503 / 903 ± 296 - 3.52 - 3.37 + 3.51 + 3.35 3.75 3.43 13.78 ± 2.85 / 11.90 ± 2.13 @@ -8966,7 +8966,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 2,732 ± 868 / 662 ± 238 3.52 - 3.16 + 3.15 4.00 3.40 38.62 ± 3.40 / 27.71 ± 3.01 @@ -9018,6 +9018,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 13.0.0 13.0.0 + + LumiOpen/Viking-7B (few-shot) + 7550 + 131 + 4096 + True + 1,431 ± 352 / 287 ± 97 + 3.53 + 3.36 + 3.88 + 3.36 + 23.98 ± 3.74 / 17.18 ± 2.86 + 38.74 ± 2.15 / 49.48 ± 3.14 + 1.04 ± 1.57 / 33.67 ± 0.22 + 50.17 ± 0.92 / 56.29 ± 0.78 + 61.96 ± 0.87 / 13.71 ± 1.26 + -0.06 ± 0.90 / 23.97 ± 0.72 + -1.04 ± 2.16 / 34.90 ± 1.48 + 0.73 ± 0.90 / 25.11 ± 0.51 + 22.37 ± 5.96 / 23.75 ± 3.97 + 29.90 ± 4.44 / 26.84 ± 3.45 + 35.86 ± 3.52 / 52.28 ± 3.76 + 53.25 ± 1.69 / 6.87 ± 0.67 + 1.03 ± 2.07 / 36.12 ± 2.97 + 2.92 ± 1.89 / 36.47 ± 2.72 + 34.39 ± 3.15 / 54.65 ± 3.56 + -1.16 ± 0.91 / 21.94 ± 0.46 + -0.55 ± 1.14 / 25.09 ± 0.85 + 30.64 ± 4.19 / 23.90 ± 3.44 + 72.02 ± 3.18 / 72.36 ± 3.96 + 1.08 ± 1.36 / 38.63 ± 3.03 + 48.72 ± 1.05 / 54.59 ± 1.10 + 57.93 ± 2.32 / 13.16 ± 1.05 + 1.14 ± 0.79 / 22.39 ± 0.53 + 1.13 ± 1.01 / 25.61 ± 0.65 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + HPLT/gpt-13b-nordic-prerelease (few-shot) 14030 @@ -9078,66 +9138,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.6.1 12.6.1 - - LumiOpen/Viking-7B (few-shot) - 7550 - 131 - 4096 - True - 1,431 ± 352 / 287 ± 97 - 3.54 - 3.37 - 3.88 - 3.36 - 23.98 ± 3.74 / 17.18 ± 2.86 - 38.74 ± 2.15 / 49.48 ± 3.14 - 1.04 ± 1.57 / 33.67 ± 0.22 - 50.17 ± 0.92 / 56.29 ± 0.78 - 61.96 ± 0.87 / 13.71 ± 1.26 - -0.06 ± 0.90 / 23.97 ± 0.72 - -1.04 ± 2.16 / 34.90 ± 1.48 - 0.73 ± 0.90 / 25.11 ± 0.51 - 22.37 ± 5.96 / 23.75 ± 3.97 - 29.90 ± 4.44 / 26.84 ± 3.45 - 35.86 ± 3.52 / 52.28 ± 3.76 - 53.25 ± 1.69 / 6.87 ± 0.67 - 1.03 ± 2.07 / 36.12 ± 2.97 - 2.92 ± 1.89 / 36.47 ± 2.72 - 34.39 ± 3.15 / 54.65 ± 3.56 - -1.16 ± 0.91 / 21.94 ± 0.46 - -0.55 ± 1.14 / 25.09 ± 0.85 - 30.64 ± 4.19 / 23.90 ± 3.44 - 72.02 ± 3.18 / 72.36 ± 3.96 - 1.08 ± 1.36 / 38.63 ± 3.03 - 48.72 ± 1.05 / 54.59 ± 1.10 - 57.93 ± 2.32 / 13.16 ± 1.05 - 1.14 ± 0.79 / 22.39 ± 0.53 - 1.13 ± 1.01 / 25.61 ± 0.65 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - AI-Sweden-Models/gpt-sw3-6.7b (few-shot) 7111 @@ -9205,9 +9205,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 8192 True 15,971 ± 3,654 / 
3,609 ± 1,197 - 3.60 + 3.61 3.44 - 4.00 + 4.01 3.37 29.44 ± 1.81 / 20.31 ± 1.68 18.49 ± 2.47 / 35.29 ± 2.83 @@ -9265,9 +9265,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 8192 True 16,249 ± 3,690 / 3,689 ± 1,226 - 3.63 + 3.64 3.59 - 3.98 + 3.99 3.33 24.47 ± 3.42 / 18.70 ± 2.18 9.93 ± 2.70 / 23.57 ± 3.00 @@ -9318,66 +9318,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 13.1.0 13.1.0 - - meta-llama/Llama-3.2-1B (few-shot) - 1236 - 128 - 131073 - True - 7,577 ± 1,884 / 1,555 ± 492 - 3.64 - 3.49 - 3.98 - 3.46 - 19.82 ± 4.70 / 17.20 ± 3.57 - 35.97 ± 3.00 / 49.88 ± 3.80 - 2.14 ± 2.61 / 44.16 ± 4.48 - 46.59 ± 5.44 / 51.92 ± 6.14 - 58.65 ± 1.07 / 11.57 ± 0.77 - 2.65 ± 1.26 / 25.17 ± 1.16 - 9.51 ± 2.67 / 40.41 ± 1.62 - -0.88 ± 1.13 / 24.79 ± 0.98 - 30.54 ± 3.75 / 29.88 ± 3.25 - 31.34 ± 4.72 / 30.46 ± 4.56 - 29.50 ± 4.18 / 49.19 ± 4.59 - 53.31 ± 0.98 / 7.37 ± 0.54 - -0.13 ± 1.28 / 37.46 ± 3.25 - 0.02 ± 1.75 / 39.49 ± 4.41 - 19.59 ± 5.61 / 34.02 ± 8.33 - 2.49 ± 1.09 / 25.63 ± 0.78 - 2.53 ± 1.16 / 26.30 ± 0.82 - 29.89 ± 7.13 / 27.65 ± 6.45 - 74.33 ± 1.07 / 73.73 ± 1.77 - 1.06 ± 1.79 / 43.95 ± 3.08 - 46.89 ± 2.72 / 52.70 ± 3.39 - 52.06 ± 2.11 / 12.53 ± 0.81 - 0.93 ± 1.44 / 26.10 ± 1.04 - 0.09 ± 1.37 / 24.84 ± 0.69 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - google/gemma-2b-it (few-shot) 2506 @@ -9438,6 +9378,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 12.1.0 12.1.0 + + meta-llama/Llama-3.2-1B (few-shot) + 1236 + 128 + 131073 + True + 7,577 ± 1,884 / 1,555 ± 492 + 3.65 + 3.52 + 3.98 + 3.46 + 19.82 ± 4.70 / 17.20 ± 3.57 + 35.97 ± 3.00 / 49.88 ± 3.80 + 2.14 ± 2.61 / 44.16 ± 4.48 + 46.59 ± 5.44 / 51.92 ± 6.14 + 58.65 ± 1.07 / 11.57 ± 0.77 + 2.65 ± 1.26 / 25.17 ± 1.16 + 9.51 ± 2.67 / 40.41 ± 1.62 + -0.88 ± 1.13 / 24.79 ± 0.98 + 30.54 ± 3.75 / 29.88 ± 3.25 + 31.34 ± 4.72 / 30.46 ± 4.56 + 29.50 ± 4.18 / 49.19 ± 4.59 + 53.31 ± 0.98 / 7.37 ± 0.54 + -0.13 ± 1.28 / 37.46 ± 3.25 + 0.02 ± 1.75 / 39.49 ± 4.41 + 19.59 ± 5.61 / 34.02 ± 8.33 + 2.49 ± 1.09 / 25.63 ± 0.78 + 2.53 ± 1.16 / 26.30 ± 0.82 + 29.89 ± 7.13 / 27.65 ± 6.45 + 74.33 ± 1.07 / 73.73 ± 1.77 + 1.06 ± 1.79 / 43.95 ± 3.08 + 46.89 ± 2.72 / 52.70 ± 3.39 + 52.06 ± 2.11 / 12.53 ± 0.81 + 0.93 ± 1.44 / 26.10 ± 1.04 + 0.09 ± 1.37 / 24.84 ± 0.69 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + AI-Sweden-Models/gpt-sw3-1.3b (few-shot) 1445 @@ -9745,10 +9745,10 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 4096 True 1,382 ± 337 / 257 ± 91 - 3.74 + 3.75 3.52 4.19 - 3.52 + 3.53 21.98 ± 3.33 / 18.42 ± 2.62 37.77 ± 3.06 / 55.35 ± 4.51 1.26 ± 1.86 / 34.03 ± 0.86 @@ -9868,7 +9868,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 3.76 3.61 4.11 - 3.56 + 3.55 30.63 ± 2.60 / 20.55 ± 1.52 22.35 ± 2.26 / 44.97 ± 2.66 1.95 ± 1.48 / 46.94 ± 1.55 @@ -9918,66 +9918,6 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 13.2.0 13.2.0 - - NbAiLab/nb-gpt-j-6B-alpaca (few-shot) - 6055 - 50 - 1024 - False - 2,607 ± 592 / 680 ± 208 - 3.78 - 3.57 - 3.90 - 3.88 - 12.95 ± 3.80 / 11.68 ± 2.31 - 27.68 ± 3.64 / 46.61 ± 4.11 - 1.65 ± 1.96 / 47.94 ± 2.55 - 38.60 ± 0.65 / 47.40 ± 0.64 - 63.32 ± 0.32 / 16.03 ± 0.49 - 4.49 ± 1.44 / 27.32 ± 0.89 - 12.81 ± 4.57 / 37.83 ± 2.98 - -0.68 ± 1.31 / 25.00 ± 0.79 - 23.82 ± 4.25 / 22.08 ± 2.50 - 26.04 ± 6.38 / 24.47 ± 3.69 - 32.60 ± 1.84 / 
47.47 ± 3.33 - 58.08 ± 0.42 / 11.74 ± 0.50 - 0.34 ± 1.43 / 44.47 ± 2.44 - 2.26 ± 2.27 / 45.41 ± 3.25 - 21.33 ± 0.98 / 42.76 ± 1.02 - 2.13 ± 1.32 / 26.30 ± 1.12 - 1.87 ± 1.34 / 25.87 ± 0.75 - 13.28 ± 4.32 / 13.40 ± 2.95 - 60.17 ± 8.39 / 65.99 ± 4.66 - 1.52 ± 1.94 / 45.19 ± 3.80 - 37.23 ± 1.07 / 46.83 ± 0.82 - 46.68 ± 0.33 / 12.40 ± 0.17 - -0.03 ± 1.31 / 23.73 ± 1.11 - 0.02 ± 0.88 / 25.04 ± 0.61 - 9.3.1 - 10.0.1 - 10.0.1 - 12.4.0 - 12.6.1 - 10.0.1 - 10.0.1 - 10.0.1 - 9.3.2 - 9.3.2 - 10.0.1 - 12.4.0 - 10.0.1 - 10.0.1 - 12.4.0 - 10.0.1 - 10.0.1 - 9.3.1 - 10.0.1 - 10.0.1 - 12.4.0 - 12.4.0 - 10.0.1 - 10.0.1 - state-spaces/mamba-2.8b-hf (few-shot) 2768 @@ -9986,7 +9926,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 2,722 ± 495 / 766 ± 250 3.78 - 3.68 + 3.69 4.06 3.59 17.58 ± 1.95 / 15.48 ± 1.39 @@ -10038,6 +9978,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 13.0.0 13.0.0 + + NbAiLab/nb-gpt-j-6B-alpaca (few-shot) + 6055 + 50 + 1024 + False + 2,607 ± 592 / 680 ± 208 + 3.79 + 3.57 + 3.91 + 3.88 + 12.95 ± 3.80 / 11.68 ± 2.31 + 27.68 ± 3.64 / 46.61 ± 4.11 + 1.65 ± 1.96 / 47.94 ± 2.55 + 38.60 ± 0.65 / 47.40 ± 0.64 + 63.32 ± 0.32 / 16.03 ± 0.49 + 4.49 ± 1.44 / 27.32 ± 0.89 + 12.81 ± 4.57 / 37.83 ± 2.98 + -0.68 ± 1.31 / 25.00 ± 0.79 + 23.82 ± 4.25 / 22.08 ± 2.50 + 26.04 ± 6.38 / 24.47 ± 3.69 + 32.60 ± 1.84 / 47.47 ± 3.33 + 58.08 ± 0.42 / 11.74 ± 0.50 + 0.34 ± 1.43 / 44.47 ± 2.44 + 2.26 ± 2.27 / 45.41 ± 3.25 + 21.33 ± 0.98 / 42.76 ± 1.02 + 2.13 ± 1.32 / 26.30 ± 1.12 + 1.87 ± 1.34 / 25.87 ± 0.75 + 13.28 ± 4.32 / 13.40 ± 2.95 + 60.17 ± 8.39 / 65.99 ± 4.66 + 1.52 ± 1.94 / 45.19 ± 3.80 + 37.23 ± 1.07 / 46.83 ± 0.82 + 46.68 ± 0.33 / 12.40 ± 0.17 + -0.03 ± 1.31 / 23.73 ± 1.11 + 0.02 ± 0.88 / 25.04 ± 0.61 + 9.3.1 + 10.0.1 + 10.0.1 + 12.4.0 + 12.6.1 + 10.0.1 + 10.0.1 + 10.0.1 + 9.3.2 + 9.3.2 + 10.0.1 + 12.4.0 + 10.0.1 + 10.0.1 + 12.4.0 + 10.0.1 + 10.0.1 + 9.3.1 + 10.0.1 + 10.0.1 + 12.4.0 + 12.4.0 + 10.0.1 + 10.0.1 + AI-Sweden-Models/gpt-sw3-356m (few-shot) 471 @@ -10045,9 +10045,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2048 True 5,758 ± 1,348 / 1,215 ± 391 - 3.86 - 3.70 - 3.96 + 3.87 + 3.71 + 3.97 3.92 16.13 ± 4.02 / 14.90 ± 3.13 27.61 ± 2.14 / 39.77 ± 1.85 @@ -10105,9 +10105,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 4096 True 7,808 ± 2,183 / 1,289 ± 428 - 3.87 - 3.75 - 4.16 + 3.88 + 3.77 + 4.17 3.71 16.82 ± 3.53 / 14.28 ± 3.01 17.52 ± 2.86 / 27.96 ± 2.94 @@ -10226,9 +10226,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 6,513 ± 1,241 / 1,282 ± 644 3.94 - 3.86 + 3.88 4.21 - 3.75 + 3.74 15.93 ± 3.91 / 14.68 ± 2.81 13.01 ± 2.33 / 28.28 ± 4.63 0.05 ± 1.37 / 40.73 ± 3.78 @@ -10285,9 +10285,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2048 True 8,597 ± 1,983 / 1,926 ± 600 - 3.96 + 3.95 3.73 - 4.19 + 4.18 3.95 14.13 ± 3.50 / 12.15 ± 3.14 26.31 ± 5.33 / 44.07 ± 6.36 @@ -10347,7 +10347,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 10,756 ± 3,589 / 1,157 ± 670 3.99 3.66 - 4.23 + 4.24 4.08 28.30 ± 2.45 / 22.93 ± 1.82 28.95 ± 4.05 / 48.32 ± 5.01 @@ -10398,6 +10398,66 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 14.0.4 14.0.4 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 4.00 + 3.90 + 4.24 + 3.87 + 20.03 ± 1.60 / 17.52 ± 1.50 + 15.96 ± 2.77 / 38.74 ± 2.88 + 0.86 ± 1.76 / 42.48 ± 3.82 + 28.98 ± 1.93 / 35.38 ± 2.10 + 56.75 ± 0.67 / 10.37 ± 0.34 + 2.15 ± 1.69 / 25.19 ± 0.96 + 2.51 ± 2.17 / 35.61 ± 1.10 + 0.88 ± 0.95 / 25.68 ± 0.52 + 29.25 ± 2.15 / 28.34 ± 2.25 + 25.45 ± 1.96 / 24.62 ± 1.86 + 11.28 ± 0.71 / 22.46 ± 2.54 + 55.58 ± 1.17 / 8.04 ± 0.65 + 1.52 ± 1.26 / 
40.82 ± 4.08 + 0.52 ± 1.60 / 41.23 ± 4.21 + 8.47 ± 0.94 / 20.43 ± 2.10 + 2.62 ± 1.32 / 26.23 ± 0.92 + -0.06 ± 0.97 / 24.96 ± 0.73 + 26.41 ± 2.74 / 25.18 ± 2.46 + 25.99 ± 3.51 / 35.36 ± 4.53 + 1.64 ± 1.90 / 37.52 ± 2.69 + 21.39 ± 1.66 / 26.70 ± 1.83 + 58.09 ± 0.53 / 13.29 ± 0.62 + 3.51 ± 1.35 / 27.14 ± 1.02 + 1.05 ± 1.05 / 25.62 ± 0.84 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + RuterNorway/Llama-2-7b-chat-norwegian (few-shot) unknown @@ -10406,8 +10466,8 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 False 10,890 ± 2,686 / 2,186 ± 750 4.01 - 4.07 - 4.19 + 4.06 + 4.20 3.78 10.12 ± 1.24 / 9.84 ± 1.12 10.65 ± 3.65 / 28.33 ± 5.27 @@ -10466,7 +10526,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 11,371 ± 2,924 / 2,122 ± 692 4.04 - 3.95 + 3.94 4.34 3.83 19.01 ± 1.91 / 17.08 ± 1.83 @@ -10585,10 +10645,10 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2048 True 7,717 ± 1,553 / 2,013 ± 625 - 4.10 - 3.98 - 4.25 - 4.06 + 4.09 + 3.99 + 4.24 + 4.05 13.98 ± 1.54 / 13.46 ± 1.42 6.37 ± 3.38 / 25.43 ± 4.09 0.41 ± 0.80 / 33.31 ± 0.24 @@ -10646,7 +10706,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 2,519 ± 841 / 323 ± 104 4.14 - 3.69 + 3.71 4.32 4.40 16.17 ± 3.44 / 14.33 ± 1.92 @@ -10705,9 +10765,9 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 131072 True 3,424 ± 1,080 / 464 ± 158 - 4.17 - 4.01 - 4.52 + 4.16 + 4.00 + 4.51 3.97 9.20 ± 3.54 / 8.11 ± 3.24 32.94 ± 4.62 / 46.54 ± 5.68 @@ -10766,8 +10826,8 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 22,023 ± 6,203 / 3,675 ± 1,231 4.28 - 4.10 - 4.49 + 4.09 + 4.50 4.25 12.68 ± 1.39 / 12.32 ± 1.19 3.61 ± 2.69 / 19.01 ± 3.95 @@ -10819,62 +10879,62 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 13.1.0 - Qwen/Qwen1.5-0.5B-Chat (few-shot) - 620 - 152 - 32768 - False - 11,740 ± 3,000 / 2,209 ± 721 + allenai/OLMo-1B (few-shot) + 1177 + 50 + 2051 + True + 8,536 ± 1,926 / 1,940 ± 619 4.29 - 3.85 - 5.14 - 3.89 - 17.38 ± 2.04 / 15.74 ± 1.99 - 10.72 ± 3.35 / 25.21 ± 3.80 - 1.32 ± 1.08 / 42.05 ± 3.69 - 34.58 ± 0.97 / 40.37 ± 1.02 - 55.87 ± 5.22 / 11.87 ± 1.03 - 4.56 ± 1.88 / 25.87 ± 1.72 - 22.41 ± 3.16 / 42.73 ± 0.96 - 1.71 ± 0.99 / 25.84 ± 0.79 - 29.52 ± 1.48 / 29.79 ± 1.62 - 31.27 ± 1.30 / 31.91 ± 1.31 - 11.49 ± 1.38 / 27.12 ± 1.98 - 9.92 ± 8.37 / 1.42 ± 1.14 - 0.29 ± 1.58 / 40.21 ± 4.22 - -0.12 ± 1.48 / 39.92 ± 3.90 - 7.80 ± 1.19 / 17.09 ± 2.72 - 0.29 ± 1.08 / 24.63 ± 0.79 - 0.49 ± 1.19 / 24.95 ± 0.86 - 18.57 ± 4.62 / 17.69 ± 4.61 - 40.23 ± 5.86 / 49.01 ± 4.77 - 0.21 ± 1.06 / 39.60 ± 3.61 - 29.49 ± 2.47 / 35.01 ± 2.72 - 53.29 ± 6.52 / 13.04 ± 1.68 - 2.59 ± 0.72 / 26.87 ± 0.72 - -0.84 ± 1.01 / 24.44 ± 0.61 + 4.20 + 4.61 + 4.07 + 13.39 ± 2.60 / 12.39 ± 2.46 + 17.94 ± 5.58 / 32.80 ± 3.63 + -2.02 ± 2.28 / 40.63 ± 4.12 + 23.65 ± 2.96 / 26.24 ± 3.20 + 48.87 ± 1.42 / 5.39 ± 0.62 + -0.33 ± 0.61 / 23.91 ± 0.71 + 0.05 ± 2.96 / 34.67 ± 1.78 + -0.08 ± 0.81 / 25.21 ± 0.56 + 30.79 ± 1.95 / 32.18 ± 1.98 + 31.12 ± 2.36 / 33.10 ± 2.68 + 9.95 ± 3.92 / 29.01 ± 2.80 + 40.45 ± 0.43 / 4.00 ± 0.12 + -0.95 ± 1.87 / 39.37 ± 3.33 + -0.04 ± 1.73 / 42.36 ± 4.61 + 0.00 ± 0.00 / 3.06 ± 0.05 + 0.32 ± 1.03 / 24.22 ± 1.37 + 0.12 ± 0.91 / 24.92 ± 0.59 + 29.39 ± 3.08 / 29.93 ± 3.14 + 38.95 ± 11.78 / 43.61 ± 8.46 + -1.35 ± 1.76 / 40.70 ± 4.25 + 17.85 ± 3.77 / 20.30 ± 4.04 + 43.75 ± 0.28 / 4.67 ± 0.12 + -0.22 ± 0.80 / 23.76 ± 0.84 + 0.75 ± 1.00 / 25.27 ± 0.56 12.5.2 - 11.0.0 + 12.1.0 12.1.0 - 12.4.0 + 12.1.0 12.6.1 12.1.0 12.1.0 12.1.0 12.5.2 
12.5.2 - 11.0.0 - 12.5.0 + 12.1.0 + 12.1.0 12.1.0 12.1.0 - 12.4.0 + 12.1.0 12.1.0 12.1.0 12.5.2 - 11.0.0 + 12.1.0 12.1.0 - 12.4.0 - 12.5.0 + 12.1.0 + 12.1.0 12.1.0 12.1.0 @@ -10939,62 +10999,62 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 14.0.4 - allenai/OLMo-1B (few-shot) - 1177 - 50 - 2051 - True - 8,536 ± 1,926 / 1,940 ± 619 + Qwen/Qwen1.5-0.5B-Chat (few-shot) + 620 + 152 + 32768 + False + 11,740 ± 3,000 / 2,209 ± 721 4.30 - 4.21 - 4.61 - 4.07 - 13.39 ± 2.60 / 12.39 ± 2.46 - 17.94 ± 5.58 / 32.80 ± 3.63 - -2.02 ± 2.28 / 40.63 ± 4.12 - 23.65 ± 2.96 / 26.24 ± 3.20 - 48.87 ± 1.42 / 5.39 ± 0.62 - -0.33 ± 0.61 / 23.91 ± 0.71 - 0.05 ± 2.96 / 34.67 ± 1.78 - -0.08 ± 0.81 / 25.21 ± 0.56 - 30.79 ± 1.95 / 32.18 ± 1.98 - 31.12 ± 2.36 / 33.10 ± 2.68 - 9.95 ± 3.92 / 29.01 ± 2.80 - 40.45 ± 0.43 / 4.00 ± 0.12 - -0.95 ± 1.87 / 39.37 ± 3.33 - -0.04 ± 1.73 / 42.36 ± 4.61 - 0.00 ± 0.00 / 3.06 ± 0.05 - 0.32 ± 1.03 / 24.22 ± 1.37 - 0.12 ± 0.91 / 24.92 ± 0.59 - 29.39 ± 3.08 / 29.93 ± 3.14 - 38.95 ± 11.78 / 43.61 ± 8.46 - -1.35 ± 1.76 / 40.70 ± 4.25 - 17.85 ± 3.77 / 20.30 ± 4.04 - 43.75 ± 0.28 / 4.67 ± 0.12 - -0.22 ± 0.80 / 23.76 ± 0.84 - 0.75 ± 1.00 / 25.27 ± 0.56 + 3.86 + 5.14 + 3.89 + 17.38 ± 2.04 / 15.74 ± 1.99 + 10.72 ± 3.35 / 25.21 ± 3.80 + 1.32 ± 1.08 / 42.05 ± 3.69 + 34.58 ± 0.97 / 40.37 ± 1.02 + 55.87 ± 5.22 / 11.87 ± 1.03 + 4.56 ± 1.88 / 25.87 ± 1.72 + 22.41 ± 3.16 / 42.73 ± 0.96 + 1.71 ± 0.99 / 25.84 ± 0.79 + 29.52 ± 1.48 / 29.79 ± 1.62 + 31.27 ± 1.30 / 31.91 ± 1.31 + 11.49 ± 1.38 / 27.12 ± 1.98 + 9.92 ± 8.37 / 1.42 ± 1.14 + 0.29 ± 1.58 / 40.21 ± 4.22 + -0.12 ± 1.48 / 39.92 ± 3.90 + 7.80 ± 1.19 / 17.09 ± 2.72 + 0.29 ± 1.08 / 24.63 ± 0.79 + 0.49 ± 1.19 / 24.95 ± 0.86 + 18.57 ± 4.62 / 17.69 ± 4.61 + 40.23 ± 5.86 / 49.01 ± 4.77 + 0.21 ± 1.06 / 39.60 ± 3.61 + 29.49 ± 2.47 / 35.01 ± 2.72 + 53.29 ± 6.52 / 13.04 ± 1.68 + 2.59 ± 0.72 / 26.87 ± 0.72 + -0.84 ± 1.01 / 24.44 ± 0.61 12.5.2 - 12.1.0 + 11.0.0 12.1.0 - 12.1.0 + 12.4.0 12.6.1 12.1.0 12.1.0 12.1.0 12.5.2 12.5.2 - 12.1.0 - 12.1.0 + 11.0.0 + 12.5.0 12.1.0 12.1.0 - 12.1.0 + 12.4.0 12.1.0 12.1.0 12.5.2 - 12.1.0 + 11.0.0 12.1.0 - 12.1.0 - 12.1.0 + 12.4.0 + 12.5.0 12.1.0 12.1.0 @@ -11006,7 +11066,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 3,024 ± 496 / 909 ± 301 4.35 - 4.22 + 4.20 4.53 4.31 12.82 ± 2.64 / 12.37 ± 1.95 @@ -11065,10 +11125,10 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 8192 True 21,777 ± 6,115 / 3,617 ± 1,211 - 4.37 - 4.17 - 4.50 - 4.43 + 4.38 + 4.20 + 4.51 + 4.42 8.97 ± 3.18 / 8.62 ± 2.72 2.66 ± 2.70 / 16.29 ± 2.34 1.65 ± 1.38 / 44.50 ± 3.21 @@ -11126,7 +11186,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 8,958 ± 1,815 / 2,240 ± 696 4.39 - 4.30 + 4.32 4.54 4.32 3.43 ± 2.66 / 5.56 ± 1.90 @@ -11186,7 +11246,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 26,346 ± 7,812 / 4,082 ± 1,372 4.39 - 4.44 + 4.43 4.40 4.33 13.72 ± 1.83 / 13.41 ± 1.52 @@ -11306,7 +11366,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 25,602 ± 7,583 / 3,953 ± 1,325 4.40 - 4.21 + 4.23 4.48 4.50 12.11 ± 1.07 / 11.48 ± 1.07 @@ -11365,10 +11425,10 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 512 True 1,373 ± 120 / 709 ± 172 - 4.43 + 4.42 4.37 4.58 - 4.33 + 4.32 0.00 ± 0.00 / 0.00 ± 0.00 4.81 ± 2.69 / 19.31 ± 1.01 0.00 ± 0.00 / 33.25 ± 0.23 @@ -11425,10 +11485,10 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 2048 True 2,331 ± 787 / 301 ± 97 - 4.46 - 4.22 + 4.45 + 4.21 4.63 - 4.53 + 4.52 10.59 ± 2.24 / 10.29 ± 1.37 13.31 ± 3.23 / 34.38 ± 3.13 0.52 ± 0.78 / 33.76 ± 0.37 @@ -11546,7 +11606,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 10,242 ± 3,432 / 1,335 ± 484 4.48 
- 4.32 + 4.33 4.55 4.57 13.84 ± 1.95 / 13.12 ± 1.60 @@ -11848,7 +11908,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 4.96 5.03 4.97 - 4.89 + 4.88 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 18.12 ± 0.19 0.00 ± 0.00 / 33.25 ± 0.23 @@ -11906,7 +11966,7 @@ title: Mainland Scandinavian NLG 🇩🇰🇳🇴🇸🇪 True 11,734 ± 3,124 / 2,174 ± 720 4.98 - 4.96 + 4.95 5.07 4.91 0.65 ± 0.68 / 0.59 ± 0.63 diff --git a/mainland-scandinavian-nlu.csv b/mainland-scandinavian-nlu.csv index 0fb29842..8f843cd6 100644 --- a/mainland-scandinavian-nlu.csv +++ b/mainland-scandinavian-nlu.csv @@ -12,7 +12,7 @@ KennethEnevoldsen/dfm-sentence-encoder-large-2,355,50,512,True,False,6569,1.54,1 "claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,1.56,1.38,1.87,1.44,65.88,63.61,71.03,46.24,74.23,70.5,50.92,76.1,72.03,40.57,70.22,77.7,74.34,49.32 google/rembert,576,250,256,True,False,11736,1.56,1.66,1.54,1.47,70.19,50.19,69.72,39.85,88.7,86.11,54.19,69.83,54.84,58.18,78.23,75.99,72.17,46.0 meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,1.57,1.29,2.11,1.31,71.94,61.26,64.94,56.0,82.44,82.17,40.55,63.91,45.93,45.33,76.27,80.7,68.85,56.41 -intfloat/multilingual-e5-large,560,250,512,True,False,6732,1.6,1.6,1.72,1.47,69.5,55.07,57.67,46.71,89.86,84.32,61.52,62.34,34.88,53.01,80.36,79.65,63.15,46.99 +intfloat/multilingual-e5-large,560,250,512,True,False,6732,1.59,1.6,1.72,1.46,69.5,55.07,57.67,46.71,89.86,84.32,61.52,62.34,34.88,53.01,80.36,79.65,63.15,46.99 google/gemma-2-27b-it (few-shot),27227,256,8193,True,False,1516,1.61,1.52,1.81,1.51,59.94,59.06,58.57,57.48,67.35,66.61,67.14,64.66,52.49,44.85,62.59,80.73,61.37,58.76 NbAiLab/nb-roberta-base-scandi,278,250,512,True,False,15079,1.63,1.73,1.56,1.61,73.28,52.08,67.99,32.39,92.24,87.58,59.98,70.18,70.81,44.27,80.02,76.21,71.92,33.8 microsoft/mdeberta-v3-base,279,251,512,True,False,20637,1.63,1.74,1.66,1.48,72.9,43.38,67.05,42.15,91.9,86.81,53.69,70.55,61.21,48.82,78.84,75.24,72.3,44.74 @@ -21,18 +21,18 @@ meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1. 
NbAiLab/nb-roberta-base-scandi-1e4,278,250,512,True,False,15074,1.67,1.83,1.53,1.64,72.16,51.7,62.03,29.95,92.09,86.85,59.84,73.33,71.06,43.67,79.9,76.2,73.62,32.38 FacebookAI/xlm-roberta-large,560,250,512,True,False,17897,1.7,1.72,1.75,1.62,72.74,48.33,57.3,43.57,91.66,86.19,50.25,55.51,43.89,57.57,80.33,76.63,49.72,46.64 "gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4094,True,False,921,1.7,1.59,2.02,1.5,61.31,52.52,57.63,57.03,77.7,73.92,58.88,54.29,32.82,45.35,73.04,72.77,58.06,58.02 +sentence-transformers/use-cmlm-multilingual,471,501,512,True,False,30231,1.7,1.86,1.68,1.57,69.17,48.03,55.31,42.34,90.08,86.04,56.35,59.38,46.54,55.05,80.05,75.09,61.83,45.69 KennethEnevoldsen/dfm-sentence-encoder-medium-3,178,120,512,True,False,14050,1.71,1.73,1.69,1.72,71.21,47.55,68.72,38.33,91.17,87.3,59.1,74.32,72.94,34.06,81.35,71.16,63.89,37.18 -sentence-transformers/use-cmlm-multilingual,471,501,512,True,False,30231,1.71,1.86,1.68,1.58,69.17,48.03,55.31,42.34,90.08,86.04,56.35,59.38,46.54,55.05,80.05,75.09,61.83,45.69 vesteinn/ScandiBERT-no-faroese,124,50,512,True,False,15436,1.72,1.85,1.69,1.61,69.79,47.73,68.28,31.9,91.09,85.72,50.9,69.34,66.24,48.45,79.08,72.53,73.01,36.92 -NbAiLab/nb-bert-base,178,120,512,True,False,14050,1.74,1.81,1.67,1.73,70.36,46.32,66.41,36.42,93.01,88.43,60.84,73.89,72.1,33.01,80.38,71.21,64.03,35.33 +NbAiLab/nb-bert-base,178,120,512,True,False,14050,1.73,1.8,1.67,1.73,70.36,46.32,66.41,36.42,93.01,88.43,60.84,73.89,72.1,33.01,80.38,71.21,64.03,35.33 meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.74,1.53,2.22,1.47,67.07,59.89,56.56,49.02,79.84,79.93,41.11,57.84,43.52,40.92,71.98,81.15,64.46,51.22 +NbAiLab/nb-sbert-base,178,120,512,True,False,17757,1.76,1.82,1.72,1.75,70.12,47.83,63.25,36.51,90.96,87.34,60.57,72.11,70.2,29.94,80.26,71.05,62.49,33.8 "gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.76,1.61,2.2,1.48,64.8,53.07,64.18,49.02,77.72,71.7,36.27,71.7,58.79,40.95,75.06,74.85,65.23,53.02 vesteinn/FoBERT,124,50,512,True,False,15623,1.76,1.83,1.73,1.71,69.65,49.18,65.45,32.4,90.65,84.88,52.44,68.77,65.4,43.13,78.58,73.41,71.14,31.62 -NbAiLab/nb-sbert-base,178,120,512,True,False,17757,1.77,1.82,1.72,1.76,70.12,47.83,63.25,36.51,90.96,87.34,60.57,72.11,70.2,29.94,80.26,71.05,62.49,33.8 ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.77,1.48,2.23,1.6,66.5,58.93,57.27,55.02,76.25,77.91,40.54,59.75,47.82,40.99,62.91,79.51,60.28,55.44 pere/roberta-debug-8,278,250,512,True,False,15103,1.77,1.83,1.72,1.75,71.34,49.77,64.31,31.86,91.16,84.75,55.25,68.03,66.9,41.65,74.48,74.58,69.07,31.66 +Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.8,1.53,2.3,1.58,63.81,54.5,57.19,55.77,72.21,70.24,39.85,63.14,43.24,43.41,62.12,79.89,61.71,54.99 pere/roberta-base-exp-8,278,250,512,True,False,15112,1.8,1.87,1.63,1.89,68.77,49.66,60.13,32.6,88.99,82.99,57.37,69.92,70.05,41.98,73.44,73.63,58.91,32.39 -Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.81,1.53,2.31,1.58,63.81,54.5,57.19,55.77,72.21,70.24,39.85,63.14,43.24,43.41,62.12,79.89,61.71,54.99 setu4993/LaBSE,471,501,512,True,False,25418,1.81,1.87,1.86,1.7,71.24,46.5,52.92,40.08,90.58,85.21,54.26,59.44,49.3,46.42,77.78,73.58,60.36,41.71 Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,1.82,1.63,2.31,1.52,64.66,53.42,53.93,55.55,75.68,75.89,38.41,56.42,39.34,44.35,69.73,78.76,57.57,56.43 
intfloat/multilingual-e5-large-instruct,560,250,514,True,False,5947,1.85,1.9,2.04,1.6,69.86,55.45,31.14,45.51,89.27,83.78,63.35,55.71,12.32,38.74,79.5,79.48,53.01,45.68 @@ -43,7 +43,7 @@ pere/roberta-base-exp-32,278,250,512,True,False,15081,1.86,2.0,1.71,1.86,71.9,51 "152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32764,True,False,2126,1.89,1.76,2.23,1.68,56.96,55.11,42.64,54.58,66.75,66.81,60.58,47.53,17.14,41.92,62.96,75.25,53.28,56.42 AI-Sweden-Models/bert-large-nordic-pile-1M-steps,369,64,512,True,False,6571,1.95,2.14,2.24,1.46,67.4,41.53,41.62,37.3,87.5,80.57,47.11,52.62,25.06,38.4,80.65,77.43,76.56,41.54 intfloat/multilingual-e5-base,278,250,512,True,False,14965,1.98,1.94,2.27,1.74,68.7,49.88,44.2,39.9,88.26,81.37,54.61,50.35,22.15,31.77,79.02,76.06,50.19,40.65 -"meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8192,True,False,1673,1.98,1.84,2.09,2.02,63.1,53.09,40.98,51.13,80.5,76.47,59.29,47.28,32.76,39.71,77.06,53.56,47.5,46.86 +"meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8192,True,False,1673,1.98,1.84,2.09,2.01,63.1,53.09,40.98,51.13,80.5,76.47,59.29,47.28,32.76,39.71,77.06,53.56,47.5,46.86 nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,1.98,1.8,2.39,1.75,58.34,59.14,56.46,39.77,69.21,70.45,39.87,57.8,40.31,40.97,58.65,81.81,63.69,42.29 mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024001,True,False,7095,2.0,2.05,2.09,1.87,53.95,48.97,31.78,56.44,70.14,68.74,60.64,35.59,29.22,49.87,62.86,70.54,37.5,58.0 pere/roberta-base-exp-32B,278,250,512,True,False,15103,2.0,2.01,2.03,1.96,71.81,47.83,54.99,29.92,90.6,86.76,52.19,54.98,58.33,29.17,77.97,73.27,47.19,31.07 @@ -53,15 +53,15 @@ ltg/norbert3-small,41,50,508,True,False,13515,2.04,2.17,1.74,2.22,67.89,39.34,50 "gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,2.09,1.93,2.54,1.79,35.79,53.69,62.98,51.96,60.16,48.74,39.62,71.38,42.94,36.04,51.31,73.54,66.39,52.22 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.12,1.89,2.66,1.8,63.63,50.82,35.58,54.33,73.2,72.26,35.8,36.86,23.4,40.32,65.01,77.68,34.06,56.78 four-two-labs/orpo-llama-3-swe (few-shot),8030,128,8192,False,False,4974,2.12,2.11,2.4,1.85,46.75,51.73,24.73,59.97,61.63,61.3,48.85,24.15,21.33,53.66,60.93,79.74,26.02,59.84 +"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.12,1.79,2.62,1.96,57.52,49.73,57.56,51.79,60.43,55.59,39.82,54.84,33.8,36.55,52.47,73.55,52.27,48.95 KBLab/megatron-bert-large-swedish-cased-165k,370,64,512,True,False,7138,2.13,2.44,2.56,1.38,58.5,41.02,27.1,39.99,85.99,79.47,39.53,27.39,23.56,39.01,81.05,78.0,76.79,45.71 -"gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.13,1.79,2.62,1.97,57.52,49.73,57.56,51.79,60.43,55.59,39.82,54.84,33.8,36.55,52.47,73.55,52.27,48.95 AI-Nordics/bert-large-swedish-cased,335,31,512,True,False,7199,2.14,2.42,2.53,1.48,60.66,38.46,32.29,37.68,83.32,77.97,38.44,37.54,23.1,39.97,78.61,77.47,72.87,43.11 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.14,2.01,2.29,2.13,59.96,56.91,67.13,17.52,72.74,69.17,67.45,74.27,54.83,3.67,62.45,77.69,68.93,12.11 -timpal0l/sol (few-shot),10732,32,4096,False,False,3701,2.15,2.2,2.29,1.95,54.91,44.38,21.11,58.96,65.14,65.88,57.06,26.41,19.58,51.6,57.51,77.31,25.06,60.16 -skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.16,2.03,2.53,1.93,49.17,51.51,32.04,58.52,62.52,61.55,52.09,21.99,16.84,47.3,54.14,78.27,32.49,58.95 +"gpt-4o-mini-2024-07-18 (few-shot, 
val)",-1,200,8191,True,False,784,2.14,2.01,2.28,2.13,59.96,56.91,67.13,17.52,72.74,69.17,67.45,74.27,54.83,3.67,62.45,77.69,68.93,12.11 +timpal0l/sol (few-shot),10732,32,4096,False,False,3701,2.15,2.2,2.3,1.95,54.91,44.38,21.11,58.96,65.14,65.88,57.06,26.41,19.58,51.6,57.51,77.31,25.06,60.16 +skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.16,2.02,2.53,1.93,49.17,51.51,32.04,58.52,62.52,61.55,52.09,21.99,16.84,47.3,54.14,78.27,32.49,58.95 cardiffnlp/twitter-xlm-roberta-base,278,250,512,True,False,34475,2.17,2.21,2.35,1.94,70.1,45.3,51.74,22.01,87.7,81.41,48.34,55.3,37.46,24.49,72.49,70.69,56.6,31.89 "timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val)",8030,128,8192,False,True,5018,2.17,2.12,2.51,1.88,51.37,52.17,27.98,51.65,64.51,65.66,52.9,29.34,17.42,38.49,65.33,74.99,32.65,55.71 -upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,2.18,2.15,2.38,2.0,58.03,46.63,15.09,62.15,68.11,68.19,55.33,10.15,7.51,55.33,59.65,77.48,16.94,62.65 +upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,2.17,2.14,2.38,2.0,58.03,46.63,15.09,62.15,68.11,68.19,55.33,10.15,7.51,55.33,59.65,77.48,16.94,62.65 KBLab/megatron-bert-large-swedish-cased-110k,370,64,512,True,False,7075,2.2,2.47,2.72,1.4,60.18,39.2,26.68,39.34,84.03,77.98,39.15,21.39,17.1,35.32,80.39,78.45,76.28,44.56 NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.2,2.04,2.39,2.17,51.95,52.11,44.47,43.32,58.53,60.26,59.48,51.85,41.89,25.62,57.01,80.12,43.04,30.44 "RJuro/munin-neuralbeagle-7b (few-shot, val)",7242,32,32768,False,True,2493,2.2,2.06,2.54,2.01,51.44,54.91,22.77,56.51,61.18,65.16,55.61,20.84,9.12,42.92,62.96,77.13,15.73,58.43 @@ -69,30 +69,29 @@ mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.2,1.99,2 google-bert/bert-base-multilingual-uncased,167,106,512,True,False,13993,2.24,2.32,2.31,2.08,64.92,33.5,46.75,37.09,82.9,77.33,37.28,49.41,43.58,40.35,70.85,63.3,48.97,38.0 nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.24,2.14,2.55,2.02,49.85,49.52,32.35,52.54,64.15,62.16,55.29,32.3,22.82,32.62,58.75,79.59,33.09,47.28 "timpal0l/BeagleCatMunin (few-shot, val)",7242,32,32768,False,True,2495,2.24,2.11,2.64,1.98,47.62,54.73,21.8,57.26,54.04,62.21,54.74,14.51,5.38,42.83,50.53,77.37,27.84,59.98 -AI-Sweden-Models/Llama-3-8B-instruct (few-shot),8030,128,8192,False,False,1472,2.26,2.06,3.08,1.65,58.57,51.63,28.82,51.98,81.96,78.42,35.3,12.11,5.94,22.15,84.77,80.1,29.17,54.68 +AI-Sweden-Models/Llama-3-8B-instruct (few-shot),8030,128,8192,False,False,1472,2.26,2.06,3.07,1.65,58.57,51.63,28.82,51.98,81.96,78.42,35.3,12.11,5.94,22.15,84.77,80.1,29.17,54.68 "merge-crew/da-sv-dare-ties-density-0.9 (few-shot, val)",7242,32,32768,True,True,2443,2.27,2.2,2.62,1.99,45.61,53.73,17.08,56.67,48.24,61.5,49.4,24.12,13.2,47.93,46.61,76.38,34.16,58.77 "merge-crew/da-sv-slerp (few-shot, val)",7242,32,32768,True,True,2467,2.27,2.1,2.75,1.96,45.94,51.75,28.04,57.65,49.67,61.11,56.07,3.81,-1.29,44.98,46.57,76.53,33.43,59.87 "merge-crew/da-sv-task-arithmetic (few-shot, val)",7242,32,32768,True,True,2500,2.27,2.1,2.75,1.96,46.06,51.51,27.68,57.78,49.69,61.78,55.87,2.99,-1.29,44.62,47.28,76.62,33.23,60.0 KB/bert-base-swedish-cased,125,50,512,True,False,16181,2.28,2.63,2.75,1.46,61.74,33.28,33.15,28.67,85.91,79.67,38.7,39.13,24.13,19.04,81.95,75.58,78.86,38.56 google-bert/bert-base-multilingual-cased,178,120,512,True,False,14083,2.28,2.49,2.34,2.0,63.17,32.38,27.93,39.57,88.72,83.08,35.87,44.22,39.55,40.55,76.29,61.78,47.74,41.17 
-"meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4096,True,False,1979,2.28,2.13,2.6,2.12,52.22,50.66,23.57,53.82,60.21,62.99,55.12,27.12,6.82,38.5,55.91,64.52,23.85,58.88 +"meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4096,True,False,1979,2.28,2.13,2.59,2.12,52.22,50.66,23.57,53.82,60.21,62.99,55.12,27.12,6.82,38.5,55.91,64.52,23.85,58.88 microsoft/infoxlm-large,560,250,512,True,False,6696,2.28,2.34,2.53,1.96,74.42,37.94,15.26,44.25,91.9,86.59,30.56,9.79,6.36,60.47,79.53,75.42,18.44,48.19 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.29,2.31,2.38,2.18,51.94,51.97,29.99,38.99,66.22,64.14,55.48,26.13,17.32,49.75,56.28,77.51,23.25,47.09 mhenrichsen/danskgpt-chat-v2.1 (few-shot),-1,32,32768,True,False,5085,2.29,2.01,2.71,2.15,51.08,54.69,30.95,56.56,62.43,60.68,53.41,-1.16,0.3,49.15,54.37,75.98,17.98,55.07 KBLab/bert-base-swedish-cased,125,50,512,True,False,16164,2.3,2.63,2.8,1.48,61.74,33.31,33.35,28.67,85.33,79.44,38.17,39.49,22.17,19.04,81.23,75.73,78.6,38.56 KBLab/megatron-bert-base-swedish-cased-600k,135,64,512,True,False,15726,2.3,2.64,2.71,1.56,57.97,39.4,23.5,31.87,82.2,76.64,40.2,24.45,19.18,30.69,78.91,76.09,70.08,41.14 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.3,2.03,2.8,2.06,54.7,54.81,32.11,48.87,64.55,66.44,35.17,27.41,15.6,43.11,55.8,79.23,32.67,46.88 +"birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2890,2.3,2.1,2.73,2.08,50.4,52.3,21.3,58.17,53.96,63.45,52.7,14.87,2.48,41.43,52.96,76.99,14.27,59.92 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.3,2.03,2.8,2.06,54.7,54.81,32.11,48.87,64.55,66.44,35.17,27.41,15.6,43.11,55.8,79.23,32.67,46.88 timpal0l/Llama-3-8B-flashback-v1 (few-shot),8030,128,8192,True,False,3004,2.3,2.29,2.67,1.93,46.59,50.25,14.46,56.86,59.09,60.02,47.58,10.52,6.67,49.89,57.01,81.97,31.16,53.99 Geotrend/bert-base-25lang-cased,151,85,512,True,False,13908,2.31,2.52,2.34,2.06,62.53,32.88,29.01,39.51,87.99,83.1,36.21,46.43,39.82,40.01,75.62,62.5,38.18,40.96 -Geotrend/bert-base-en-fr-de-no-da-cased,118,42,512,True,False,13973,2.31,2.34,2.47,2.12,63.38,34.78,41.08,40.32,88.05,83.08,35.34,31.45,36.12,41.59,76.55,61.6,37.44,39.32 +Geotrend/bert-base-en-fr-de-no-da-cased,118,42,512,True,False,13973,2.31,2.34,2.46,2.12,63.38,34.78,41.08,40.32,88.05,83.08,35.34,31.45,36.12,41.59,76.55,61.6,37.44,39.32 Geotrend/bert-base-en-no-cased,111,33,512,True,False,14081,2.31,2.37,2.44,2.12,62.66,33.91,40.96,39.93,89.07,82.69,34.97,39.58,31.27,41.89,75.33,61.8,36.62,39.95 -"birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2890,2.31,2.11,2.73,2.08,50.4,52.3,21.3,58.17,53.96,63.45,52.7,14.87,2.48,41.43,52.96,76.99,14.27,59.92 "timpal0l/BeagleCatMunin2 (few-shot, val)",7242,32,32768,False,True,2477,2.31,2.25,2.56,2.13,51.53,47.95,14.1,58.28,61.17,65.44,58.69,15.03,5.95,42.42,60.87,73.72,6.78,58.75 ZurichNLP/unsup-simcse-xlm-roberta-base,278,250,512,True,False,34520,2.32,2.44,2.48,2.05,65.1,45.07,26.83,29.92,86.56,80.57,49.62,38.45,11.38,31.5,75.49,71.12,36.69,33.55 "birgermoell/Rapid-Cycling (few-shot, val)",7242,32,32768,False,True,2346,2.33,2.15,2.78,2.07,49.99,51.25,20.66,56.82,55.93,63.85,50.41,15.74,2.23,39.81,53.66,77.72,16.22,59.75 facebook/xlm-v-base,778,902,512,True,False,25396,2.33,2.22,2.78,2.0,71.42,31.86,52.95,34.66,89.99,78.6,17.93,43.46,10.97,43.74,68.39,73.43,45.09,38.04 -meta-llama/Meta-Llama-3-8B 
(few-shot),8030,128,8192,True,False,1335,2.34,2.26,2.72,2.05,49.46,51.16,23.01,49.75,61.48,61.58,32.94,21.2,19.65,53.35,59.92,80.91,26.39,47.69 -mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.34,2.25,2.61,2.16,48.24,39.52,62.92,36.92,56.41,55.6,25.18,62.56,53.09,42.57,48.92,62.08,68.93,36.4 +mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.33,2.24,2.6,2.15,48.24,39.52,62.92,36.92,56.41,55.6,25.18,62.56,53.09,42.57,48.92,62.08,68.93,36.4 NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.35,2.63,2.57,1.84,49.18,49.76,41.28,12.83,58.05,59.65,57.94,51.36,42.84,14.72,57.66,80.04,45.21,52.73 jonfd/electra-small-nordic,22,96,128,True,False,5989,2.35,2.5,2.36,2.19,65.4,34.43,67.27,6.6,84.95,79.57,40.15,72.87,63.77,14.16,71.07,66.42,69.19,11.85 microsoft/xlm-align-base,278,250,512,True,False,14744,2.35,2.48,2.29,2.27,70.36,47.83,11.87,29.87,90.07,85.65,54.46,12.16,8.99,49.24,78.6,73.67,15.41,32.41 @@ -101,28 +100,29 @@ Geotrend/bert-base-en-da-cased,111,33,512,True,False,14062,2.36,2.45,2.51,2.12,6 "birgermoell/Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2887,2.36,2.29,2.75,2.03,47.71,48.21,19.55,56.46,56.44,66.56,53.24,11.96,2.5,39.21,55.29,78.29,18.45,58.42 "birgermoell/Munin-NeuralBeagle-NorskGPT (few-shot, val)",7242,32,32768,False,True,2903,2.36,2.48,2.45,2.15,51.85,44.02,1.22,57.69,63.33,68.84,58.28,18.65,10.72,44.39,63.85,73.72,-0.56,60.1 "birgermoell/WestLake-Munin-Cat-NorskGPT (few-shot, val)",7242,32,32768,False,True,2856,2.36,2.48,2.45,2.15,51.85,44.02,1.22,57.69,63.33,68.84,58.28,18.65,10.72,44.39,63.85,73.72,-0.56,60.1 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.36,2.22,2.7,2.16,57.74,48.43,27.12,46.76,74.47,72.93,34.44,27.77,20.35,42.9,69.67,59.93,27.63,49.84 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.36,2.22,2.71,2.16,57.74,48.43,27.12,46.76,66.56,68.29,34.47,28.22,18.21,47.34,69.67,59.93,27.63,49.84 "AI-Sweden-Models/tyr (few-shot, val)",7242,32,32768,False,True,6079,2.37,2.28,2.8,2.03,47.01,50.6,13.73,56.35,58.6,63.15,51.85,0.66,0.53,43.22,56.21,78.3,14.35,61.08 "merge-crew/da-sv-ties (few-shot, val)",7242,32,32768,True,True,2457,2.37,2.28,2.74,2.1,45.39,51.95,13.25,58.51,47.61,60.57,44.46,23.99,11.6,47.02,48.36,76.57,20.94,59.07 timpal0l/njord-alpha (few-shot),7242,32,32768,True,False,5431,2.37,2.39,2.72,2.0,38.25,39.37,29.76,57.02,50.47,51.97,48.03,22.65,17.1,44.72,48.19,79.95,32.85,57.39 -utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.37,2.22,2.77,2.13,34.0,53.97,32.21,57.1,40.91,42.91,52.62,9.7,11.98,47.36,40.59,76.02,33.98,56.98 +utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.37,2.21,2.77,2.13,34.0,53.97,32.21,57.1,40.91,42.91,52.62,9.7,11.98,47.36,40.59,76.02,33.98,56.98 CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.38,2.22,2.79,2.14,51.32,52.0,18.48,52.43,66.55,63.63,38.61,15.8,12.3,43.26,57.38,78.43,14.52,53.14 ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4097,True,False,2515,2.38,2.31,2.68,2.16,44.58,47.16,19.2,58.41,49.94,52.17,53.27,17.22,12.01,45.04,44.8,75.92,24.84,56.71 "merge-crew/da-sv-dare-ties-density-0.6 (few-shot, val)",7242,32,32768,True,True,2515,2.39,2.33,2.7,2.13,46.03,49.59,12.72,57.03,47.26,59.35,54.93,9.0,5.26,45.95,45.12,78.74,19.74,60.15 mistralai/Mixtral-8x7B-Instruct-v0.1 
(few-shot),46703,32,32768,True,False,5535,2.39,2.15,2.85,2.18,51.2,50.95,33.44,46.85,55.02,57.37,36.76,30.73,18.96,41.01,46.15,80.33,32.89,46.51 timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.39,2.37,2.81,1.99,42.43,47.82,16.51,56.95,48.97,51.52,49.05,14.37,9.96,44.07,44.14,80.14,34.23,57.07 +Geotrend/bert-base-da-cased,104,23,512,True,False,15432,2.41,2.51,2.56,2.17,62.76,32.06,30.95,37.79,87.52,82.66,32.73,36.41,30.37,37.71,74.13,62.18,36.93,37.59 "RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val)",7242,32,32768,False,True,3008,2.41,2.31,2.85,2.06,50.83,43.41,19.72,57.87,53.68,61.92,47.78,0.91,1.24,47.76,59.36,72.04,22.38,57.96 meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.41,2.27,2.93,2.04,50.92,47.86,29.19,48.38,65.17,60.22,34.02,32.48,18.38,33.06,62.19,80.31,30.29,42.78 -Geotrend/bert-base-da-cased,104,23,512,True,False,15432,2.42,2.51,2.57,2.17,62.76,32.06,30.95,37.79,87.52,82.66,32.73,36.41,30.37,37.71,74.13,62.18,36.93,37.59 "KennethEnevoldsen/munin_mistral-7b (few-shot, val)",7242,32,32768,False,True,2543,2.42,2.38,2.67,2.21,46.7,47.52,8.04,60.05,51.82,62.55,56.37,6.04,-0.02,48.85,52.34,77.66,6.0,60.16 mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.43,2.27,2.84,2.18,55.49,49.18,7.4,57.72,67.24,66.08,31.41,28.72,20.55,40.6,54.76,73.32,16.17,57.94 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.44,2.37,2.73,2.21,44.92,49.31,10.14,57.34,53.79,56.13,51.36,6.83,8.09,48.01,44.94,76.78,16.96,56.83 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.44,2.26,3.01,2.05,49.46,51.16,23.01,49.75,62.89,56.18,33.07,30.73,20.57,30.77,59.92,80.91,26.39,47.69 senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.44,2.31,2.68,2.32,52.61,49.81,19.64,48.03,64.37,62.77,50.6,18.09,12.25,38.34,58.9,67.74,16.52,49.41 AI-Sweden-Models/Llama-3-8B (few-shot),8030,128,8192,True,False,4141,2.45,2.38,2.8,2.16,36.72,46.48,26.1,58.0,44.53,47.02,41.84,19.97,15.61,50.91,36.45,81.12,26.8,58.16 Mabeck/Heidrun-Mistral-7B-chat (few-shot),7242,32,4096,False,False,1419,2.45,2.63,2.63,2.08,51.93,49.86,20.25,25.84,61.41,59.49,49.19,15.17,10.78,48.99,55.06,77.5,17.47,58.67 danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.45,2.48,2.79,2.09,39.12,36.47,26.76,58.75,50.43,54.2,39.21,20.51,11.66,51.57,47.1,73.05,30.29,57.39 -microsoft/infoxlm-base,278,250,512,True,False,34735,2.46,2.53,2.51,2.33,69.78,46.78,11.27,28.28,90.14,84.12,44.42,11.2,7.12,47.69,79.43,71.48,7.26,33.72 -google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.47,2.46,2.94,2.0,19.59,46.55,32.64,59.4,26.43,32.66,41.82,25.82,20.16,52.68,43.68,77.72,36.25,58.62 +microsoft/infoxlm-base,278,250,512,True,False,34735,2.45,2.53,2.5,2.33,69.78,46.78,11.27,28.28,90.14,84.12,44.42,11.2,7.12,47.69,79.43,71.48,7.26,33.72 +google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.46,2.46,2.92,2.0,19.59,46.55,32.64,59.4,26.43,32.66,41.82,25.82,20.16,52.68,43.68,77.72,36.25,58.62 KBLab/megatron-bert-base-swedish-cased-125k,135,64,512,True,False,15763,2.48,2.81,3.01,1.61,53.93,36.31,23.46,27.85,77.98,75.0,33.88,24.23,18.18,20.56,79.29,75.85,70.43,37.56 Mabeck/Heidrun-Mistral-7B-base (few-shot),7242,32,32768,True,False,3823,2.48,2.46,2.87,2.12,40.14,39.38,21.85,58.07,50.1,54.81,48.64,10.31,1.11,42.2,48.43,79.43,17.37,57.05 alpindale/Mistral-7B-v0.2-hf 
(few-shot),7242,32,32768,True,False,1841,2.49,2.38,2.9,2.19,43.65,45.86,15.19,59.14,50.63,52.69,44.05,11.6,9.26,45.23,48.96,78.9,10.82,58.91 @@ -134,20 +134,20 @@ ThatsGroes/munin-SkoleGPTOpenOrca-7b-16bit (few-shot),7242,32,32768,False,False, Twitter/twhin-bert-base,279,250,512,True,False,11514,2.5,2.52,2.85,2.13,60.01,42.17,29.43,29.79,84.11,77.22,37.02,35.42,6.87,25.98,70.17,66.62,46.72,31.38 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.51,2.4,2.95,2.18,44.0,45.41,16.17,57.06,57.21,59.62,38.93,8.65,5.92,42.32,49.9,77.19,14.67,57.12 "mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.53,2.42,2.8,2.38,52.72,49.11,16.09,46.28,61.9,66.92,48.8,19.53,9.83,30.27,60.53,67.03,15.1,42.46 -jhu-clsp/bernice,278,250,512,True,False,5567,2.54,2.53,2.88,2.22,61.98,47.2,40.52,13.53,84.11,77.82,39.63,45.75,33.74,5.35,71.34,70.91,53.52,16.41 -clips/mfaq,278,250,128,True,False,5591,2.55,2.57,2.7,2.37,68.49,45.6,28.26,14.34,89.46,79.71,52.91,27.55,15.2,12.36,76.31,73.32,32.29,16.12 -flax-community/nordic-roberta-wiki,125,50,512,True,False,16227,2.55,2.51,2.98,2.16,60.82,34.45,41.89,26.83,85.42,78.92,36.27,48.07,29.81,0.44,72.9,61.11,55.05,29.04 -CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.56,2.29,3.04,2.34,53.44,49.17,20.55,51.7,61.54,60.94,35.73,21.33,13.2,32.36,47.15,80.24,11.35,49.93 +flax-community/nordic-roberta-wiki,125,50,512,True,False,16227,2.54,2.5,2.98,2.15,60.82,34.45,41.89,26.83,85.42,78.92,36.27,48.07,29.81,0.44,72.9,61.11,55.05,29.04 +jhu-clsp/bernice,278,250,512,True,False,5567,2.54,2.53,2.87,2.22,61.98,47.2,40.52,13.53,84.11,77.82,39.63,45.75,33.74,5.35,71.34,70.91,53.52,16.41 +CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.55,2.29,3.03,2.34,53.44,49.17,20.55,51.7,61.54,60.94,35.73,21.33,13.2,32.36,47.15,80.24,11.35,49.93 +clips/mfaq,278,250,128,True,False,5591,2.55,2.57,2.71,2.37,68.49,45.6,28.26,14.34,89.46,79.71,52.91,27.55,15.2,12.36,76.31,73.32,32.29,16.12 KBLab/bert-base-swedish-cased-new,135,64,512,True,False,15933,2.56,2.96,3.07,1.64,59.37,38.46,4.61,23.13,83.23,79.16,33.94,9.56,4.16,22.84,79.99,76.04,73.52,30.6 sentence-transformers/paraphrase-xlm-r-multilingual-v1,278,250,512,True,False,20154,2.57,2.45,2.89,2.36,61.17,46.39,38.61,19.9,81.26,74.05,49.93,38.26,25.17,0.0,70.22,71.33,39.6,18.65 DDSC/roberta-base-scandinavian,125,50,512,True,False,14491,2.58,2.67,2.77,2.29,43.9,44.48,30.37,28.89,71.73,79.8,46.74,8.02,17.04,29.26,58.84,72.28,37.61,30.59 +RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.58,2.48,3.0,2.27,43.17,43.4,11.08,56.81,58.61,60.4,41.36,6.52,3.95,38.93,50.85,74.17,7.51,57.32 bineric/NorskGPT-Llama-13B-v0.1 (few-shot),-1,32,4096,False,False,2856,2.58,2.6,2.81,2.34,44.17,44.28,3.11,55.59,56.72,57.62,48.86,9.87,6.9,41.27,49.26,79.05,0.22,56.78 -RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.59,2.49,3.0,2.27,43.17,43.4,11.08,56.81,58.61,60.4,41.36,6.52,3.95,38.93,50.85,74.17,7.51,57.32 sentence-transformers/paraphrase-multilingual-mpnet-base-v2,278,250,512,True,False,15100,2.59,2.52,2.86,2.38,61.18,49.13,29.66,19.99,81.94,75.56,55.53,36.01,14.99,0.0,65.14,73.47,36.62,18.65 meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,2.61,2.73,3.1,2.0,41.28,23.01,23.5,60.29,51.12,55.35,23.75,14.0,7.61,49.24,54.52,78.45,21.55,59.71 
flax-community/swe-roberta-wiki-oscar,125,50,512,True,False,15437,2.62,2.83,3.22,1.8,55.98,36.66,22.69,24.81,79.25,75.39,36.56,22.02,19.72,0.78,75.4,76.22,65.73,29.34 -mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.62,2.4,3.03,2.44,44.89,48.09,19.06,51.56,53.42,54.34,38.79,17.06,11.0,35.74,47.92,62.9,19.95,52.51 -distilbert/distilbert-base-multilingual-cased,135,120,512,True,False,26355,2.63,2.69,2.84,2.36,58.12,32.53,35.53,28.19,83.62,80.69,33.16,36.1,30.1,19.26,70.08,59.66,33.71,31.48 +mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.62,2.4,3.03,2.43,44.89,48.09,19.06,51.56,53.42,54.34,38.79,17.06,11.0,35.74,47.92,62.9,19.95,52.51 +distilbert/distilbert-base-multilingual-cased,135,120,512,True,False,26355,2.63,2.69,2.84,2.35,58.12,32.53,35.53,28.19,83.62,80.69,33.16,36.1,30.1,19.26,70.08,59.66,33.71,31.48 neph1/bellman-7b-mistral-instruct-v0.2 (few-shot),7242,32,32768,False,False,2518,2.63,2.36,3.08,2.45,46.11,47.58,18.41,52.78,57.01,56.77,38.81,14.16,9.29,32.75,54.38,55.84,16.05,53.22 meta-llama/Llama-3.2-3B (few-shot),3213,128,131073,True,False,3713,2.64,2.65,3.01,2.25,41.13,38.9,9.6,56.85,49.57,52.13,39.96,3.2,3.72,45.54,51.06,77.76,5.88,57.43 sentence-transformers/stsb-xlm-r-multilingual,278,250,512,True,False,15040,2.64,2.63,2.98,2.32,58.52,42.26,34.8,19.6,80.08,74.59,52.16,36.3,14.21,0.0,68.94,72.77,40.21,20.09 @@ -158,145 +158,146 @@ bineric/NorskGPT-Mistral-7b (few-shot),7242,32,32768,False,False,1440,2.66,2.61, ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.66,2.66,2.96,2.35,48.44,39.07,9.72,51.18,68.4,65.15,42.0,5.2,3.32,37.51,59.77,74.45,3.97,50.18 Geotrend/distilbert-base-en-no-cased,69,33,512,True,False,26597,2.67,2.71,2.86,2.45,57.53,32.95,33.63,27.21,83.93,79.39,32.32,36.15,30.17,19.71,69.28,59.53,29.36,30.42 bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,2.67,2.65,2.81,2.54,41.63,47.73,0.0,54.25,56.18,56.96,50.94,8.19,5.55,41.35,53.95,60.91,0.32,55.28 -mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,2.67,2.61,3.02,2.39,37.93,44.49,14.09,51.38,50.08,51.27,43.65,14.09,8.28,37.23,45.01,73.33,11.59,52.12 +mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,2.67,2.61,3.01,2.39,37.93,44.49,14.09,51.38,50.08,51.27,43.65,14.09,8.28,37.23,45.01,73.33,11.59,52.12 occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.67,2.67,2.98,2.36,40.19,42.31,1.14,57.89,45.5,45.96,44.46,0.0,0.0,52.19,47.67,71.73,7.9,57.78 occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.67,2.68,3.05,2.29,37.93,44.62,0.28,58.05,45.28,46.0,44.95,0.0,0.0,43.88,49.02,76.56,2.18,58.98 danish-foundation-models/munin-7b-alpha (few-shot),7242,32,32768,True,False,6116,2.68,2.49,3.29,2.25,40.6,36.89,26.41,57.81,48.89,51.95,20.54,4.39,1.2,47.16,42.23,78.8,15.47,56.75 Geotrend/distilbert-base-en-da-cased,69,33,512,True,False,26196,2.69,2.66,2.97,2.44,59.5,31.89,36.0,28.41,83.27,79.59,29.37,31.5,24.06,18.62,69.62,59.42,29.01,31.82 -ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.69,2.68,3.01,2.38,45.9,37.11,11.7,50.11,66.91,62.82,40.71,9.5,6.74,32.83,52.85,73.93,8.27,48.49 +ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.69,2.67,3.01,2.38,45.9,37.11,11.7,50.11,66.91,62.82,40.71,9.5,6.74,32.83,52.85,73.93,8.27,48.49 
Geotrend/distilbert-base-da-cased,61,23,512,True,False,28950,2.7,2.69,2.98,2.44,58.36,32.13,34.75,27.5,82.84,78.83,30.7,34.24,27.2,16.44,69.25,58.47,29.8,30.61 ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4097,True,False,10194,2.71,2.64,3.06,2.42,41.79,41.86,11.86,51.97,52.68,53.17,39.87,12.08,7.18,36.0,45.23,72.76,11.25,52.22 tollefj/nordavind-7b-instruct-warm (few-shot),7248,33,2048,False,False,6450,2.71,2.54,3.19,2.4,38.39,49.44,7.5,51.24,38.82,43.28,38.05,8.45,7.5,40.47,47.24,77.91,5.55,51.41 +LumiOpen/Viking-33B@1000B (few-shot),33119,131,4099,True,False,2080,2.73,2.64,3.19,2.37,34.22,45.05,9.4,54.92,40.4,44.45,40.79,5.91,2.98,37.75,42.35,77.68,8.08,54.57 TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,2.73,2.66,3.12,2.42,34.25,45.67,10.62,50.77,42.77,45.69,37.79,8.77,8.47,44.24,42.87,79.18,8.65,51.56 +meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,2.73,2.74,3.18,2.28,31.77,43.91,0.31,58.44,42.13,43.8,41.74,0.0,0.02,44.19,44.11,79.05,7.34,57.49 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,2.73,2.6,3.23,2.36,41.12,42.77,11.52,51.14,49.66,51.98,44.13,0.67,1.11,28.62,43.74,76.98,16.01,48.38 -LumiOpen/Viking-33B@1000B (few-shot),33119,131,4099,True,False,2080,2.74,2.65,3.19,2.37,34.22,45.05,9.4,54.92,40.4,44.45,40.79,5.91,2.98,37.75,42.35,77.68,8.08,54.57 Twitter/twhin-bert-large,561,250,512,True,False,9707,2.74,2.65,3.2,2.38,66.39,39.36,7.06,33.88,86.26,80.1,34.17,12.11,4.28,11.74,74.26,63.35,16.07,36.77 -meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,2.74,2.74,3.18,2.29,31.77,43.91,0.31,58.44,42.13,43.8,41.74,0.0,0.02,44.19,44.11,79.05,7.34,57.49 -sarnikowski/convbert-medium-small-da-cased,24,29,512,True,False,13821,2.75,2.28,3.11,2.86,64.28,36.85,63.55,24.52,79.5,73.03,32.4,41.65,25.53,5.41,58.01,57.67,13.4,24.92 +sarnikowski/convbert-medium-small-da-cased,24,29,512,True,False,13821,2.75,2.28,3.1,2.86,64.28,36.85,63.55,24.52,79.5,73.03,32.4,41.65,25.53,5.41,58.01,57.67,13.4,24.92 NorwAI/NorwAI-Mistral-7B (few-shot),7537,68,4096,True,False,3035,2.76,2.71,3.07,2.5,21.47,48.39,12.46,52.51,21.09,26.31,49.0,7.15,7.98,47.7,23.88,80.26,13.5,55.02 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.76,2.23,3.4,2.64,59.48,56.46,20.57,38.23,69.39,62.76,3.97,31.65,5.86,36.65,57.06,59.89,9.3,39.97 google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,2.77,2.67,3.15,2.48,28.22,47.11,19.99,48.0,35.56,37.7,46.84,17.15,14.38,29.75,35.61,75.84,15.62,47.76 -Addedk/kbbert-distilled-cased,82,50,512,True,False,29698,2.78,3.02,3.36,1.97,57.84,31.18,13.25,22.73,81.82,75.89,33.42,14.99,13.63,0.0,80.12,71.28,51.58,28.16 +Addedk/kbbert-distilled-cased,82,50,512,True,False,29698,2.78,3.01,3.36,1.97,57.84,31.18,13.25,22.73,81.82,75.89,33.42,14.99,13.63,0.0,80.12,71.28,51.58,28.16 emillykkejensen/Phi-3-mini-4k-instruct-dansk (few-shot),3821,32,4096,False,False,1360,2.78,2.65,3.19,2.51,39.96,44.93,4.01,55.01,56.41,53.95,42.27,0.0,0.21,29.35,47.81,68.43,3.63,53.03 "merge-crew/da-sv-dare-ties-density-0.3 (few-shot, val)",7242,32,32768,True,True,2461,2.79,2.69,3.2,2.49,30.16,48.49,5.52,52.44,35.98,47.39,38.98,11.54,5.2,37.54,32.37,75.33,12.73,53.05 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,2.79,2.73,3.09,2.54,56.75,44.48,26.74,17.89,78.31,72.13,47.53,26.92,14.63,0.0,66.5,72.19,28.75,15.91 
+jannikskytt/MeDa-Bert,111,32,511,True,False,16114,2.8,2.3,3.01,3.1,64.64,44.62,47.47,23.14,71.69,60.0,38.94,30.32,7.99,24.02,48.32,53.98,3.33,23.15 meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.8,2.63,3.2,2.58,35.44,44.88,9.74,55.04,44.99,49.09,41.56,3.04,4.03,33.77,39.72,66.18,6.74,54.05 AI-Sweden-Models/gpt-sw3-40b (few-shot),39927,64,2048,True,False,409,2.81,2.66,3.35,2.43,26.57,47.81,11.13,53.78,24.07,26.67,31.05,10.8,8.89,48.78,32.0,80.44,10.73,53.8 -jannikskytt/MeDa-Bert,111,32,511,True,False,16114,2.81,2.3,3.01,3.11,64.64,44.62,47.47,23.14,71.69,60.0,38.94,30.32,7.99,24.02,48.32,53.98,3.33,23.15 mideind/IceBERT-xlmr-ic3,278,250,512,True,False,11004,2.81,2.84,3.07,2.53,58.49,37.47,6.71,30.6,82.46,74.22,37.19,13.25,7.96,18.75,70.57,66.01,10.2,30.71 danish-foundation-models/encoder-medium-v1,111,32,512,True,False,16130,2.82,2.32,3.1,3.03,63.42,39.91,51.01,25.76,68.66,61.77,36.56,31.23,5.4,22.56,49.62,58.7,2.23,25.45 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,2.82,2.73,3.19,2.54,41.37,42.6,6.52,50.57,56.33,54.68,37.18,6.76,6.79,30.11,46.15,67.17,5.3,51.12 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,2.82,2.73,3.19,2.54,41.37,42.6,6.52,50.57,56.33,54.68,37.18,6.76,6.79,30.11,46.15,67.17,5.3,51.12 norallm/normistral-7b-warm-instruct (few-shot),-1,33,2048,True,False,6194,2.82,2.66,3.22,2.58,39.83,47.48,4.55,49.23,46.49,51.46,37.98,7.86,7.23,33.31,51.45,63.64,5.8,48.95 timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,32768,False,False,5172,2.84,2.71,3.56,2.25,37.02,40.65,7.48,52.71,50.34,52.06,32.19,-0.22,0.0,20.57,46.74,77.06,14.0,56.74 +DDSC/roberta-base-danish,125,50,512,True,False,15004,2.86,2.59,3.25,2.75,63.84,43.9,17.16,26.94,76.14,72.88,32.29,0.45,-0.08,23.91,65.95,64.02,0.8,28.46 Maltehb/danish-bert-botxo,111,32,512,True,False,16091,2.86,2.27,3.29,3.03,66.71,43.79,45.96,26.29,72.62,58.73,40.65,29.47,12.95,0.91,50.29,57.42,4.94,24.16 neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,2.86,2.79,3.39,2.41,27.6,37.08,10.84,58.67,35.54,36.61,32.18,0.0,0.0,43.47,33.21,76.57,10.51,58.15 AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,2.87,2.98,3.2,2.43,27.41,30.23,11.34,52.8,30.82,39.56,34.5,15.17,12.46,42.81,31.86,79.2,12.26,53.58 -DDSC/roberta-base-danish,125,50,512,True,False,15004,2.87,2.59,3.25,2.76,63.84,43.9,17.16,26.94,76.14,72.88,32.29,0.45,-0.08,23.91,65.95,64.02,0.8,28.46 NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.87,2.97,3.0,2.63,50.83,53.23,23.02,0.0,65.75,70.12,41.9,47.88,35.66,0.03,69.54,79.55,28.27,0.02 -Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,2.87,2.69,3.28,2.64,35.96,42.04,8.65,53.68,44.83,46.29,32.7,3.57,1.61,42.55,40.19,64.08,5.43,53.21 +Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,2.87,2.69,3.27,2.64,35.96,42.04,8.65,53.68,44.83,46.29,32.7,3.57,1.61,42.55,40.19,64.08,5.43,53.21 +google/gemma-7b-it (few-shot),8538,256,8192,False,False,1792,2.87,2.85,2.91,2.86,43.83,29.21,12.96,49.76,59.77,60.98,28.14,14.01,10.15,51.08,59.26,28.63,11.43,46.67 ltg/norbert3-xs,15,50,508,True,False,14208,2.87,2.95,2.83,2.82,59.94,39.16,2.16,24.69,87.63,80.19,49.92,7.93,5.06,22.46,67.53,59.27,2.83,24.11 -Addedk/mbert-swedish-distilled-cased,135,120,512,True,False,26091,2.88,3.0,3.16,2.49,56.36,31.16,21.08,19.63,82.98,76.65,30.38,21.99,19.06,9.47,73.41,62.1,34.86,18.1 -allenai/OLMo-1.7-7B-hf 
(few-shot),6888,50,4096,True,False,3371,2.88,2.97,3.28,2.39,33.8,31.57,2.76,54.2,42.78,42.85,36.68,2.39,1.91,39.16,41.25,76.6,6.37,54.87 +Addedk/mbert-swedish-distilled-cased,135,120,512,True,False,26091,2.88,2.99,3.16,2.49,56.36,31.16,21.08,19.63,82.98,76.65,30.38,21.99,19.06,9.47,73.41,62.1,34.86,18.1 +allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,2.88,2.96,3.28,2.39,33.8,31.57,2.76,54.2,42.78,42.85,36.68,2.39,1.91,39.16,41.25,76.6,6.37,54.87 birgermoell/roberta-swedish-scandi,125,50,512,True,False,15385,2.88,3.05,3.49,2.11,49.22,33.51,12.08,24.49,72.74,69.74,29.68,15.83,8.7,1.04,68.55,69.96,52.88,27.99 -google/gemma-7b-it (few-shot),8538,256,8192,False,False,1792,2.88,2.85,2.91,2.87,43.83,29.21,12.96,49.76,59.77,60.98,28.14,14.01,10.15,51.08,59.26,28.63,11.43,46.67 NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,2.89,2.86,3.22,2.59,16.72,45.89,11.25,53.17,31.45,33.85,36.06,8.34,6.84,48.31,24.98,79.36,5.75,54.74 MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,2.91,3.02,3.28,2.43,28.18,29.32,2.9,56.48,36.96,39.38,32.67,2.18,5.33,45.23,41.49,75.64,0.66,57.48 norallm/normistral-7b-warm (few-shot),7248,33,2048,True,False,3175,2.91,2.83,3.43,2.46,37.8,40.51,3.35,49.08,42.29,46.29,27.05,1.63,2.57,39.18,48.78,76.09,2.53,48.93 -sarnikowski/electra-small-discriminator-da-256-cased,13,29,512,True,False,20340,2.91,2.52,3.28,2.93,60.63,24.38,68.58,21.03,73.15,66.34,29.97,40.79,25.08,1.93,52.79,57.93,14.72,20.54 -sarnikowski/convbert-small-da-cased,13,29,512,True,False,14273,2.93,2.55,3.24,3.01,60.59,29.52,57.1,20.16,76.07,70.94,32.49,35.43,21.11,1.84,55.06,53.7,12.38,22.53 +sarnikowski/electra-small-discriminator-da-256-cased,13,29,512,True,False,20340,2.91,2.52,3.28,2.92,60.63,24.38,68.58,21.03,73.15,66.34,29.97,40.79,25.08,1.93,52.79,57.93,14.72,20.54 +sarnikowski/convbert-small-da-cased,13,29,512,True,False,14273,2.93,2.55,3.23,3.0,60.59,29.52,57.1,20.16,76.07,70.94,32.49,35.43,21.11,1.84,55.06,53.7,12.38,22.53 LumiOpen/Viking-13B (few-shot),14030,131,4097,True,False,840,2.96,2.76,3.58,2.53,28.6,48.71,2.3,53.85,26.76,35.38,29.22,2.58,2.79,34.41,31.55,78.66,5.69,52.93 -01-ai/Yi-1.5-6B (few-shot),6061,64,4097,True,False,2867,2.98,3.21,3.29,2.45,35.21,12.73,4.75,55.95,46.02,48.72,27.86,2.41,2.5,44.7,45.55,70.71,4.83,55.25 -HPLT/gpt-33b-nordic-prerelease (few-shot),33119,131,4099,True,False,501,2.98,2.91,3.42,2.62,25.35,44.7,1.43,52.29,31.38,37.84,38.88,3.41,3.11,30.39,33.61,76.75,1.66,50.68 -ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4097,True,False,10187,2.98,3.02,3.31,2.6,32.34,29.5,3.89,53.67,43.0,45.08,35.36,2.79,1.95,37.33,36.54,68.85,2.6,54.58 +ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4097,True,False,10187,2.97,3.01,3.31,2.6,32.34,29.5,3.89,53.67,43.0,45.08,35.36,2.79,1.95,37.33,36.54,68.85,2.6,54.58 +01-ai/Yi-1.5-6B (few-shot),6061,64,4097,True,False,2867,2.98,3.2,3.29,2.45,35.21,12.73,4.75,55.95,46.02,48.72,27.86,2.41,2.5,44.7,45.55,70.71,4.83,55.25 +HPLT/gpt-33b-nordic-prerelease (few-shot),33119,131,4099,True,False,501,2.98,2.9,3.42,2.62,25.35,44.7,1.43,52.29,31.38,37.84,38.88,3.41,3.11,30.39,33.61,76.75,1.66,50.68 ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.0,2.97,3.43,2.59,37.37,31.44,5.27,48.41,44.89,48.08,32.29,7.49,4.65,26.37,40.68,68.96,4.77,49.73 stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.0,2.97,3.47,2.55,28.45,39.09,1.43,51.67,41.64,42.37,33.71,-0.19,-0.01,30.14,38.0,75.15,1.04,53.11 
-dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,3.04,3.28,3.39,2.45,47.61,24.17,8.14,25.19,68.63,67.7,25.68,6.73,3.35,22.57,68.83,64.25,28.62,28.78 +dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,3.03,3.28,3.38,2.44,47.61,24.17,8.14,25.19,68.63,67.7,25.68,6.73,3.35,22.57,68.83,64.25,28.62,28.78 google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.04,3.06,3.52,2.53,17.29,34.94,6.39,54.94,20.47,24.18,32.61,3.22,3.91,41.16,30.45,76.36,6.06,55.19 sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,3.04,3.12,3.31,2.7,54.48,36.6,8.84,15.42,77.81,72.22,44.59,8.98,5.72,0.0,65.5,68.33,14.81,16.11 sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,3.04,3.12,3.31,2.7,54.48,36.6,8.84,13.97,77.81,72.22,44.59,8.98,5.72,0.0,65.5,68.36,14.81,16.11 AI-Sweden-Models/gpt-sw3-6.7b-v2 (few-shot),7111,64,2048,True,False,2351,3.05,3.29,3.27,2.6,20.84,18.07,10.54,51.22,29.62,32.3,34.67,8.37,7.76,44.62,28.73,77.47,8.78,50.57 AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2048,True,False,1831,3.06,3.13,3.27,2.79,15.94,32.78,7.86,52.16,23.95,26.55,40.89,9.45,8.32,43.19,15.7,68.23,12.39,52.04 -Maltehb/aelaectra-danish-electra-small-cased,14,32,128,True,False,4593,3.07,2.65,3.35,3.22,63.31,32.72,67.74,0.0,71.85,67.14,29.0,33.57,21.79,0.03,57.82,55.68,19.26,0.0 +Maltehb/aelaectra-danish-electra-small-cased,14,32,128,True,False,4593,3.07,2.65,3.35,3.21,63.31,32.72,67.74,0.0,71.85,67.14,29.0,33.57,21.79,0.03,57.82,55.68,19.26,0.0 +ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.07,3.03,3.54,2.65,37.21,31.54,6.3,44.86,53.78,55.14,26.21,3.9,2.42,24.86,50.1,65.67,4.55,42.83 nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.07,2.96,3.37,2.88,49.01,47.95,32.89,0.0,63.7,62.53,34.35,31.53,22.71,0.06,48.51,78.68,29.18,0.0 -ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.08,3.03,3.54,2.66,37.21,31.54,6.3,44.86,53.78,55.14,26.21,3.9,2.42,24.86,50.1,65.67,4.55,42.83 -microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,3.08,3.18,3.25,2.82,4.51,40.85,5.43,51.76,52.18,50.53,33.3,2.63,4.0,37.08,42.36,51.53,3.11,51.11 +microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,3.08,3.17,3.25,2.81,4.51,40.85,5.43,51.76,52.18,50.53,33.3,2.63,4.0,37.08,42.36,51.53,3.11,51.11 LumiOpen/Viking-7B (few-shot),7550,131,4096,True,False,1431,3.09,3.05,3.52,2.71,23.98,38.74,1.04,50.17,22.37,29.9,35.86,1.03,2.92,34.39,30.64,72.02,1.08,48.72 -dbmdz/bert-medium-historic-multilingual-cased,42,32,512,True,False,24291,3.09,3.25,3.43,2.6,49.88,27.93,5.42,22.93,69.65,66.78,26.33,6.62,5.16,15.75,66.11,59.66,26.28,24.36 -ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.09,2.91,3.85,2.51,38.62,35.47,5.07,45.21,53.93,54.04,23.83,3.91,1.55,2.37,51.76,70.61,6.24,44.67 +dbmdz/bert-medium-historic-multilingual-cased,42,32,512,True,False,24291,3.09,3.25,3.43,2.59,49.88,27.93,5.42,22.93,69.65,66.78,26.33,6.62,5.16,15.75,66.11,59.66,26.28,24.36 +ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.09,2.9,3.85,2.51,38.62,35.47,5.07,45.21,53.93,54.04,23.83,3.91,1.55,2.37,51.76,70.61,6.24,44.67 meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131073,False,False,7436,3.12,3.0,3.7,2.67,35.45,36.94,1.12,44.61,44.66,47.78,27.43,0.07,1.14,18.0,41.6,71.86,3.72,43.57 -HPLT/gpt-13b-nordic-prerelease 
(few-shot),14030,131,4099,True,False,3520,3.14,2.98,3.76,2.69,28.72,37.19,2.96,49.53,28.94,33.83,27.32,1.46,-0.59,25.62,32.19,72.26,2.39,48.92 -utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.14,3.08,3.58,2.76,19.61,37.92,2.81,50.05,31.43,36.92,30.63,0.98,1.67,33.24,27.41,72.24,0.13,49.77 +HPLT/gpt-13b-nordic-prerelease (few-shot),14030,131,4099,True,False,3520,3.14,2.97,3.76,2.69,28.72,37.19,2.96,49.53,28.94,33.83,27.32,1.46,-0.59,25.62,32.19,72.26,2.39,48.92 +utter-project/EuroLLM-1.7B-Instruct (few-shot),1657,128,4096,True,False,15009,3.14,3.08,3.57,2.76,19.61,37.92,2.81,50.05,31.43,36.92,30.63,0.98,1.67,33.24,27.41,72.24,0.13,49.77 Maltehb/aelaectra-danish-electra-small-uncased,14,32,128,True,False,5995,3.17,2.66,3.46,3.38,62.52,34.45,65.15,2.51,59.76,51.44,33.41,32.87,20.09,0.0,39.17,57.71,17.1,0.11 -jjzha/dajobbert-base-uncased,110,32,512,True,False,16243,3.17,2.64,3.55,3.32,60.78,39.65,37.67,15.41,65.95,55.29,33.31,20.34,8.07,0.0,42.99,55.49,4.69,14.22 +jjzha/dajobbert-base-uncased,110,32,512,True,False,16243,3.17,2.64,3.55,3.31,60.78,39.65,37.67,15.41,65.95,55.29,33.31,20.34,8.07,0.0,42.99,55.49,4.69,14.22 google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.18,3.08,3.66,2.81,19.97,40.21,2.27,50.55,15.53,19.78,32.89,1.18,0.0,33.33,14.67,75.45,3.82,51.73 -NbAiLab/nb-llama-3.1-8B (few-shot),8030,128,131072,True,False,1297,3.21,3.33,3.4,2.89,44.83,37.14,10.13,8.09,51.85,54.79,31.84,36.3,32.19,0.71,53.65,80.41,5.81,10.43 -AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot),1445,64,2048,True,False,4544,3.23,3.39,3.41,2.88,14.73,27.14,2.65,46.38,33.08,38.28,35.58,0.82,1.43,36.06,19.04,73.34,2.9,47.45 -Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.23,2.83,3.34,3.52,32.28,39.62,5.38,54.16,32.12,36.86,36.97,5.27,1.4,40.0,37.26,5.2,1.85,54.15 -ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.23,3.15,3.85,2.69,24.93,31.65,0.06,51.47,32.21,36.62,16.98,1.57,0.97,26.28,33.34,72.0,0.25,52.53 +NbAiLab/nb-llama-3.1-8B (few-shot),8030,128,131072,True,False,1297,3.2,3.33,3.39,2.89,44.83,37.14,10.13,8.09,51.85,54.79,31.84,36.3,32.19,0.71,53.65,80.41,5.81,10.43 +AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot),1445,64,2048,True,False,4544,3.22,3.38,3.41,2.88,14.73,27.14,2.65,46.38,33.08,38.28,35.58,0.82,1.43,36.06,19.04,73.34,2.9,47.45 +Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.22,2.82,3.34,3.51,32.28,39.62,5.38,54.16,32.12,36.86,36.97,5.27,1.4,40.0,37.26,5.2,1.85,54.15 +ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.23,3.15,3.84,2.69,24.93,31.65,0.06,51.47,32.21,36.62,16.98,1.57,0.97,26.28,33.34,72.0,0.25,52.53 meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,3.23,3.17,3.81,2.71,19.82,35.97,2.14,46.59,30.54,31.34,29.5,-0.13,0.02,19.59,29.89,74.33,1.06,46.89 -openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.25,3.16,3.72,2.86,34.66,21.93,1.5,52.36,37.36,42.83,16.02,-0.08,2.29,31.6,35.02,51.8,6.15,50.85 +openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.24,3.15,3.72,2.86,34.66,21.93,1.5,52.36,37.36,42.83,16.02,-0.08,2.29,31.6,35.02,51.8,6.15,50.85 ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.26,3.19,3.72,2.88,23.87,31.21,2.04,47.36,37.73,40.07,21.5,0.86,2.01,27.03,29.08,65.51,0.5,46.52 ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.31,3.48,3.59,2.85,31.8,6.85,0.97,49.83,40.08,43.96,31.9,-0.07,1.27,23.32,36.01,57.18,1.52,51.04 
-HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.33,3.39,3.81,2.79,29.44,18.49,1.73,44.39,37.6,38.38,24.05,3.56,2.61,13.58,37.37,64.46,4.49,43.92 +HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.33,3.39,3.81,2.78,29.44,18.49,1.73,44.39,37.6,38.38,24.05,3.56,2.61,13.58,37.37,64.46,4.49,43.92 HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,3.34,3.11,3.98,2.94,21.98,37.77,1.26,46.03,20.25,28.99,17.44,3.2,2.61,21.5,27.07,61.96,2.65,46.16 -AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot),7111,64,2048,True,False,1473,3.36,3.66,3.4,3.02,15.35,2.85,10.99,50.51,24.67,29.03,34.39,2.42,5.11,42.52,14.58,56.6,10.92,50.18 -allenai/OLMo-7B (few-shot),6888,50,2051,True,False,5403,3.36,3.21,4.15,2.73,26.76,30.76,0.55,45.65,34.42,35.17,21.46,0.34,0.26,0.12,37.36,72.08,-0.86,45.16 -google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.36,3.24,3.62,3.22,24.44,34.03,2.25,42.12,39.78,43.58,22.01,2.76,1.45,32.42,33.51,43.97,0.53,39.39 +AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot),7111,64,2048,True,False,1473,3.36,3.65,3.4,3.02,15.35,2.85,10.99,50.51,24.67,29.03,34.39,2.42,5.11,42.52,14.58,56.6,10.92,50.18 +allenai/OLMo-7B (few-shot),6888,50,2051,True,False,5403,3.36,3.21,4.15,2.72,26.76,30.76,0.55,45.65,34.42,35.17,21.46,0.34,0.26,0.12,37.36,72.08,-0.86,45.16 +google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.36,3.24,3.61,3.22,24.44,34.03,2.25,42.12,39.78,43.58,22.01,2.76,1.45,32.42,33.51,43.97,0.53,39.39 EuropeanParliament/EUBERT,94,66,512,True,False,20070,3.37,3.21,3.81,3.08,41.09,27.33,21.58,20.68,49.92,44.37,19.81,8.64,3.11,15.89,38.36,59.0,19.4,19.23 AI-Sweden-Models/gpt-sw3-6.7b (few-shot),7111,64,2048,True,False,2285,3.4,3.34,3.75,3.11,18.23,22.71,5.03,49.11,22.35,21.98,18.23,1.68,2.49,41.8,18.83,53.68,3.49,49.81 AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,3.42,3.47,3.78,3.02,8.8,28.65,2.84,45.34,13.49,14.74,27.28,3.09,1.86,34.91,6.08,71.38,1.17,45.55 NorwAI/NorwAI-Mistral-7B-instruct (few-shot),7537,68,4096,False,False,3027,3.43,3.52,3.73,3.05,13.78,42.16,3.52,20.02,27.49,32.33,47.78,3.92,4.27,2.46,20.97,77.76,2.35,28.65 +mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.45,3.26,3.89,3.21,22.31,34.05,0.7,41.82,28.74,30.34,27.49,-2.17,0.26,19.1,27.31,45.94,-0.97,35.57 sentence-transformers/distiluse-base-multilingual-cased-v1,135,120,512,True,False,34042,3.45,3.42,3.8,3.14,46.78,27.78,3.04,15.52,60.76,59.62,25.98,2.65,3.47,0.2,49.86,60.06,3.18,16.08 -mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.46,3.26,3.9,3.21,22.31,34.05,0.7,41.82,28.74,30.34,27.49,-2.17,0.26,19.1,27.31,45.94,-0.97,35.57 -HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.47,3.62,4.01,2.78,24.47,9.93,1.22,42.09,26.7,28.23,23.25,-0.47,0.26,13.4,35.96,68.31,3.61,43.26 +HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.47,3.61,4.01,2.78,24.47,9.93,1.22,42.09,26.7,28.23,23.25,-0.47,0.26,13.4,35.96,68.31,3.61,43.26 KBLab/albert-base-swedish-cased-alpha,14,50,512,True,False,15925,3.47,3.7,3.84,2.88,29.9,19.79,6.15,15.96,66.97,63.9,18.85,5.83,4.02,0.0,47.19,56.57,20.92,23.86 -AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot),471,64,2048,True,False,5855,3.51,3.42,3.78,3.33,11.28,34.94,2.08,36.59,24.38,31.28,30.88,-0.3,0.45,23.99,14.84,59.0,0.06,34.37 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 
(few-shot),7453,251,4096,True,False,1438,3.51,3.37,3.83,3.33,29.49,13.77,0.0,51.53,34.78,39.0,10.69,6.17,5.9,31.25,37.17,20.2,6.13,46.66 -NbAiLab/nb-gpt-j-6B-alpaca (few-shot),6055,50,1024,False,False,2607,3.52,3.51,3.78,3.26,12.95,27.68,1.65,38.6,23.82,26.04,32.6,0.34,2.26,21.33,13.28,60.17,1.52,37.23 -dbmdz/bert-mini-historic-multilingual-cased,12,32,512,True,False,47122,3.52,3.61,3.75,3.2,41.7,26.03,2.19,13.82,61.55,59.9,24.59,3.45,2.72,3.99,50.07,56.1,5.05,14.49 -ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.52,3.4,4.07,3.1,30.63,22.35,1.95,37.3,36.99,37.27,19.55,1.95,2.31,7.33,33.8,58.78,0.72,35.45 +AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot),471,64,2048,True,False,5855,3.51,3.42,3.78,3.32,11.28,34.94,2.08,36.59,24.38,31.28,30.88,-0.3,0.45,23.99,14.84,59.0,0.06,34.37 +NbAiLab/nb-gpt-j-6B-alpaca (few-shot),6055,50,1024,False,False,2607,3.51,3.51,3.78,3.25,12.95,27.68,1.65,38.6,23.82,26.04,32.6,0.34,2.26,21.33,13.28,60.17,1.52,37.23 +openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.51,3.37,3.83,3.32,29.49,13.77,0.0,51.53,34.78,39.0,10.69,6.17,5.9,31.25,37.17,20.2,6.13,46.66 +dbmdz/bert-mini-historic-multilingual-cased,12,32,512,True,False,47122,3.52,3.61,3.74,3.2,41.7,26.03,2.19,13.82,61.55,59.9,24.59,3.45,2.72,3.99,50.07,56.1,5.05,14.49 +ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,3.52,3.4,4.06,3.1,30.63,22.35,1.95,37.3,36.99,37.27,19.55,1.95,2.31,7.33,33.8,58.78,0.72,35.45 state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,3.53,3.71,3.93,2.96,17.58,10.47,1.23,42.56,26.9,34.59,31.06,0.21,-0.17,10.35,23.25,71.7,-0.82,40.48 -NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,3.54,3.33,4.15,3.15,21.87,24.82,2.89,44.86,31.65,31.54,10.64,1.81,1.72,16.32,32.54,43.55,1.93,44.8 -sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.55,3.75,3.72,3.17,26.96,30.13,2.01,8.22,63.79,60.96,32.83,1.09,0.18,0.0,51.67,62.71,2.32,8.76 -sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.55,3.75,3.72,3.17,26.96,30.13,2.01,8.25,63.79,60.96,32.83,1.09,0.18,0.0,51.67,63.04,2.32,8.93 +NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,3.54,3.33,4.14,3.15,21.87,24.82,2.89,44.86,31.65,31.54,10.64,1.81,1.72,16.32,32.54,43.55,1.93,44.8 +sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.54,3.75,3.71,3.17,26.96,30.13,2.01,8.22,63.79,60.96,32.83,1.09,0.18,0.0,51.67,62.71,2.32,8.76 +sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.54,3.75,3.71,3.17,26.96,30.13,2.01,8.25,63.79,60.96,32.83,1.09,0.18,0.0,51.67,63.04,2.32,8.93 Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.56,3.41,4.07,3.21,18.0,26.58,0.63,41.66,26.99,25.74,19.85,1.96,-0.01,16.33,20.94,52.54,0.34,43.55 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.59,3.39,4.08,3.29,28.3,28.95,0.2,36.39,38.96,40.42,19.42,-0.13,0.77,4.7,36.29,39.68,0.96,32.64 +PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.59,3.39,4.1,3.29,28.3,28.95,0.2,36.39,38.96,40.42,19.42,-0.13,0.77,4.7,36.29,39.68,0.96,32.64 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2051,True,False,5484,3.59,3.6,4.3,2.88,7.52,18.3,3.23,46.35,9.06,17.16,25.52,0.68,0.17,0.46,20.49,70.04,2.28,45.85 
-jannesg/bertsson,124,50,512,True,False,15314,3.6,3.71,4.0,3.1,32.63,24.11,2.91,15.37,49.3,46.11,23.21,2.26,-0.66,0.68,51.13,61.67,2.87,17.24 +jannesg/bertsson,124,50,512,True,False,15314,3.6,3.7,4.0,3.1,32.63,24.11,2.91,15.37,49.3,46.11,23.21,2.26,-0.66,0.68,51.13,61.67,2.87,17.24 +AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.61,3.54,3.76,3.54,16.13,27.61,1.96,34.79,27.37,31.22,34.21,0.92,1.25,18.52,23.77,34.29,1.57,33.7 Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,3.61,3.49,4.14,3.21,9.83,29.03,0.56,46.43,12.1,13.42,22.82,2.7,2.21,16.31,18.01,51.91,1.49,44.83 -AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.62,3.55,3.77,3.54,16.13,27.61,1.96,34.79,27.37,31.22,34.21,0.92,1.25,18.52,23.77,34.29,1.57,33.7 -ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.69,3.67,4.14,3.26,16.82,17.52,1.53,40.21,31.16,29.73,17.59,1.07,1.59,6.92,23.26,55.06,1.81,35.49 -PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.79,3.8,4.1,3.48,15.93,13.01,0.05,36.85,28.82,27.81,18.74,-0.46,-0.84,12.66,21.42,45.75,-0.25,32.71 +ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.69,3.66,4.15,3.25,16.82,17.52,1.53,40.21,31.16,29.73,17.59,1.07,1.59,6.92,23.26,55.06,1.81,35.49 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.79,3.8,4.1,3.47,15.93,13.01,0.05,36.85,28.82,27.81,18.74,-0.46,-0.84,12.66,21.42,45.75,-0.25,32.71 +mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,3.8,3.64,4.19,3.58,14.13,26.31,-0.54,32.12,27.37,27.59,18.09,-0.19,-0.8,5.84,23.92,31.93,0.46,30.81 3ebdola/Dialectal-Arabic-XLM-R-Base,278,250,512,True,False,12783,3.81,3.83,4.06,3.55,36.51,22.07,1.63,3.09,55.55,53.53,12.69,2.79,1.66,0.0,42.78,44.95,1.43,8.71 -alexanderfalk/danbert-small-cased,83,52,512,True,False,30013,3.81,3.65,4.01,3.78,33.05,30.67,13.01,1.56,42.18,37.39,24.39,7.29,2.57,0.0,22.47,53.88,1.55,1.12 -mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,3.81,3.64,4.21,3.58,14.13,26.31,-0.54,32.12,27.37,27.59,18.09,-0.19,-0.8,5.84,23.92,31.93,0.46,30.81 -NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,3.82,3.74,4.33,3.4,9.2,32.94,1.59,23.1,10.6,22.63,19.76,2.8,0.17,3.99,4.49,73.13,2.5,22.14 -RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.87,4.06,4.1,3.45,10.12,10.65,-0.66,26.08,21.04,18.71,12.22,-1.18,0.36,26.86,22.38,31.11,0.09,44.36 -PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.88,3.58,4.23,3.84,16.17,29.12,-0.47,34.8,27.47,23.82,22.22,-2.06,-0.77,2.48,14.09,23.71,1.74,32.0 -Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.9,3.83,4.31,3.55,17.38,10.72,1.32,34.58,29.52,31.27,11.49,0.29,-0.12,7.8,18.57,40.23,0.21,29.49 -dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,3.9,3.93,4.13,3.63,33.62,20.71,1.19,4.19,46.11,35.18,19.19,2.76,0.42,0.0,26.87,57.41,-1.06,5.54 -Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.92,3.88,4.35,3.53,19.01,8.88,0.66,32.78,34.46,33.41,6.31,-1.59,0.61,5.95,28.96,26.58,-1.88,34.59 -allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,3.96,3.89,4.4,3.6,13.39,17.94,-2.02,23.65,30.79,31.12,9.95,-0.95,-0.04,0.0,29.39,38.95,-1.35,17.85 +alexanderfalk/danbert-small-cased,83,52,512,True,False,30013,3.81,3.64,4.01,3.78,33.05,30.67,13.01,1.56,42.18,37.39,24.39,7.29,2.57,0.0,22.47,53.88,1.55,1.12 +NbAiLab/nb-llama-3.2-1B 
(few-shot),1236,128,131072,True,False,3424,3.82,3.74,4.32,3.4,9.2,32.94,1.59,23.1,10.6,22.63,19.76,2.8,0.17,3.99,4.49,73.13,2.5,22.14 +RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.87,4.06,4.09,3.45,10.12,10.65,-0.66,26.08,21.04,18.71,12.22,-1.18,0.36,26.86,22.38,31.11,0.09,44.36 +PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.88,3.58,4.22,3.83,16.17,29.12,-0.47,34.8,27.47,23.82,22.22,-2.06,-0.77,2.48,14.09,23.71,1.74,32.0 +Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.89,3.82,4.3,3.54,17.38,10.72,1.32,34.58,29.52,31.27,11.49,0.29,-0.12,7.8,18.57,40.23,0.21,29.49 +dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,3.89,3.93,4.13,3.62,33.62,20.71,1.19,4.19,46.11,35.18,19.19,2.76,0.42,0.0,26.87,57.41,-1.06,5.54 +Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.91,3.88,4.33,3.53,19.01,8.88,0.66,32.78,34.46,33.41,6.31,-1.59,0.61,5.95,28.96,26.58,-1.88,34.59 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,3.92,3.77,4.31,3.68,20.03,15.96,0.86,28.98,29.25,25.45,11.28,1.52,0.52,8.47,26.41,25.99,1.64,21.39 +allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,3.96,3.89,4.4,3.59,13.39,17.94,-2.02,23.65,30.79,31.12,9.95,-0.95,-0.04,0.0,29.39,38.95,-1.35,17.85 RabotaRu/HRBert-mini,80,200,512,True,False,54951,4.05,4.07,4.31,3.76,22.21,20.33,0.9,2.73,31.87,32.47,15.07,1.26,0.49,0.0,24.61,52.31,1.32,2.86 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.18,4.12,4.44,3.98,12.68,3.61,1.79,28.12,26.6,23.7,6.21,-0.39,0.21,4.65,18.22,11.52,1.72,27.27 -fresh-xlm-roberta-base,278,250,512,True,False,2214,4.18,4.24,4.37,3.93,16.04,17.37,1.34,1.58,25.49,25.94,12.6,0.5,1.83,0.0,11.91,51.11,0.86,2.0 -AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4.19,4.15,4.36,4.06,13.98,6.37,0.41,20.46,27.66,30.88,5.13,0.0,0.0,7.55,23.05,12.47,0.08,20.43 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.2,4.04,4.5,4.06,13.84,9.47,-0.36,22.1,26.59,26.78,7.91,0.28,0.04,0.65,22.09,14.15,-0.04,21.6 -NbAiLab/nb-gpt-j-6B-v2 (few-shot),6051,50,1024,False,False,2556,4.21,4.16,4.41,4.07,0.24,27.8,0.56,6.84,5.29,6.77,20.84,0.45,0.48,2.43,0.31,27.42,0.07,17.82 +fresh-xlm-roberta-base,278,250,512,True,False,2214,4.17,4.23,4.37,3.92,16.04,17.37,1.34,1.58,25.49,25.94,12.6,0.5,1.83,0.0,11.91,51.11,0.86,2.0 +AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4.18,4.15,4.35,4.05,13.98,6.37,0.41,20.46,27.66,30.88,5.13,0.0,0.0,7.55,23.05,12.47,0.08,20.43 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.19,4.12,4.46,3.98,12.68,3.61,1.79,28.12,26.6,23.7,6.21,-0.39,0.21,4.65,18.22,11.52,1.72,27.27 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.19,4.04,4.49,4.05,13.84,9.47,-0.36,22.1,26.59,26.78,7.91,0.28,0.04,0.65,22.09,14.15,-0.04,21.6 +NbAiLab/nb-gpt-j-6B-v2 (few-shot),6051,50,1024,False,False,2556,4.21,4.15,4.41,4.06,0.24,27.8,0.56,6.84,5.29,6.77,20.84,0.45,0.48,2.43,0.31,27.42,0.07,17.82 fresh-electra-small,14,31,512,True,False,7840,4.21,4.28,4.42,3.94,12.87,18.61,0.3,0.0,18.38,12.76,15.29,0.17,0.37,0.0,10.54,55.54,-0.15,0.02 PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.23,4.15,4.52,4.01,10.59,13.31,0.52,16.61,25.02,21.59,8.05,-0.15,-0.97,0.37,16.28,17.38,-0.45,17.78 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.24,4.21,4.46,4.05,8.97,2.66,1.65,24.92,20.37,21.27,7.6,1.31,0.51,4.8,13.64,9.34,2.2,26.06 +HuggingFaceTB/SmolLM2-360M-Instruct 
(few-shot),362,49,8192,True,False,21777,4.24,4.21,4.47,4.04,8.97,2.66,1.65,24.92,20.37,21.27,7.6,1.31,0.51,4.8,13.64,9.34,2.2,26.06 NbAiLab/nb-llama-3.2-3B (few-shot),3213,128,131072,True,False,1880,4.27,4.57,4.51,3.73,0.06,8.71,2.9,1.4,2.73,0.26,15.1,13.58,7.78,0.15,1.37,72.06,8.44,0.45 -NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.31,4.25,4.55,4.13,12.82,3.55,0.68,19.85,12.77,10.51,8.7,0.0,0.82,1.85,9.75,17.76,1.22,14.98 -HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.38,4.33,4.61,4.2,12.11,2.61,0.25,14.02,20.89,19.62,2.78,-0.98,0.93,0.15,17.09,7.41,0.47,11.73 -HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.39,4.3,4.46,4.41,13.72,3.79,-0.45,14.69,24.37,24.69,8.84,-1.2,-0.5,0.16,19.15,-3.03,0.06,14.18 -AI-Sweden-Models/gpt-sw3-126m (few-shot),186,64,2048,True,False,8958,4.41,4.25,4.65,4.33,3.43,9.18,-0.22,16.64,13.55,9.38,7.78,-1.46,-2.97,2.32,5.66,8.15,-0.81,16.4 -NbAiLab/nb-gpt-j-6B@sharded (few-shot),-1,50,1024,True,False,2630,4.41,4.44,4.54,4.26,0.36,11.0,-0.11,5.15,0.22,0.24,20.64,-0.99,-0.15,0.53,0.01,33.5,-0.02,4.79 +NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.31,4.25,4.55,4.12,12.82,3.55,0.68,19.85,12.77,10.51,8.7,0.0,0.82,1.85,9.75,17.76,1.22,14.98 +HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.38,4.33,4.61,4.19,12.11,2.61,0.25,14.02,20.89,19.62,2.78,-0.98,0.93,0.15,17.09,7.41,0.47,11.73 +HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.39,4.3,4.46,4.4,13.72,3.79,-0.45,14.69,24.37,24.69,8.84,-1.2,-0.5,0.16,19.15,-3.03,0.06,14.18 +AI-Sweden-Models/gpt-sw3-126m (few-shot),186,64,2048,True,False,8958,4.4,4.25,4.64,4.32,3.43,9.18,-0.22,16.64,13.55,9.38,7.78,-1.46,-2.97,2.32,5.66,8.15,-0.81,16.4 +NbAiLab/nb-gpt-j-6B@sharded (few-shot),-1,50,1024,True,False,2630,4.41,4.43,4.53,4.26,0.36,11.0,-0.11,5.15,0.22,0.24,20.64,-0.99,-0.15,0.53,0.01,33.5,-0.02,4.79 RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.61,4.57,4.9,4.35,0.0,13.0,0.0,0.0,0.0,0.0,0.95,0.0,0.0,0.0,0.0,34.63,0.0,0.0 RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.66,4.72,4.9,4.35,0.0,4.81,0.0,0.0,0.0,0.0,1.27,0.0,0.0,0.0,0.0,28.62,0.0,0.0 -ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.75,4.68,4.85,4.72,0.65,2.61,-0.73,1.99,0.08,0.0,4.76,0.67,-0.88,0.0,0.0,0.0,0.49,6.24 -NorGLM/NorGPT-369M (few-shot),-1,64,1024,True,False,19896,4.76,4.7,4.8,4.77,1.13,2.06,-0.36,0.32,3.14,3.0,3.41,0.22,0.27,0.0,1.47,5.5,-2.19,0.1 -peter-sk/gpt-neox-da (few-shot),1515,50,1024,True,False,6025,4.82,4.78,4.94,4.74,0.64,-0.52,-0.02,0.48,0.29,0.25,-1.43,-0.42,1.11,0.0,0.26,4.75,-0.6,0.06 +ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.74,4.68,4.84,4.71,0.65,2.61,-0.73,1.99,0.08,0.0,4.76,0.67,-0.88,0.0,0.0,0.0,0.49,6.24 +NorGLM/NorGPT-369M (few-shot),-1,64,1024,True,False,19896,4.75,4.7,4.8,4.76,1.13,2.06,-0.36,0.32,3.14,3.0,3.41,0.22,0.27,0.0,1.47,5.5,-2.19,0.1 +peter-sk/gpt-neox-da (few-shot),1515,50,1024,True,False,6025,4.82,4.78,4.93,4.74,0.64,-0.52,-0.02,0.48,0.29,0.25,-1.43,-0.42,1.11,0.0,0.26,4.75,-0.6,0.06 Sigurdur/qa-icebreaker (few-shot),110,32,1024,False,False,44889,4.84,4.8,4.9,4.82,0.0,0.07,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-0.1,0.0,0.0 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.85,4.8,4.92,4.82,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +ssmits/Falcon2-5.5B-multilingual 
(few-shot),5465,65,8192,True,False,7692,4.84,4.79,4.92,4.82,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 Sigurdur/icebreaker (few-shot),110,32,1024,False,False,48619,4.86,4.8,4.92,4.87,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3.6,0.0,0.0 diff --git a/mainland-scandinavian-nlu.md b/mainland-scandinavian-nlu.md index 689e226c..7038c1e2 100644 --- a/mainland-scandinavian-nlu.md +++ b/mainland-scandinavian-nlu.md @@ -3,7 +3,7 @@ layout: leaderboard title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 --- -
Last updated: 10/01/2025 12:31:07 CET
+
Last updated: 11/01/2025 11:04:17 CET
@@ -585,10 +585,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 512 True 6,732 ± 1,273 / 1,633 ± 523 - 1.60 + 1.59 1.60 1.72 - 1.47 + 1.46 69.50 ± 1.78 / 65.03 ± 1.31 55.07 ± 1.53 / 69.43 ± 1.47 57.67 ± 2.56 / 78.48 ± 1.32 @@ -938,6 +938,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 12.9.0 + + sentence-transformers/use-cmlm-multilingual + 471 + 501 + 512 + True + 30,231 ± 8,171 / 4,863 ± 1,598 + 1.70 + 1.86 + 1.68 + 1.57 + 69.17 ± 2.07 / 65.80 ± 1.78 + 48.03 ± 0.74 / 65.34 ± 0.40 + 55.31 ± 2.29 / 76.29 ± 1.57 + 42.34 ± 3.05 / 47.57 ± 3.10 + 90.08 ± 0.76 / 87.12 ± 1.08 + 86.04 ± 0.78 / 81.89 ± 0.98 + 56.35 ± 1.25 / 69.31 ± 1.02 + 59.38 ± 2.52 / 78.02 ± 1.71 + 46.54 ± 3.21 / 71.78 ± 2.00 + 55.05 ± 1.24 / 70.46 ± 1.22 + 80.05 ± 1.13 / 74.21 ± 1.26 + 75.09 ± 1.30 / 72.93 ± 2.37 + 61.83 ± 1.28 / 79.96 ± 0.82 + 45.69 ± 1.11 / 51.07 ± 1.04 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + KennethEnevoldsen/dfm-sentence-encoder-medium-3 178 @@ -978,46 +1018,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - sentence-transformers/use-cmlm-multilingual - 471 - 501 - 512 - True - 30,231 ± 8,171 / 4,863 ± 1,598 - 1.71 - 1.86 - 1.68 - 1.58 - 69.17 ± 2.07 / 65.80 ± 1.78 - 48.03 ± 0.74 / 65.34 ± 0.40 - 55.31 ± 2.29 / 76.29 ± 1.57 - 42.34 ± 3.05 / 47.57 ± 3.10 - 90.08 ± 0.76 / 87.12 ± 1.08 - 86.04 ± 0.78 / 81.89 ± 0.98 - 56.35 ± 1.25 / 69.31 ± 1.02 - 59.38 ± 2.52 / 78.02 ± 1.71 - 46.54 ± 3.21 / 71.78 ± 2.00 - 55.05 ± 1.24 / 70.46 ± 1.22 - 80.05 ± 1.13 / 74.21 ± 1.26 - 75.09 ± 1.30 / 72.93 ± 2.37 - 61.83 ± 1.28 / 79.96 ± 0.82 - 45.69 ± 1.11 / 51.07 ± 1.04 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - vesteinn/ScandiBERT-no-faroese 124 @@ -1065,8 +1065,8 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 512 True 14,050 ± 3,222 / 2,727 ± 886 - 1.74 - 1.81 + 1.73 + 1.80 1.67 1.73 70.36 ± 1.61 / 66.92 ± 1.70 @@ -1138,6 +1138,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 14.0.3 14.0.3 + + NbAiLab/nb-sbert-base + 178 + 120 + 512 + True + 17,757 ± 3,883 / 3,864 ± 1,237 + 1.76 + 1.82 + 1.72 + 1.75 + 70.12 ± 1.61 / 65.80 ± 1.43 + 47.83 ± 1.05 / 65.23 ± 0.75 + 63.25 ± 2.38 / 80.82 ± 1.42 + 36.51 ± 2.05 / 41.01 ± 2.35 + 90.96 ± 0.66 / 90.87 ± 0.65 + 87.34 ± 1.74 / 88.75 ± 1.38 + 60.57 ± 1.22 / 72.70 ± 0.75 + 72.11 ± 1.85 / 85.08 ± 1.24 + 70.20 ± 2.24 / 84.26 ± 1.48 + 29.94 ± 3.33 / 40.55 ± 4.21 + 80.26 ± 1.10 / 77.87 ± 1.03 + 71.05 ± 0.75 / 69.24 ± 2.39 + 62.49 ± 2.06 / 80.42 ± 1.54 + 33.80 ± 1.33 / 38.28 ± 1.45 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + gpt-4o-2024-05-13 (zero-shot, val) unknown @@ -1218,46 +1258,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - NbAiLab/nb-sbert-base - 178 - 120 - 512 - True - 17,757 ± 3,883 / 3,864 ± 1,237 - 1.77 - 1.82 - 1.72 - 1.76 - 70.12 ± 1.61 / 65.80 ± 1.43 - 47.83 ± 1.05 / 65.23 ± 0.75 - 63.25 ± 2.38 / 80.82 ± 1.42 - 36.51 ± 2.05 / 41.01 ± 2.35 - 90.96 ± 0.66 / 90.87 ± 0.65 - 87.34 ± 1.74 / 88.75 ± 1.38 - 60.57 ± 1.22 / 72.70 ± 0.75 - 72.11 ± 1.85 / 85.08 ± 1.24 - 70.20 ± 2.24 / 84.26 ± 1.48 - 29.94 ± 3.33 / 40.55 ± 4.21 - 80.26 ± 1.10 / 77.87 ± 1.03 - 71.05 ± 0.75 / 69.24 ± 2.39 - 62.49 ± 2.06 / 80.42 ± 1.54 - 33.80 ± 1.33 / 38.28 ± 1.45 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot) 
28411 @@ -1338,6 +1338,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 + + Qwen/Qwen2.5-72B-Instruct (few-shot) + 72706 + 152 + 32768 + True + 1,219 ± 412 / 158 ± 53 + 1.80 + 1.53 + 2.30 + 1.58 + 63.81 ± 1.27 / 42.18 ± 2.59 + 54.50 ± 1.55 / 69.89 ± 1.02 + 57.19 ± 1.30 / 78.30 ± 0.79 + 55.77 ± 0.97 / 67.26 ± 0.69 + 72.21 ± 1.25 / 49.58 ± 2.05 + 70.24 ± 1.23 / 52.09 ± 2.38 + 39.85 ± 0.65 / 39.69 ± 0.52 + 63.14 ± 1.27 / 81.41 ± 0.62 + 43.24 ± 1.56 / 71.36 ± 0.86 + 43.41 ± 3.02 / 73.20 ± 2.14 + 62.12 ± 2.14 / 39.19 ± 2.64 + 79.89 ± 0.97 / 80.23 ± 0.85 + 61.71 ± 1.08 / 80.82 ± 0.54 + 54.99 ± 0.94 / 66.56 ± 0.46 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + pere/roberta-base-exp-8 278 @@ -1378,46 +1418,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - Qwen/Qwen2.5-72B-Instruct (few-shot) - 72706 - 152 - 32768 - True - 1,219 ± 412 / 158 ± 53 - 1.81 - 1.53 - 2.31 - 1.58 - 63.81 ± 1.27 / 42.18 ± 2.59 - 54.50 ± 1.55 / 69.89 ± 1.02 - 57.19 ± 1.30 / 78.30 ± 0.79 - 55.77 ± 0.97 / 67.26 ± 0.69 - 72.21 ± 1.25 / 49.58 ± 2.05 - 70.24 ± 1.23 / 52.09 ± 2.38 - 39.85 ± 0.65 / 39.69 ± 0.52 - 63.14 ± 1.27 / 81.41 ± 0.62 - 43.24 ± 1.56 / 71.36 ± 0.86 - 43.41 ± 3.02 / 73.20 ± 2.14 - 62.12 ± 2.14 / 39.19 ± 2.64 - 79.89 ± 0.97 / 80.23 ± 0.85 - 61.71 ± 1.08 / 80.82 ± 0.54 - 54.99 ± 0.94 / 66.56 ± 0.46 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - setu4993/LaBSE 471 @@ -1828,7 +1828,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 1.98 1.84 2.09 - 2.02 + 2.01 63.10 ± 2.12 / 55.10 ± 1.44 53.09 ± 3.85 / 68.18 ± 2.27 40.98 ± 4.46 / 69.10 ± 2.72 @@ -2219,56 +2219,16 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.7.0 - KBLab/megatron-bert-large-swedish-cased-165k - 370 - 64 - 512 - True - 7,138 ± 1,111 / 2,067 ± 660 - 2.13 - 2.44 - 2.56 - 1.38 - 58.50 ± 4.21 / 55.82 ± 3.50 - 41.02 ± 1.64 / 60.13 ± 1.52 - 27.10 ± 3.59 / 61.03 ± 2.41 - 39.99 ± 1.25 / 45.72 ± 1.27 - 85.99 ± 0.83 / 83.09 ± 0.94 - 79.47 ± 1.14 / 75.61 ± 1.34 - 39.53 ± 0.99 / 50.90 ± 2.17 - 27.39 ± 2.48 / 61.03 ± 2.27 - 23.56 ± 2.23 / 60.05 ± 1.05 - 39.01 ± 1.18 / 51.83 ± 1.58 - 81.05 ± 1.34 / 76.08 ± 1.45 - 78.00 ± 0.89 / 75.01 ± 2.18 - 76.79 ± 1.70 / 87.59 ± 1.06 - 45.71 ± 1.09 / 51.70 ± 0.89 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - - - gpt-4o-mini-2024-07-18 (zero-shot, val) - unknown - 200 - 8191 + gpt-4o-mini-2024-07-18 (zero-shot, val) + unknown + 200 + 8191 True 908 ± 303 / 96 ± 36 - 2.13 + 2.12 1.79 2.62 - 1.97 + 1.96 57.52 ± 2.21 / 35.79 ± 1.78 49.73 ± 3.09 / 65.95 ± 2.22 57.56 ± 2.67 / 77.16 ± 1.35 @@ -2298,6 +2258,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 14.0.1 14.0.1 + + KBLab/megatron-bert-large-swedish-cased-165k + 370 + 64 + 512 + True + 7,138 ± 1,111 / 2,067 ± 660 + 2.13 + 2.44 + 2.56 + 1.38 + 58.50 ± 4.21 / 55.82 ± 3.50 + 41.02 ± 1.64 / 60.13 ± 1.52 + 27.10 ± 3.59 / 61.03 ± 2.41 + 39.99 ± 1.25 / 45.72 ± 1.27 + 85.99 ± 0.83 / 83.09 ± 0.94 + 79.47 ± 1.14 / 75.61 ± 1.34 + 39.53 ± 0.99 / 50.90 ± 2.17 + 27.39 ± 2.48 / 61.03 ± 2.27 + 23.56 ± 2.23 / 60.05 ± 1.05 + 39.01 ± 1.18 / 51.83 ± 1.58 + 81.05 ± 1.34 / 76.08 ± 1.45 + 78.00 ± 0.89 / 75.01 ± 2.18 + 76.79 ± 1.70 / 87.59 ± 1.06 + 45.71 ± 1.09 / 51.70 ± 0.89 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + AI-Nordics/bert-large-swedish-cased 335 @@ -2347,7 +2347,7 @@ title: Mainland 
Scandinavian NLU 🇩🇰🇳🇴🇸🇪 784 ± 310 / 95 ± 28 2.14 2.01 - 2.29 + 2.28 2.13 59.96 ± 1.64 / 41.55 ± 2.90 56.91 ± 2.34 / 71.25 ± 1.60 @@ -2387,7 +2387,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3,701 ± 876 / 771 ± 247 2.15 2.20 - 2.29 + 2.30 1.95 54.91 ± 1.53 / 36.29 ± 1.85 44.38 ± 2.83 / 57.19 ± 3.61 @@ -2426,7 +2426,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 False 3,583 ± 977 / 686 ± 231 2.16 - 2.03 + 2.02 2.53 1.93 49.17 ± 2.72 / 34.74 ± 2.27 @@ -2545,8 +2545,8 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 4096 True 3,780 ± 906 / 799 ± 261 - 2.18 - 2.15 + 2.17 + 2.14 2.38 2.00 58.03 ± 2.18 / 38.25 ± 2.31 @@ -2867,7 +2867,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 1,472 ± 376 / 284 ± 96 2.26 2.06 - 3.08 + 3.07 1.65 58.57 ± 1.03 / 41.07 ± 2.81 51.63 ± 1.00 / 67.43 ± 0.78 @@ -3107,7 +3107,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 1,979 ± 621 / 320 ± 105 2.28 2.13 - 2.60 + 2.59 2.12 52.22 ± 2.07 / 38.82 ± 1.90 50.66 ± 1.88 / 62.04 ± 2.83 @@ -3338,13 +3338,53 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 + + birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val) + 7242 + 32 + 32768 + False + 2,890 ± 401 / 1,155 ± 348 + 2.30 + 2.10 + 2.73 + 2.08 + 50.40 ± 2.92 / 38.57 ± 2.82 + 52.30 ± 2.65 / 64.19 ± 2.60 + 21.30 ± 3.52 / 47.78 ± 4.14 + 58.17 ± 1.71 / 63.79 ± 1.42 + 53.96 ± 3.37 / 49.84 ± 3.30 + 63.45 ± 2.27 / 53.13 ± 3.43 + 52.70 ± 4.58 / 66.82 ± 3.41 + 14.87 ± 3.37 / 40.83 ± 1.91 + 2.48 ± 3.31 / 35.61 ± 1.83 + 41.43 ± 3.34 / 67.26 ± 2.73 + 52.96 ± 3.45 / 41.51 ± 4.30 + 76.99 ± 2.37 / 76.84 ± 2.99 + 14.27 ± 4.36 / 40.60 ± 3.04 + 59.92 ± 1.64 / 64.87 ± 1.47 + 9.3.1 + 9.3.1 + 9.3.1 + 12.5.2 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 12.5.2 + 9.3.1 + 9.3.1 + 9.3.1 + 12.5.2 + meta-llama/Llama-3.1-8B-Instruct (few-shot) 8030 128 131072 True - 1,005 ± 330 / 196 ± 74 + 1,473 ± 377 / 283 ± 96 2.30 2.03 2.80 @@ -3467,7 +3507,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 13,973 ± 3,205 / 2,725 ± 884 2.31 2.34 - 2.47 + 2.46 2.12 63.38 ± 2.39 / 59.20 ± 1.98 34.78 ± 1.49 / 55.59 ± 0.92 @@ -3538,46 +3578,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val) - 7242 - 32 - 32768 - False - 2,890 ± 401 / 1,155 ± 348 - 2.31 - 2.11 - 2.73 - 2.08 - 50.40 ± 2.92 / 38.57 ± 2.82 - 52.30 ± 2.65 / 64.19 ± 2.60 - 21.30 ± 3.52 / 47.78 ± 4.14 - 58.17 ± 1.71 / 63.79 ± 1.42 - 53.96 ± 3.37 / 49.84 ± 3.30 - 63.45 ± 2.27 / 53.13 ± 3.43 - 52.70 ± 4.58 / 66.82 ± 3.41 - 14.87 ± 3.37 / 40.83 ± 1.91 - 2.48 ± 3.31 / 35.61 ± 1.83 - 41.43 ± 3.34 / 67.26 ± 2.73 - 52.96 ± 3.45 / 41.51 ± 4.30 - 76.99 ± 2.37 / 76.84 ± 2.99 - 14.27 ± 4.36 / 40.60 ± 3.04 - 59.92 ± 1.64 / 64.87 ± 1.47 - 9.3.1 - 9.3.1 - 9.3.1 - 12.5.2 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 12.5.2 - 9.3.1 - 9.3.1 - 9.3.1 - 12.5.2 - timpal0l/BeagleCatMunin2 (few-shot, val) 7242 @@ -3738,46 +3738,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - meta-llama/Meta-Llama-3-8B (few-shot) - 8030 - 128 - 8192 - True - 1,335 ± 338 / 260 ± 88 - 2.34 - 2.26 - 2.72 - 2.05 - 49.46 ± 1.88 / 32.11 ± 2.41 - 51.16 ± 2.15 / 67.00 ± 1.51 - 23.01 ± 3.93 / 49.99 ± 4.63 - 49.75 ± 5.10 / 56.13 ± 4.89 - 61.48 ± 1.83 / 47.65 ± 2.94 - 61.58 ± 2.21 / 50.10 ± 2.68 - 32.94 ± 0.86 / 37.52 ± 0.43 - 21.20 ± 6.57 / 52.29 ± 7.43 - 19.65 ± 4.32 / 56.66 ± 4.40 - 53.35 ± 4.33 / 74.98 ± 3.70 - 59.92 ± 2.46 / 40.98 ± 4.90 - 80.91 ± 0.41 / 78.09 ± 1.22 - 26.39 ± 3.47 / 52.38 ± 4.49 - 47.69 ± 6.29 / 54.30 ± 6.65 - 14.0.3 - 14.1.2 - 14.1.2 - 14.0.3 - 12.6.1 - 12.6.1 - 14.1.2 - 12.6.1 - 12.6.1 - 12.6.1 - 14.0.4 - 14.1.2 - 
14.1.2 - 14.0.4 - mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot) 341029 @@ -3785,10 +3745,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 4096 True 1,904 ± 475 / 361 ± 121 - 2.34 - 2.25 - 2.61 - 2.16 + 2.33 + 2.24 + 2.60 + 2.15 48.24 ± 9.94 / 31.90 ± 6.18 39.52 ± 7.04 / 57.63 ± 7.05 62.92 ± 7.59 / 79.60 ± 5.59 @@ -4144,21 +4104,21 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 128 8192 True - 1,007 ± 316 / 162 ± 45 + 1,483 ± 377 / 287 ± 97 2.36 2.22 - 2.70 + 2.71 2.16 57.74 ± 2.06 / 40.66 ± 2.58 48.43 ± 3.31 / 62.09 ± 3.62 27.12 ± 2.83 / 60.40 ± 2.70 46.76 ± 1.20 / 59.77 ± 0.51 - 74.47 ± 1.47 / 65.57 ± 2.39 - 72.93 ± 1.00 / 65.44 ± 2.55 - 34.44 ± 0.42 / 37.94 ± 0.39 - 27.77 ± 1.63 / 61.75 ± 1.77 - 20.35 ± 1.92 / 57.74 ± 2.28 - 42.90 ± 3.57 / 69.90 ± 3.17 + 66.56 ± 1.70 / 58.47 ± 2.40 + 68.29 ± 1.47 / 61.22 ± 2.25 + 34.47 ± 0.44 / 37.96 ± 0.40 + 28.22 ± 1.37 / 59.51 ± 1.44 + 18.21 ± 2.21 / 52.84 ± 2.30 + 47.34 ± 1.99 / 73.23 ± 1.29 69.67 ± 1.30 / 52.94 ± 4.01 59.93 ± 4.70 / 67.54 ± 3.04 27.63 ± 3.19 / 60.85 ± 3.29 @@ -4167,12 +4127,12 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.6.1 12.6.1 12.6.1 - 12.6.1 - 12.6.1 - 14.0.4 - 12.6.1 - 12.6.1 - 12.6.1 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 12.6.1 12.6.1 12.6.1 @@ -4306,7 +4266,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 False 1,483 ± 321 / 379 ± 158 2.37 - 2.22 + 2.21 2.77 2.13 34.00 ± 2.69 / 25.49 ± 2.06 @@ -4538,34 +4498,74 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.5.3 12.5.3 - - RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val) - 7242 - 32 - 32768 - False - 3,008 ± 429 / 991 ± 323 + + Geotrend/bert-base-da-cased + 104 + 23 + 512 + True + 15,432 ± 2,838 / 3,642 ± 1,189 2.41 - 2.31 - 2.85 - 2.06 - 50.83 ± 2.28 / 36.96 ± 2.58 - 43.41 ± 2.56 / 48.74 ± 2.83 - 19.72 ± 4.69 / 52.71 ± 5.26 - 57.87 ± 2.32 / 64.53 ± 1.73 - 53.68 ± 2.01 / 49.22 ± 2.67 - 61.92 ± 4.06 / 49.03 ± 3.97 - 47.78 ± 3.19 / 57.76 ± 2.55 - 0.91 ± 1.78 / 33.51 ± 0.85 - 1.24 ± 1.66 / 33.71 ± 0.94 - 47.76 ± 2.93 / 70.99 ± 2.39 - 59.36 ± 2.75 / 47.08 ± 4.17 - 72.04 ± 3.27 / 63.83 ± 2.07 - 22.38 ± 7.17 / 54.70 ± 5.49 - 57.96 ± 2.00 / 64.06 ± 1.76 - 9.3.2 - 9.3.2 - 9.3.2 + 2.51 + 2.56 + 2.17 + 62.76 ± 1.92 / 58.88 ± 1.74 + 32.06 ± 1.44 / 52.57 ± 1.80 + 30.95 ± 11.93 / 63.72 ± 6.84 + 37.79 ± 2.37 / 42.36 ± 2.56 + 87.52 ± 0.63 / 83.86 ± 0.68 + 82.66 ± 1.64 / 78.65 ± 2.01 + 32.73 ± 1.37 / 46.52 ± 1.86 + 36.41 ± 8.89 / 65.20 ± 6.41 + 30.37 ± 5.50 / 62.12 ± 5.66 + 37.71 ± 1.11 / 49.90 ± 1.47 + 74.13 ± 1.17 / 68.93 ± 1.36 + 62.18 ± 1.26 / 59.44 ± 2.35 + 36.93 ± 6.47 / 65.97 ± 6.05 + 37.59 ± 1.99 / 41.94 ± 2.23 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + + + RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val) + 7242 + 32 + 32768 + False + 3,008 ± 429 / 991 ± 323 + 2.41 + 2.31 + 2.85 + 2.06 + 50.83 ± 2.28 / 36.96 ± 2.58 + 43.41 ± 2.56 / 48.74 ± 2.83 + 19.72 ± 4.69 / 52.71 ± 5.26 + 57.87 ± 2.32 / 64.53 ± 1.73 + 53.68 ± 2.01 / 49.22 ± 2.67 + 61.92 ± 4.06 / 49.03 ± 3.97 + 47.78 ± 3.19 / 57.76 ± 2.55 + 0.91 ± 1.78 / 33.51 ± 0.85 + 1.24 ± 1.66 / 33.71 ± 0.94 + 47.76 ± 2.93 / 70.99 ± 2.39 + 59.36 ± 2.75 / 47.08 ± 4.17 + 72.04 ± 3.27 / 63.83 ± 2.07 + 22.38 ± 7.17 / 54.70 ± 5.49 + 57.96 ± 2.00 / 64.06 ± 1.76 + 9.3.2 + 9.3.2 + 9.3.2 12.5.2 9.3.2 9.3.2 @@ -4618,46 +4618,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 14.1.2 14.0.4 - - Geotrend/bert-base-da-cased - 104 - 23 - 512 - True - 15,432 ± 2,838 / 3,642 ± 1,189 - 2.42 - 2.51 - 2.57 - 2.17 - 62.76 ± 1.92 / 58.88 ± 1.74 - 32.06 ± 1.44 / 52.57 ± 
1.80 - 30.95 ± 11.93 / 63.72 ± 6.84 - 37.79 ± 2.37 / 42.36 ± 2.56 - 87.52 ± 0.63 / 83.86 ± 0.68 - 82.66 ± 1.64 / 78.65 ± 2.01 - 32.73 ± 1.37 / 46.52 ± 1.86 - 36.41 ± 8.89 / 65.20 ± 6.41 - 30.37 ± 5.50 / 62.12 ± 5.66 - 37.71 ± 1.11 / 49.90 ± 1.47 - 74.13 ± 1.17 / 68.93 ± 1.36 - 62.18 ± 1.26 / 59.44 ± 2.35 - 36.93 ± 6.47 / 65.97 ± 6.05 - 37.59 ± 1.99 / 41.94 ± 2.23 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - KennethEnevoldsen/munin_mistral-7b (few-shot, val) 7242 @@ -4778,6 +4738,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 13.0.0 13.0.0 + + meta-llama/Meta-Llama-3-8B (few-shot) + 8030 + 128 + 8192 + True + 1,477 ± 376 / 285 ± 97 + 2.44 + 2.26 + 3.01 + 2.05 + 49.46 ± 1.88 / 32.11 ± 2.41 + 51.16 ± 2.15 / 67.00 ± 1.51 + 23.01 ± 3.93 / 49.99 ± 4.63 + 49.75 ± 5.10 / 56.13 ± 4.89 + 62.89 ± 2.01 / 50.34 ± 1.92 + 56.18 ± 3.26 / 47.30 ± 2.62 + 33.07 ± 0.73 / 37.60 ± 0.49 + 30.73 ± 3.19 / 64.06 ± 2.41 + 20.57 ± 3.62 / 54.22 ± 5.15 + 30.77 ± 4.33 / 54.66 ± 4.41 + 59.92 ± 2.46 / 40.98 ± 4.90 + 80.91 ± 0.41 / 78.09 ± 1.22 + 26.39 ± 3.47 / 52.38 ± 4.49 + 47.69 ± 6.29 / 54.30 ± 6.65 + 14.0.3 + 14.1.2 + 14.1.2 + 14.0.3 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 + 14.1.2 + 14.1.2 + 14.0.4 + senseable/WestLake-7B-v2 (few-shot) 7242 @@ -4945,9 +4945,9 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 512 True 34,735 ± 7,558 / 6,846 ± 2,312 - 2.46 + 2.45 2.53 - 2.51 + 2.50 2.33 69.78 ± 1.59 / 65.83 ± 2.08 46.78 ± 1.57 / 64.46 ± 1.17 @@ -4985,9 +4985,9 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 8192 True 1,378 ± 260 / 387 ± 119 - 2.47 + 2.46 2.46 - 2.94 + 2.92 2.00 19.59 ± 2.54 / 15.47 ± 2.19 46.55 ± 1.89 / 59.52 ± 3.56 @@ -5458,6 +5458,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.5.2 12.5.2 + + flax-community/nordic-roberta-wiki + 125 + 50 + 512 + True + 16,227 ± 2,650 / 4,252 ± 1,393 + 2.54 + 2.50 + 2.98 + 2.15 + 60.82 ± 2.03 / 57.64 ± 2.08 + 34.45 ± 0.78 / 55.56 ± 0.68 + 41.89 ± 9.80 / 70.04 ± 5.10 + 26.83 ± 1.26 / 31.55 ± 1.26 + 85.42 ± 0.61 / 82.31 ± 0.65 + 78.92 ± 1.42 / 74.86 ± 1.50 + 36.27 ± 1.57 / 50.95 ± 1.70 + 48.07 ± 5.64 / 72.00 ± 4.07 + 29.81 ± 3.52 / 64.03 ± 2.35 + 0.44 ± 0.41 / 1.08 ± 0.99 + 72.90 ± 1.37 / 66.93 ± 1.30 + 61.11 ± 1.28 / 58.97 ± 2.27 + 55.05 ± 1.64 / 76.76 ± 0.93 + 29.04 ± 1.16 / 33.60 ± 1.06 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + jhu-clsp/bernice 278 @@ -5467,7 +5507,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 5,567 ± 450 / 2,483 ± 798 2.54 2.53 - 2.88 + 2.87 2.22 61.98 ± 2.00 / 58.30 ± 2.12 47.20 ± 1.34 / 64.51 ± 1.21 @@ -5498,6 +5538,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 + + CohereForAI/c4ai-command-r-v01 (few-shot) + 34981 + 256 + 8192 + False + 1,919 ± 645 / 248 ± 83 + 2.55 + 2.29 + 3.03 + 2.34 + 53.44 ± 1.53 / 33.98 ± 1.94 + 49.17 ± 1.04 / 61.82 ± 1.64 + 20.55 ± 1.56 / 51.55 ± 2.37 + 51.70 ± 1.48 / 62.01 ± 1.21 + 61.54 ± 2.53 / 42.44 ± 2.12 + 60.94 ± 1.48 / 43.92 ± 2.09 + 35.73 ± 0.66 / 38.06 ± 0.54 + 21.33 ± 2.08 / 52.01 ± 3.05 + 13.20 ± 1.51 / 52.20 ± 2.53 + 32.36 ± 1.99 / 61.64 ± 1.20 + 47.15 ± 2.31 / 29.98 ± 2.07 + 80.24 ± 0.75 / 78.86 ± 1.28 + 11.35 ± 1.20 / 41.82 ± 1.41 + 49.93 ± 1.69 / 61.00 ± 1.14 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + clips/mfaq 278 @@ -5507,7 +5587,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 5,591 ± 187 / 3,349 ± 1,105 2.55 2.57 - 2.70 + 2.71 2.37 68.49 ± 2.09 / 64.72 ± 2.02 
45.60 ± 1.76 / 63.53 ± 1.48 @@ -5539,89 +5619,9 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 - flax-community/nordic-roberta-wiki - 125 - 50 - 512 - True - 16,227 ± 2,650 / 4,252 ± 1,393 - 2.55 - 2.51 - 2.98 - 2.16 - 60.82 ± 2.03 / 57.64 ± 2.08 - 34.45 ± 0.78 / 55.56 ± 0.68 - 41.89 ± 9.80 / 70.04 ± 5.10 - 26.83 ± 1.26 / 31.55 ± 1.26 - 85.42 ± 0.61 / 82.31 ± 0.65 - 78.92 ± 1.42 / 74.86 ± 1.50 - 36.27 ± 1.57 / 50.95 ± 1.70 - 48.07 ± 5.64 / 72.00 ± 4.07 - 29.81 ± 3.52 / 64.03 ± 2.35 - 0.44 ± 0.41 / 1.08 ± 0.99 - 72.90 ± 1.37 / 66.93 ± 1.30 - 61.11 ± 1.28 / 58.97 ± 2.27 - 55.05 ± 1.64 / 76.76 ± 0.93 - 29.04 ± 1.16 / 33.60 ± 1.06 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - - - CohereForAI/c4ai-command-r-v01 (few-shot) - 34981 - 256 - 8192 - False - 1,919 ± 645 / 248 ± 83 - 2.56 - 2.29 - 3.04 - 2.34 - 53.44 ± 1.53 / 33.98 ± 1.94 - 49.17 ± 1.04 / 61.82 ± 1.64 - 20.55 ± 1.56 / 51.55 ± 2.37 - 51.70 ± 1.48 / 62.01 ± 1.21 - 61.54 ± 2.53 / 42.44 ± 2.12 - 60.94 ± 1.48 / 43.92 ± 2.09 - 35.73 ± 0.66 / 38.06 ± 0.54 - 21.33 ± 2.08 / 52.01 ± 3.05 - 13.20 ± 1.51 / 52.20 ± 2.53 - 32.36 ± 1.99 / 61.64 ± 1.20 - 47.15 ± 2.31 / 29.98 ± 2.07 - 80.24 ± 0.75 / 78.86 ± 1.28 - 11.35 ± 1.20 / 41.82 ± 1.41 - 49.93 ± 1.69 / 61.00 ± 1.14 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - - - KBLab/bert-base-swedish-cased-new - 135 - 64 + KBLab/bert-base-swedish-cased-new + 135 + 64 512 True 15,933 ± 2,541 / 4,289 ± 1,376 @@ -5738,6 +5738,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 + + RuterNorway/Llama-2-13b-chat-norwegian (few-shot) + unknown + 32 + 4096 + False + 3,254 ± 1,068 / 484 ± 173 + 2.58 + 2.48 + 3.00 + 2.27 + 43.17 ± 2.78 / 31.37 ± 2.95 + 43.40 ± 2.20 / 57.24 ± 3.52 + 11.08 ± 2.98 / 43.40 ± 4.66 + 56.81 ± 0.70 / 63.10 ± 0.35 + 58.61 ± 1.58 / 47.74 ± 2.83 + 60.40 ± 1.25 / 47.53 ± 2.68 + 41.36 ± 3.50 / 58.47 ± 3.79 + 6.52 ± 2.11 / 38.10 ± 2.56 + 3.95 ± 2.52 / 42.37 ± 4.20 + 38.93 ± 2.43 / 65.76 ± 3.07 + 50.85 ± 2.44 / 39.65 ± 3.83 + 74.17 ± 2.12 / 76.62 ± 1.83 + 7.51 ± 1.94 / 37.81 ± 1.76 + 57.32 ± 0.63 / 63.28 ± 0.71 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + bineric/NorskGPT-Llama-13B-v0.1 (few-shot) unknown @@ -5778,46 +5818,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.10.4 12.10.4 - - RuterNorway/Llama-2-13b-chat-norwegian (few-shot) - unknown - 32 - 4096 - False - 3,254 ± 1,068 / 484 ± 173 - 2.59 - 2.49 - 3.00 - 2.27 - 43.17 ± 2.78 / 31.37 ± 2.95 - 43.40 ± 2.20 / 57.24 ± 3.52 - 11.08 ± 2.98 / 43.40 ± 4.66 - 56.81 ± 0.70 / 63.10 ± 0.35 - 58.61 ± 1.58 / 47.74 ± 2.83 - 60.40 ± 1.25 / 47.53 ± 2.68 - 41.36 ± 3.50 / 58.47 ± 3.79 - 6.52 ± 2.11 / 38.10 ± 2.56 - 3.95 ± 2.52 / 42.37 ± 4.20 - 38.93 ± 2.43 / 65.76 ± 3.07 - 50.85 ± 2.44 / 39.65 ± 3.83 - 74.17 ± 2.12 / 76.62 ± 1.83 - 7.51 ± 1.94 / 37.81 ± 1.76 - 57.32 ± 0.63 / 63.28 ± 0.71 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - sentence-transformers/paraphrase-multilingual-mpnet-base-v2 278 @@ -5948,7 +5948,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 2.62 2.40 3.03 - 2.44 + 2.43 44.89 ± 2.46 / 29.13 ± 1.92 48.09 ± 1.00 / 65.40 ± 0.75 19.06 ± 2.34 / 58.77 ± 1.37 @@ -5988,7 +5988,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 2.63 2.69 2.84 - 2.36 + 2.35 58.12 ± 1.30 / 54.97 ± 1.45 32.53 ± 1.39 / 54.09 ± 1.00 35.53 ± 2.54 / 66.86 ± 1.87 @@ -6427,7 +6427,7 @@ title: 
Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 634 ± 179 / 110 ± 35 2.67 2.61 - 3.02 + 3.01 2.39 37.93 ± 2.71 / 23.54 ± 1.99 44.49 ± 2.56 / 60.64 ± 3.00 @@ -6626,7 +6626,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 5,617 ± 995 / 1,623 ± 540 2.69 - 2.68 + 2.67 3.01 2.38 45.90 ± 2.53 / 33.00 ± 1.93 @@ -6778,6 +6778,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.3.2 12.4.0 + + LumiOpen/Viking-33B@1000B (few-shot) + 33119 + 131 + 4099 + True + 2,080 ± 700 / 331 ± 117 + 2.73 + 2.64 + 3.19 + 2.37 + 34.22 ± 2.47 / 22.52 ± 1.93 + 45.05 ± 2.49 / 62.23 ± 1.84 + 9.40 ± 2.63 / 44.83 ± 4.69 + 54.92 ± 1.00 / 60.33 ± 0.76 + 40.40 ± 2.29 / 30.41 ± 2.07 + 44.45 ± 3.61 / 34.06 ± 3.27 + 40.79 ± 1.70 / 57.84 ± 2.77 + 5.91 ± 2.51 / 47.81 ± 3.76 + 2.98 ± 2.86 / 45.49 ± 4.59 + 37.75 ± 3.23 / 59.72 ± 3.03 + 42.35 ± 1.51 / 28.31 ± 3.87 + 77.68 ± 1.11 / 78.86 ± 0.93 + 8.08 ± 1.69 / 50.52 ± 2.25 + 54.57 ± 1.25 / 60.34 ± 1.10 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + 12.9.0 + TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot) 7800 @@ -6818,6 +6858,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 13.0.0 13.0.0 + + meta-llama/Llama-2-7b-hf (few-shot) + 6738 + 32 + 4096 + True + 930 ± 310 / 128 ± 43 + 2.73 + 2.74 + 3.18 + 2.28 + 31.77 ± 3.29 / 22.31 ± 2.29 + 43.91 ± 1.94 / 61.54 ± 2.33 + 0.31 ± 0.61 / 33.43 ± 0.23 + 58.44 ± 0.83 / 63.54 ± 0.66 + 42.13 ± 3.82 / 37.17 ± 3.44 + 43.80 ± 2.85 / 37.48 ± 4.00 + 41.74 ± 2.25 / 57.91 ± 2.82 + 0.00 ± 0.00 / 33.41 ± 0.30 + 0.02 ± 0.04 / 33.88 ± 0.35 + 44.19 ± 4.13 / 66.18 ± 4.05 + 44.11 ± 4.26 / 31.64 ± 4.48 + 79.05 ± 1.08 / 75.52 ± 2.66 + 7.34 ± 3.19 / 43.83 ± 5.31 + 57.49 ± 0.95 / 63.16 ± 0.77 + 9.2.0 + 9.2.0 + 9.2.0 + 12.5.1 + 9.2.0 + 9.2.0 + 9.2.0 + 9.2.0 + 9.2.0 + 12.5.1 + 9.2.0 + 9.2.0 + 9.2.0 + 12.5.1 + meta-llama/Llama-3.2-3B-Instruct (few-shot) 3213 @@ -6858,46 +6938,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 13.0.0 13.0.0 - - LumiOpen/Viking-33B@1000B (few-shot) - 33119 - 131 - 4099 - True - 2,080 ± 700 / 331 ± 117 - 2.74 - 2.65 - 3.19 - 2.37 - 34.22 ± 2.47 / 22.52 ± 1.93 - 45.05 ± 2.49 / 62.23 ± 1.84 - 9.40 ± 2.63 / 44.83 ± 4.69 - 54.92 ± 1.00 / 60.33 ± 0.76 - 40.40 ± 2.29 / 30.41 ± 2.07 - 44.45 ± 3.61 / 34.06 ± 3.27 - 40.79 ± 1.70 / 57.84 ± 2.77 - 5.91 ± 2.51 / 47.81 ± 3.76 - 2.98 ± 2.86 / 45.49 ± 4.59 - 37.75 ± 3.23 / 59.72 ± 3.03 - 42.35 ± 1.51 / 28.31 ± 3.87 - 77.68 ± 1.11 / 78.86 ± 0.93 - 8.08 ± 1.69 / 50.52 ± 2.25 - 54.57 ± 1.25 / 60.34 ± 1.10 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - 12.9.0 - Twitter/twhin-bert-large 561 @@ -6938,46 +6978,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - meta-llama/Llama-2-7b-hf (few-shot) - 6738 - 32 - 4096 - True - 930 ± 310 / 128 ± 43 - 2.74 - 2.74 - 3.18 - 2.29 - 31.77 ± 3.29 / 22.31 ± 2.29 - 43.91 ± 1.94 / 61.54 ± 2.33 - 0.31 ± 0.61 / 33.43 ± 0.23 - 58.44 ± 0.83 / 63.54 ± 0.66 - 42.13 ± 3.82 / 37.17 ± 3.44 - 43.80 ± 2.85 / 37.48 ± 4.00 - 41.74 ± 2.25 / 57.91 ± 2.82 - 0.00 ± 0.00 / 33.41 ± 0.30 - 0.02 ± 0.04 / 33.88 ± 0.35 - 44.19 ± 4.13 / 66.18 ± 4.05 - 44.11 ± 4.26 / 31.64 ± 4.48 - 79.05 ± 1.08 / 75.52 ± 2.66 - 7.34 ± 3.19 / 43.83 ± 5.31 - 57.49 ± 0.95 / 63.16 ± 0.77 - 9.2.0 - 9.2.0 - 9.2.0 - 12.5.1 - 9.2.0 - 9.2.0 - 9.2.0 - 9.2.0 - 9.2.0 - 12.5.1 - 9.2.0 - 9.2.0 - 9.2.0 - 12.5.1 - sarnikowski/convbert-medium-small-da-cased 24 @@ -6987,7 +6987,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 13,821 ± 2,209 / 3,547 ± 1,184 2.75 2.28 - 3.11 + 3.10 2.86 64.28 ± 1.74 / 59.29 ± 
1.54 36.85 ± 3.28 / 56.27 ± 3.98 @@ -7146,7 +7146,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 29,698 ± 4,287 / 8,677 ± 2,776 2.78 - 3.02 + 3.01 3.36 1.97 57.84 ± 1.47 / 54.75 ± 1.23 @@ -7298,6 +7298,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 + + jannikskytt/MeDa-Bert + 111 + 32 + 511 + True + 16,114 ± 2,429 / 4,566 ± 1,482 + 2.80 + 2.30 + 3.01 + 3.10 + 64.64 ± 1.72 / 59.54 ± 1.83 + 44.62 ± 1.38 / 62.33 ± 1.20 + 47.47 ± 8.03 / 70.55 ± 4.26 + 23.14 ± 1.59 / 29.91 ± 1.40 + 71.69 ± 0.92 / 68.09 ± 0.91 + 60.00 ± 1.99 / 56.64 ± 1.98 + 38.94 ± 2.59 / 53.58 ± 3.33 + 30.32 ± 4.68 / 62.42 ± 3.11 + 7.99 ± 3.34 / 53.24 ± 1.64 + 24.02 ± 1.35 / 37.28 ± 1.24 + 48.32 ± 1.62 / 45.04 ± 1.50 + 53.98 ± 2.05 / 52.94 ± 1.88 + 3.33 ± 2.12 / 51.06 ± 1.15 + 23.15 ± 2.61 / 29.17 ± 2.19 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + meta-llama/Llama-2-7b-chat-hf (few-shot) 6738 @@ -7378,46 +7418,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.9.0 12.9.0 - - jannikskytt/MeDa-Bert - 111 - 32 - 511 - True - 16,114 ± 2,429 / 4,566 ± 1,482 - 2.81 - 2.30 - 3.01 - 3.11 - 64.64 ± 1.72 / 59.54 ± 1.83 - 44.62 ± 1.38 / 62.33 ± 1.20 - 47.47 ± 8.03 / 70.55 ± 4.26 - 23.14 ± 1.59 / 29.91 ± 1.40 - 71.69 ± 0.92 / 68.09 ± 0.91 - 60.00 ± 1.99 / 56.64 ± 1.98 - 38.94 ± 2.59 / 53.58 ± 3.33 - 30.32 ± 4.68 / 62.42 ± 3.11 - 7.99 ± 3.34 / 53.24 ± 1.64 - 24.02 ± 1.35 / 37.28 ± 1.24 - 48.32 ± 1.62 / 45.04 ± 1.50 - 53.98 ± 2.05 / 52.94 ± 1.88 - 3.33 ± 2.12 / 51.06 ± 1.15 - 23.15 ± 2.61 / 29.17 ± 2.19 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - mideind/IceBERT-xlmr-ic3 278 @@ -7504,7 +7504,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 32 4096 True - 3,194 ± 687 / 650 ± 216 + 8,681 ± 1,650 / 2,177 ± 717 2.82 2.73 3.19 @@ -7618,6 +7618,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.3.2 12.4.0 + + DDSC/roberta-base-danish + 125 + 50 + 512 + True + 15,004 ± 2,964 / 3,290 ± 1,092 + 2.86 + 2.59 + 3.25 + 2.75 + 63.84 ± 1.73 / 59.85 ± 1.44 + 43.90 ± 1.50 / 62.31 ± 0.96 + 17.16 ± 13.94 / 56.47 ± 7.34 + 26.94 ± 1.16 / 31.50 ± 1.03 + 76.14 ± 2.58 / 72.24 ± 2.54 + 72.88 ± 1.50 / 68.61 ± 1.62 + 32.29 ± 9.23 / 49.08 ± 8.36 + 0.45 ± 1.61 / 49.14 ± 1.42 + -0.08 ± 1.79 / 45.89 ± 3.49 + 23.91 ± 2.24 / 36.47 ± 2.77 + 65.95 ± 1.70 / 60.53 ± 1.38 + 64.02 ± 2.78 / 62.27 ± 4.19 + 0.80 ± 0.78 / 47.24 ± 3.43 + 28.46 ± 0.90 / 33.13 ± 0.88 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + Maltehb/danish-bert-botxo 111 @@ -7738,46 +7778,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 9.3.1 9.3.1 - - DDSC/roberta-base-danish - 125 - 50 - 512 - True - 15,004 ± 2,964 / 3,290 ± 1,092 - 2.87 - 2.59 - 3.25 - 2.76 - 63.84 ± 1.73 / 59.85 ± 1.44 - 43.90 ± 1.50 / 62.31 ± 0.96 - 17.16 ± 13.94 / 56.47 ± 7.34 - 26.94 ± 1.16 / 31.50 ± 1.03 - 76.14 ± 2.58 / 72.24 ± 2.54 - 72.88 ± 1.50 / 68.61 ± 1.62 - 32.29 ± 9.23 / 49.08 ± 8.36 - 0.45 ± 1.61 / 49.14 ± 1.42 - -0.08 ± 1.79 / 45.89 ± 3.49 - 23.91 ± 2.24 / 36.47 ± 2.77 - 65.95 ± 1.70 / 60.53 ± 1.38 - 64.02 ± 2.78 / 62.27 ± 4.19 - 0.80 ± 0.78 / 47.24 ± 3.43 - 28.46 ± 0.90 / 33.13 ± 0.88 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - NbAiLab/nb-llama-3.1-70B (few-shot) 70554 @@ -7827,7 +7827,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 4,347 ± 893 / 1,135 ± 365 2.87 2.69 - 3.28 + 3.27 2.64 35.96 ± 2.61 / 28.58 ± 2.58 42.04 ± 1.42 / 60.76 ± 1.41 @@ -7858,6 
+7858,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.1.0 12.5.2 + + google/gemma-7b-it (few-shot) + 8538 + 256 + 8192 + False + 1,792 ± 249 / 668 ± 203 + 2.87 + 2.85 + 2.91 + 2.86 + 43.83 ± 1.93 / 34.03 ± 1.59 + 29.21 ± 1.92 / 52.86 ± 1.54 + 12.96 ± 1.67 / 55.83 ± 0.88 + 49.76 ± 0.59 / 56.52 ± 0.50 + 59.77 ± 2.77 / 56.12 ± 2.72 + 60.98 ± 2.01 / 57.99 ± 1.31 + 28.14 ± 1.90 / 49.76 ± 1.59 + 14.01 ± 2.15 / 56.43 ± 1.08 + 10.15 ± 1.06 / 54.56 ± 0.71 + 51.08 ± 2.83 / 74.34 ± 1.55 + 59.26 ± 2.00 / 52.73 ± 2.71 + 28.63 ± 1.24 / 50.95 ± 0.75 + 11.43 ± 1.88 / 53.31 ± 1.74 + 46.67 ± 1.97 / 53.24 ± 1.72 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.10.0 + 12.10.0 + 12.10.0 + 12.10.0 + 12.10.0 + 12.10.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + ltg/norbert3-xs 15 @@ -7906,7 +7946,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 26,091 ± 5,835 / 5,209 ± 1,690 2.88 - 3.00 + 2.99 3.16 2.49 56.36 ± 1.95 / 53.98 ± 1.92 @@ -7946,7 +7986,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 3,371 ± 876 / 561 ± 184 2.88 - 2.97 + 2.96 3.28 2.39 33.80 ± 2.66 / 25.32 ± 3.06 @@ -8018,46 +8058,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - google/gemma-7b-it (few-shot) - 8538 - 256 - 8192 - False - 1,792 ± 249 / 668 ± 203 - 2.88 - 2.85 - 2.91 - 2.87 - 43.83 ± 1.93 / 34.03 ± 1.59 - 29.21 ± 1.92 / 52.86 ± 1.54 - 12.96 ± 1.67 / 55.83 ± 0.88 - 49.76 ± 0.59 / 56.52 ± 0.50 - 59.77 ± 2.77 / 56.12 ± 2.72 - 60.98 ± 2.01 / 57.99 ± 1.31 - 28.14 ± 1.90 / 49.76 ± 1.59 - 14.01 ± 2.15 / 56.43 ± 1.08 - 10.15 ± 1.06 / 54.56 ± 0.71 - 51.08 ± 2.83 / 74.34 ± 1.55 - 59.26 ± 2.00 / 52.73 ± 2.71 - 28.63 ± 1.24 / 50.95 ± 0.75 - 11.43 ± 1.88 / 53.31 ± 1.74 - 46.67 ± 1.97 / 53.24 ± 1.72 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.10.0 - 12.10.0 - 12.10.0 - 12.10.0 - 12.10.0 - 12.10.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - NorwAI/NorwAI-Llama2-7B (few-shot) 7033 @@ -8188,7 +8188,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 2.91 2.52 3.28 - 2.93 + 2.92 60.63 ± 1.32 / 56.90 ± 1.49 24.38 ± 1.74 / 40.85 ± 3.07 68.58 ± 1.38 / 84.12 ± 0.69 @@ -8227,8 +8227,8 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 14,273 ± 2,312 / 3,555 ± 1,187 2.93 2.55 - 3.24 - 3.01 + 3.23 + 3.00 60.59 ± 1.84 / 57.31 ± 1.47 29.52 ± 2.89 / 47.81 ± 4.54 57.10 ± 2.02 / 78.14 ± 1.10 @@ -8298,6 +8298,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.10.5 12.10.5 + + ibm-granite/granite-3.0-2b-base (few-shot) + 2534 + 49 + 4097 + True + 10,187 ± 2,363 / 2,204 ± 737 + 2.97 + 3.01 + 3.31 + 2.60 + 32.34 ± 3.77 / 24.48 ± 3.17 + 29.50 ± 3.63 / 42.61 ± 4.86 + 3.89 ± 1.49 / 37.29 ± 3.65 + 53.67 ± 0.84 / 59.15 ± 0.69 + 43.00 ± 2.81 / 35.39 ± 2.28 + 45.08 ± 0.83 / 38.16 ± 1.91 + 35.36 ± 2.31 / 54.88 ± 2.08 + 2.79 ± 1.92 / 41.90 ± 4.62 + 1.95 ± 2.02 / 38.91 ± 3.27 + 37.33 ± 3.11 / 59.74 ± 2.74 + 36.54 ± 2.70 / 28.79 ± 3.85 + 68.85 ± 5.19 / 70.02 ± 3.95 + 2.60 ± 2.58 / 40.21 ± 4.08 + 54.58 ± 0.89 / 59.78 ± 0.77 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 01-ai/Yi-1.5-6B (few-shot) 6061 @@ -8306,7 +8346,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 2,867 ± 550 / 793 ± 253 2.98 - 3.21 + 3.20 3.29 2.45 35.21 ± 2.52 / 23.65 ± 2.52 @@ -8346,7 +8386,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 501 ± 50 / 238 ± 69 2.98 - 2.91 + 2.90 3.42 2.62 25.35 ± 3.59 / 17.77 ± 2.25 @@ -8378,46 +8418,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.10.0 12.10.0 - - ibm-granite/granite-3.0-2b-base (few-shot) - 2534 - 49 - 4097 - True - 10,187 ± 2,363 / 2,204 ± 737 - 2.98 - 3.02 - 3.31 - 2.60 - 32.34 ± 3.77 / 24.48 ± 3.17 - 
29.50 ± 3.63 / 42.61 ± 4.86 - 3.89 ± 1.49 / 37.29 ± 3.65 - 53.67 ± 0.84 / 59.15 ± 0.69 - 43.00 ± 2.81 / 35.39 ± 2.28 - 45.08 ± 0.83 / 38.16 ± 1.91 - 35.36 ± 2.31 / 54.88 ± 2.08 - 2.79 ± 1.92 / 41.90 ± 4.62 - 1.95 ± 2.02 / 38.91 ± 3.27 - 37.33 ± 3.11 / 59.74 ± 2.74 - 36.54 ± 2.70 / 28.79 ± 3.85 - 68.85 ± 5.19 / 70.02 ± 3.95 - 2.60 ± 2.58 / 40.21 ± 4.08 - 54.58 ± 0.89 / 59.78 ± 0.77 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - ibm-granite/granite-3.0-3b-a800m-instruct (few-shot) 3374 @@ -8505,10 +8505,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 512 True 20,047 ± 4,407 / 3,844 ± 1,259 - 3.04 + 3.03 3.28 - 3.39 - 2.45 + 3.38 + 2.44 47.61 ± 1.71 / 45.91 ± 1.91 24.17 ± 1.92 / 43.75 ± 2.75 8.14 ± 3.76 / 51.78 ± 1.81 @@ -8748,7 +8748,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3.07 2.65 3.35 - 3.22 + 3.21 63.31 ± 1.75 / 58.18 ± 1.78 32.72 ± 2.91 / 49.84 ± 4.90 67.74 ± 1.33 / 83.32 ± 0.71 @@ -8778,46 +8778,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - nvidia/mistral-nemo-minitron-8b-instruct (few-shot) - 8414 - 131 - 8192 - True - 3,161 ± 676 / 1,247 ± 481 - 3.07 - 2.96 - 3.37 - 2.88 - 49.01 ± 1.94 / 29.13 ± 2.09 - 47.95 ± 1.37 / 64.82 ± 0.89 - 32.89 ± 1.40 / 66.18 ± 0.64 - 0.00 ± 0.00 / 17.81 ± 0.42 - 63.70 ± 0.98 / 39.82 ± 2.02 - 62.53 ± 1.64 / 40.20 ± 2.22 - 34.35 ± 0.92 / 37.57 ± 0.55 - 31.53 ± 1.91 / 64.14 ± 1.52 - 22.71 ± 2.23 / 57.77 ± 2.48 - 0.06 ± 0.04 / 26.09 ± 1.72 - 48.51 ± 1.91 / 24.61 ± 1.44 - 78.68 ± 0.89 / 78.07 ± 0.97 - 29.18 ± 2.29 / 63.69 ± 1.55 - 0.00 ± 0.00 / 16.78 ± 0.35 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - 14.1.1 - ibm-granite/granite-3b-code-instruct-2k (few-shot) 3483 @@ -8825,10 +8785,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 2048 True 9,059 ± 1,947 / 2,201 ± 728 - 3.08 + 3.07 3.03 3.54 - 2.66 + 2.65 37.21 ± 2.75 / 27.74 ± 2.59 31.54 ± 2.39 / 50.61 ± 2.88 6.30 ± 1.61 / 49.09 ± 2.54 @@ -8858,6 +8818,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 13.0.0 13.0.0 + + nvidia/mistral-nemo-minitron-8b-instruct (few-shot) + 8414 + 131 + 8192 + True + 3,161 ± 676 / 1,247 ± 481 + 3.07 + 2.96 + 3.37 + 2.88 + 49.01 ± 1.94 / 29.13 ± 2.09 + 47.95 ± 1.37 / 64.82 ± 0.89 + 32.89 ± 1.40 / 66.18 ± 0.64 + 0.00 ± 0.00 / 17.81 ± 0.42 + 63.70 ± 0.98 / 39.82 ± 2.02 + 62.53 ± 1.64 / 40.20 ± 2.22 + 34.35 ± 0.92 / 37.57 ± 0.55 + 31.53 ± 1.91 / 64.14 ± 1.52 + 22.71 ± 2.23 / 57.77 ± 2.48 + 0.06 ± 0.04 / 26.09 ± 1.72 + 48.51 ± 1.91 / 24.61 ± 1.44 + 78.68 ± 0.89 / 78.07 ± 0.97 + 29.18 ± 2.29 / 63.69 ± 1.55 + 0.00 ± 0.00 / 16.78 ± 0.35 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + 14.1.1 + microsoft/Phi-3-mini-128k-instruct (few-shot) 3821 @@ -8866,9 +8866,9 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 7,312 ± 1,668 / 1,609 ± 525 3.08 - 3.18 + 3.17 3.25 - 2.82 + 2.81 4.51 ± 2.12 / 3.71 ± 1.76 40.85 ± 1.26 / 59.79 ± 1.32 5.43 ± 1.74 / 46.21 ± 4.13 @@ -8948,7 +8948,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3.09 3.25 3.43 - 2.60 + 2.59 49.88 ± 2.14 / 46.74 ± 1.94 27.93 ± 0.66 / 50.86 ± 0.42 5.42 ± 2.85 / 48.29 ± 3.93 @@ -8986,7 +8986,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 2,732 ± 868 / 662 ± 238 3.09 - 2.91 + 2.90 3.85 2.51 38.62 ± 3.40 / 27.71 ± 3.01 @@ -9066,7 +9066,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 3,520 ± 736 / 823 ± 273 3.14 - 2.98 + 2.97 3.76 2.69 28.72 ± 2.61 / 20.53 ± 2.46 @@ -9107,7 +9107,7 @@ title: Mainland 
Scandinavian NLU 🇩🇰🇳🇴🇸🇪 15,009 ± 4,072 / 2,702 ± 878 3.14 3.08 - 3.58 + 3.57 2.76 19.61 ± 2.68 / 17.44 ± 2.64 37.92 ± 1.74 / 46.23 ± 1.91 @@ -9188,7 +9188,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3.17 2.64 3.55 - 3.32 + 3.31 60.78 ± 1.12 / 55.74 ± 1.15 39.65 ± 1.31 / 59.23 ± 0.94 37.67 ± 7.20 / 65.47 ± 3.22 @@ -9265,9 +9265,9 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 131072 True 1,297 ± 338 / 245 ± 83 - 3.21 + 3.20 3.33 - 3.40 + 3.39 2.89 44.83 ± 1.57 / 29.75 ± 1.72 37.14 ± 1.61 / 44.74 ± 2.01 @@ -9305,8 +9305,8 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 2048 True 4,544 ± 1,000 / 1,106 ± 359 - 3.23 - 3.39 + 3.22 + 3.38 3.41 2.88 14.73 ± 1.84 / 14.44 ± 1.74 @@ -9345,10 +9345,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 32768 True 3,248 ± 739 / 761 ± 252 - 3.23 - 2.83 + 3.22 + 2.82 3.34 - 3.52 + 3.51 32.28 ± 3.16 / 23.24 ± 3.51 39.62 ± 2.39 / 56.09 ± 2.89 5.38 ± 2.18 / 36.31 ± 1.96 @@ -9387,7 +9387,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 4,405 ± 1,098 / 1,032 ± 345 3.23 3.15 - 3.85 + 3.84 2.69 24.93 ± 4.36 / 22.23 ± 3.32 31.65 ± 2.94 / 51.95 ± 2.92 @@ -9465,8 +9465,8 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 4096 False 1,254 ± 328 / 243 ± 83 - 3.25 - 3.16 + 3.24 + 3.15 3.72 2.86 34.66 ± 1.19 / 21.37 ± 1.52 @@ -9588,7 +9588,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3.33 3.39 3.81 - 2.79 + 2.78 29.44 ± 1.81 / 20.31 ± 1.68 18.49 ± 2.47 / 35.29 ± 2.83 1.73 ± 1.63 / 38.18 ± 4.15 @@ -9666,7 +9666,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 1,473 ± 493 / 448 ± 143 3.36 - 3.66 + 3.65 3.40 3.02 15.35 ± 1.38 / 14.74 ± 1.30 @@ -9708,7 +9708,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3.36 3.21 4.15 - 2.73 + 2.72 26.76 ± 3.11 / 19.46 ± 2.38 30.76 ± 4.38 / 44.83 ± 4.36 0.55 ± 1.73 / 39.40 ± 4.57 @@ -9747,7 +9747,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 6,471 ± 1,142 / 1,961 ± 584 3.36 3.24 - 3.62 + 3.61 3.22 24.44 ± 2.59 / 17.37 ± 2.03 34.03 ± 2.50 / 52.42 ± 2.16 @@ -9938,6 +9938,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.10.5 12.10.5 + + mhenrichsen/danskgpt-tiny-chat (few-shot) + 1100 + 32 + 2048 + False + 1,745 ± 978 / 686 ± 159 + 3.45 + 3.26 + 3.89 + 3.21 + 22.31 ± 2.55 / 19.30 ± 2.14 + 34.05 ± 2.37 / 52.43 ± 2.46 + 0.70 ± 1.11 / 40.47 ± 3.04 + 41.82 ± 2.07 / 48.91 ± 2.47 + 28.74 ± 4.18 / 28.29 ± 4.37 + 30.34 ± 6.08 / 30.02 ± 6.42 + 27.49 ± 3.13 / 48.00 ± 3.89 + -2.17 ± 1.06 / 33.52 ± 0.37 + 0.26 ± 1.08 / 34.12 ± 0.45 + 19.10 ± 2.33 / 38.96 ± 2.78 + 27.31 ± 4.23 / 26.33 ± 4.40 + 45.94 ± 12.82 / 55.94 ± 8.25 + -0.97 ± 1.64 / 36.69 ± 2.34 + 35.57 ± 2.45 / 41.66 ± 2.41 + 9.1.2 + 9.1.2 + 9.1.2 + 12.4.0 + 9.1.2 + 9.1.2 + 9.1.2 + 9.1.2 + 9.1.2 + 12.4.0 + 9.1.2 + 9.1.2 + 9.1.2 + 12.4.0 + sentence-transformers/distiluse-base-multilingual-cased-v1 135 @@ -9978,46 +10018,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - mhenrichsen/danskgpt-tiny-chat (few-shot) - 1100 - 32 - 2048 - False - 1,745 ± 978 / 686 ± 159 - 3.46 - 3.26 - 3.90 - 3.21 - 22.31 ± 2.55 / 19.30 ± 2.14 - 34.05 ± 2.37 / 52.43 ± 2.46 - 0.70 ± 1.11 / 40.47 ± 3.04 - 41.82 ± 2.07 / 48.91 ± 2.47 - 28.74 ± 4.18 / 28.29 ± 4.37 - 30.34 ± 6.08 / 30.02 ± 6.42 - 27.49 ± 3.13 / 48.00 ± 3.89 - -2.17 ± 1.06 / 33.52 ± 0.37 - 0.26 ± 1.08 / 34.12 ± 0.45 - 19.10 ± 2.33 / 38.96 ± 2.78 - 27.31 ± 4.23 / 26.33 ± 4.40 - 45.94 ± 12.82 / 55.94 ± 8.25 - -0.97 ± 1.64 / 36.69 ± 2.34 - 35.57 ± 2.45 / 41.66 ± 2.41 - 9.1.2 - 9.1.2 - 9.1.2 - 12.4.0 - 9.1.2 - 9.1.2 - 9.1.2 - 9.1.2 - 9.1.2 - 12.4.0 - 9.1.2 - 9.1.2 - 9.1.2 - 12.4.0 - HuggingFaceTB/SmolLM2-1.7B (few-shot) 1711 @@ -10026,7 +10026,7 @@ title: Mainland Scandinavian NLU 
🇩🇰🇳🇴🇸🇪 True 16,249 ± 3,690 / 3,689 ± 1,226 3.47 - 3.62 + 3.61 4.01 2.78 24.47 ± 3.42 / 18.70 ± 2.18 @@ -10108,7 +10108,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3.51 3.42 3.78 - 3.33 + 3.32 11.28 ± 0.96 / 11.02 ± 0.85 34.94 ± 3.80 / 49.66 ± 3.96 2.08 ± 1.48 / 45.41 ± 3.83 @@ -10138,6 +10138,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.1.0 12.4.0 + + NbAiLab/nb-gpt-j-6B-alpaca (few-shot) + 6055 + 50 + 1024 + False + 2,607 ± 592 / 680 ± 208 + 3.51 + 3.51 + 3.78 + 3.25 + 12.95 ± 3.80 / 11.68 ± 2.31 + 27.68 ± 3.64 / 46.61 ± 4.11 + 1.65 ± 1.96 / 47.94 ± 2.55 + 38.60 ± 0.65 / 47.40 ± 0.64 + 23.82 ± 4.25 / 22.08 ± 2.50 + 26.04 ± 6.38 / 24.47 ± 3.69 + 32.60 ± 1.84 / 47.47 ± 3.33 + 0.34 ± 1.43 / 44.47 ± 2.44 + 2.26 ± 2.27 / 45.41 ± 3.25 + 21.33 ± 0.98 / 42.76 ± 1.02 + 13.28 ± 4.32 / 13.40 ± 2.95 + 60.17 ± 8.39 / 65.99 ± 4.66 + 1.52 ± 1.94 / 45.19 ± 3.80 + 37.23 ± 1.07 / 46.83 ± 0.82 + 9.3.1 + 10.0.1 + 10.0.1 + 12.4.0 + 9.3.2 + 9.3.2 + 10.0.1 + 10.0.1 + 10.0.1 + 12.4.0 + 9.3.1 + 10.0.1 + 10.0.1 + 12.4.0 + openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot) 7453 @@ -10148,7 +10188,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3.51 3.37 3.83 - 3.33 + 3.32 29.49 ± 2.73 / 21.57 ± 2.07 13.77 ± 3.72 / 23.78 ± 3.10 0.00 ± 0.00 / 33.49 ± 0.27 @@ -10178,46 +10218,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 14.0.4 14.0.4 - - NbAiLab/nb-gpt-j-6B-alpaca (few-shot) - 6055 - 50 - 1024 - False - 2,607 ± 592 / 680 ± 208 - 3.52 - 3.51 - 3.78 - 3.26 - 12.95 ± 3.80 / 11.68 ± 2.31 - 27.68 ± 3.64 / 46.61 ± 4.11 - 1.65 ± 1.96 / 47.94 ± 2.55 - 38.60 ± 0.65 / 47.40 ± 0.64 - 23.82 ± 4.25 / 22.08 ± 2.50 - 26.04 ± 6.38 / 24.47 ± 3.69 - 32.60 ± 1.84 / 47.47 ± 3.33 - 0.34 ± 1.43 / 44.47 ± 2.44 - 2.26 ± 2.27 / 45.41 ± 3.25 - 21.33 ± 0.98 / 42.76 ± 1.02 - 13.28 ± 4.32 / 13.40 ± 2.95 - 60.17 ± 8.39 / 65.99 ± 4.66 - 1.52 ± 1.94 / 45.19 ± 3.80 - 37.23 ± 1.07 / 46.83 ± 0.82 - 9.3.1 - 10.0.1 - 10.0.1 - 12.4.0 - 9.3.2 - 9.3.2 - 10.0.1 - 10.0.1 - 10.0.1 - 12.4.0 - 9.3.1 - 10.0.1 - 10.0.1 - 12.4.0 - dbmdz/bert-mini-historic-multilingual-cased 12 @@ -10227,7 +10227,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 47,122 ± 9,661 / 9,714 ± 3,152 3.52 3.61 - 3.75 + 3.74 3.20 41.70 ± 1.80 / 38.74 ± 1.82 26.03 ± 0.90 / 48.46 ± 1.21 @@ -10267,7 +10267,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 7,964 ± 2,255 / 1,299 ± 433 3.52 3.40 - 4.07 + 4.06 3.10 30.63 ± 2.60 / 20.55 ± 1.52 22.35 ± 2.26 / 44.97 ± 2.66 @@ -10347,7 +10347,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 1,296 ± 335 / 246 ± 84 3.54 3.33 - 4.15 + 4.14 3.15 21.87 ± 4.65 / 18.10 ± 3.46 24.82 ± 6.87 / 35.81 ± 5.98 @@ -10385,9 +10385,9 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 512 True 33,247 ± 8,123 / 6,017 ± 1,977 - 3.55 + 3.54 3.75 - 3.72 + 3.71 3.17 26.96 ± 1.31 / 25.63 ± 1.35 30.13 ± 2.10 / 46.78 ± 4.49 @@ -10425,9 +10425,9 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 512 True 19,206 ± 4,451 / 3,658 ± 1,187 - 3.55 + 3.54 3.75 - 3.72 + 3.71 3.17 26.96 ± 1.31 / 25.63 ± 1.35 30.13 ± 2.10 / 46.78 ± 4.49 @@ -10507,7 +10507,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 10,756 ± 3,589 / 1,157 ± 670 3.59 3.39 - 4.08 + 4.10 3.29 28.30 ± 2.45 / 22.93 ± 1.82 28.95 ± 4.05 / 48.32 ± 5.01 @@ -10586,7 +10586,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 15,314 ± 2,786 / 3,666 ± 1,201 3.60 - 3.71 + 3.70 4.00 3.10 32.63 ± 1.06 / 32.76 ± 1.02 @@ -10618,6 +10618,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 + + AI-Sweden-Models/gpt-sw3-356m (few-shot) + 471 + 64 + 2048 + True + 5,758 ± 1,348 / 1,215 ± 391 + 3.61 + 3.54 + 3.76 + 3.54 + 16.13 ± 4.02 / 14.90 ± 3.13 + 27.61 ± 2.14 
/ 39.77 ± 1.85 + 1.96 ± 2.25 / 38.40 ± 2.99 + 34.79 ± 1.52 / 39.67 ± 1.69 + 27.37 ± 4.07 / 27.94 ± 4.04 + 31.22 ± 3.87 / 31.39 ± 3.99 + 34.21 ± 1.63 / 47.17 ± 2.76 + 0.92 ± 1.55 / 40.71 ± 2.58 + 1.25 ± 2.30 / 43.49 ± 3.20 + 18.52 ± 2.78 / 32.10 ± 4.23 + 23.77 ± 3.70 / 23.06 ± 3.46 + 34.29 ± 11.64 / 36.76 ± 7.46 + 1.57 ± 1.70 / 40.84 ± 1.99 + 33.70 ± 1.46 / 38.82 ± 1.54 + 9.3.1 + 9.3.1 + 9.3.2 + 12.5.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.2 + 9.3.2 + 12.5.1 + 9.3.1 + 9.3.1 + 9.3.2 + 12.5.1 + Qwen/Qwen1.5-1.8B (few-shot) 1837 @@ -10658,46 +10698,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.1.0 12.1.0 - - AI-Sweden-Models/gpt-sw3-356m (few-shot) - 471 - 64 - 2048 - True - 5,758 ± 1,348 / 1,215 ± 391 - 3.62 - 3.55 - 3.77 - 3.54 - 16.13 ± 4.02 / 14.90 ± 3.13 - 27.61 ± 2.14 / 39.77 ± 1.85 - 1.96 ± 2.25 / 38.40 ± 2.99 - 34.79 ± 1.52 / 39.67 ± 1.69 - 27.37 ± 4.07 / 27.94 ± 4.04 - 31.22 ± 3.87 / 31.39 ± 3.99 - 34.21 ± 1.63 / 47.17 ± 2.76 - 0.92 ± 1.55 / 40.71 ± 2.58 - 1.25 ± 2.30 / 43.49 ± 3.20 - 18.52 ± 2.78 / 32.10 ± 4.23 - 23.77 ± 3.70 / 23.06 ± 3.46 - 34.29 ± 11.64 / 36.76 ± 7.46 - 1.57 ± 1.70 / 40.84 ± 1.99 - 33.70 ± 1.46 / 38.82 ± 1.54 - 9.3.1 - 9.3.1 - 9.3.2 - 12.5.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.2 - 9.3.2 - 12.5.1 - 9.3.1 - 9.3.1 - 9.3.2 - 12.5.1 - ibm-granite/granite-3.0-1b-a400m-base (few-shot) 1385 @@ -10706,9 +10706,9 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 7,808 ± 2,183 / 1,289 ± 428 3.69 - 3.67 - 4.14 - 3.26 + 3.66 + 4.15 + 3.25 16.82 ± 3.53 / 14.28 ± 3.01 17.52 ± 2.86 / 27.96 ± 2.94 1.53 ± 2.06 / 36.59 ± 3.39 @@ -10748,7 +10748,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3.79 3.80 4.10 - 3.48 + 3.47 15.93 ± 3.91 / 14.68 ± 2.81 13.01 ± 2.33 / 28.28 ± 4.63 0.05 ± 1.37 / 40.73 ± 3.78 @@ -10778,6 +10778,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 14.1.2 14.0.4 + + mhenrichsen/danskgpt-tiny (few-shot) + 1100 + 32 + 2048 + True + 8,597 ± 1,983 / 1,926 ± 600 + 3.80 + 3.64 + 4.19 + 3.58 + 14.13 ± 3.50 / 12.15 ± 3.14 + 26.31 ± 5.33 / 44.07 ± 6.36 + -0.54 ± 1.46 / 44.56 ± 3.34 + 32.12 ± 1.62 / 38.99 ± 1.42 + 27.37 ± 6.89 / 27.19 ± 7.19 + 27.59 ± 6.34 / 28.03 ± 6.94 + 18.09 ± 6.14 / 31.83 ± 6.77 + -0.19 ± 1.93 / 41.38 ± 3.18 + -0.80 ± 0.89 / 40.66 ± 3.78 + 5.84 ± 1.36 / 16.14 ± 2.48 + 23.92 ± 6.88 / 22.42 ± 6.73 + 31.93 ± 14.68 / 43.80 ± 8.79 + 0.46 ± 1.91 / 43.45 ± 3.64 + 30.81 ± 2.73 / 35.67 ± 2.95 + 0.0.0 + 0.0.0 + 0.0.0 + 12.5.1 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 12.5.1 + 0.0.0 + 0.0.0 + 0.0.0 + 12.5.1 + 3ebdola/Dialectal-Arabic-XLM-R-Base 278 @@ -10826,7 +10866,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 30,013 ± 4,309 / 8,840 ± 2,859 3.81 - 3.65 + 3.64 4.01 3.78 33.05 ± 1.28 / 31.68 ± 1.05 @@ -10858,46 +10898,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - mhenrichsen/danskgpt-tiny (few-shot) - 1100 - 32 - 2048 - True - 8,597 ± 1,983 / 1,926 ± 600 - 3.81 - 3.64 - 4.21 - 3.58 - 14.13 ± 3.50 / 12.15 ± 3.14 - 26.31 ± 5.33 / 44.07 ± 6.36 - -0.54 ± 1.46 / 44.56 ± 3.34 - 32.12 ± 1.62 / 38.99 ± 1.42 - 27.37 ± 6.89 / 27.19 ± 7.19 - 27.59 ± 6.34 / 28.03 ± 6.94 - 18.09 ± 6.14 / 31.83 ± 6.77 - -0.19 ± 1.93 / 41.38 ± 3.18 - -0.80 ± 0.89 / 40.66 ± 3.78 - 5.84 ± 1.36 / 16.14 ± 2.48 - 23.92 ± 6.88 / 22.42 ± 6.73 - 31.93 ± 14.68 / 43.80 ± 8.79 - 0.46 ± 1.91 / 43.45 ± 3.64 - 30.81 ± 2.73 / 35.67 ± 2.95 - 0.0.0 - 0.0.0 - 0.0.0 - 12.5.1 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 12.5.1 - 0.0.0 - 0.0.0 - 0.0.0 - 12.5.1 - NbAiLab/nb-llama-3.2-1B (few-shot) 1236 @@ -10907,7 +10907,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3,424 ± 1,080 / 464 ± 158 3.82 3.74 - 4.33 + 4.32 3.40 
9.20 ± 3.54 / 8.11 ± 3.24 32.94 ± 4.62 / 46.54 ± 5.68 @@ -10947,7 +10947,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 10,890 ± 2,686 / 2,186 ± 750 3.87 4.06 - 4.10 + 4.09 3.45 10.12 ± 1.24 / 9.84 ± 1.12 10.65 ± 3.65 / 28.33 ± 5.27 @@ -10987,8 +10987,8 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 2,519 ± 841 / 323 ± 104 3.88 3.58 - 4.23 - 3.84 + 4.22 + 3.83 16.17 ± 3.44 / 14.33 ± 1.92 29.12 ± 4.09 / 49.93 ± 4.45 -0.47 ± 0.62 / 33.18 ± 0.28 @@ -11025,10 +11025,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 32768 False 11,740 ± 3,000 / 2,209 ± 721 - 3.90 - 3.83 - 4.31 - 3.55 + 3.89 + 3.82 + 4.30 + 3.54 17.38 ± 2.04 / 15.74 ± 1.99 10.72 ± 3.35 / 25.21 ± 3.80 1.32 ± 1.08 / 42.05 ± 3.69 @@ -11065,10 +11065,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 512 True 78,027 ± 15,466 / 17,064 ± 5,335 - 3.90 + 3.89 3.93 4.13 - 3.63 + 3.62 33.62 ± 1.57 / 31.69 ± 1.40 20.71 ± 1.68 / 40.07 ± 2.65 1.19 ± 1.08 / 48.46 ± 1.34 @@ -11105,9 +11105,9 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 32768 True 11,371 ± 2,924 / 2,122 ± 692 - 3.92 + 3.91 3.88 - 4.35 + 4.33 3.53 19.01 ± 1.91 / 17.08 ± 1.83 8.88 ± 1.86 / 24.27 ± 2.45 @@ -11138,6 +11138,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 12.1.0 12.1.0 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 3.92 + 3.77 + 4.31 + 3.68 + 20.03 ± 1.60 / 17.52 ± 1.50 + 15.96 ± 2.77 / 38.74 ± 2.88 + 0.86 ± 1.76 / 42.48 ± 3.82 + 28.98 ± 1.93 / 35.38 ± 2.10 + 29.25 ± 2.15 / 28.34 ± 2.25 + 25.45 ± 1.96 / 24.62 ± 1.86 + 11.28 ± 0.71 / 22.46 ± 2.54 + 1.52 ± 1.26 / 40.82 ± 4.08 + 0.52 ± 1.60 / 41.23 ± 4.21 + 8.47 ± 0.94 / 20.43 ± 2.10 + 26.41 ± 2.74 / 25.18 ± 2.46 + 25.99 ± 3.51 / 35.36 ± 4.53 + 1.64 ± 1.90 / 37.52 ± 2.69 + 21.39 ± 1.66 / 26.70 ± 1.83 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + allenai/OLMo-1B (few-shot) 1177 @@ -11148,7 +11188,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 3.96 3.89 4.40 - 3.60 + 3.59 13.39 ± 2.60 / 12.39 ± 2.46 17.94 ± 5.58 / 32.80 ± 3.63 -2.02 ± 2.28 / 40.63 ± 4.12 @@ -11218,46 +11258,6 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 0.0.0 0.0.0 - - HuggingFaceTB/SmolLM2-360M (few-shot) - 362 - 49 - 8192 - True - 22,023 ± 6,203 / 3,675 ± 1,231 - 4.18 - 4.12 - 4.44 - 3.98 - 12.68 ± 1.39 / 12.32 ± 1.19 - 3.61 ± 2.69 / 19.01 ± 3.95 - 1.79 ± 0.97 / 36.23 ± 3.19 - 28.12 ± 3.14 / 32.48 ± 3.57 - 26.60 ± 1.99 / 23.60 ± 2.05 - 23.70 ± 1.58 / 23.04 ± 2.17 - 6.21 ± 2.55 / 23.74 ± 3.28 - -0.39 ± 0.76 / 33.40 ± 0.31 - 0.21 ± 0.41 / 35.33 ± 3.02 - 4.65 ± 1.00 / 10.23 ± 1.68 - 18.22 ± 1.75 / 17.97 ± 2.10 - 11.52 ± 7.20 / 29.30 ± 5.30 - 1.72 ± 1.21 / 34.96 ± 1.12 - 27.27 ± 3.03 / 31.25 ± 3.24 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - fresh-xlm-roberta-base 278 @@ -11265,10 +11265,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 512 True 2,214 ± 94 / 1,494 ± 229 - 4.18 - 4.24 + 4.17 + 4.23 4.37 - 3.93 + 3.92 16.04 ± 2.47 / 15.60 ± 2.62 17.37 ± 3.82 / 36.83 ± 4.86 1.34 ± 0.97 / 35.45 ± 3.20 @@ -11305,10 +11305,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 2048 True 7,717 ± 1,553 / 2,013 ± 625 - 4.19 + 4.18 4.15 - 4.36 - 4.06 + 4.35 + 4.05 13.98 ± 1.54 / 13.46 ± 1.42 6.37 ± 3.38 / 25.43 ± 4.09 0.41 ± 0.80 / 33.31 ± 0.24 @@ -11338,6 +11338,46 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 11.0.0 12.4.0 + + HuggingFaceTB/SmolLM2-360M (few-shot) + 362 + 49 + 8192 + True + 22,023 ± 6,203 / 3,675 ± 1,231 + 4.19 + 4.12 + 4.46 + 3.98 + 12.68 ± 1.39 / 12.32 ± 1.19 + 3.61 ± 
2.69 / 19.01 ± 3.95 + 1.79 ± 0.97 / 36.23 ± 3.19 + 28.12 ± 3.14 / 32.48 ± 3.57 + 26.60 ± 1.99 / 23.60 ± 2.05 + 23.70 ± 1.58 / 23.04 ± 2.17 + 6.21 ± 2.55 / 23.74 ± 3.28 + -0.39 ± 0.76 / 33.40 ± 0.31 + 0.21 ± 0.41 / 35.33 ± 3.02 + 4.65 ± 1.00 / 10.23 ± 1.68 + 18.22 ± 1.75 / 17.97 ± 2.10 + 11.52 ± 7.20 / 29.30 ± 5.30 + 1.72 ± 1.21 / 34.96 ± 1.12 + 27.27 ± 3.03 / 31.25 ± 3.24 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + PleIAs/Pleias-350m-Preview (few-shot) 353 @@ -11345,10 +11385,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.20 + 4.19 4.04 - 4.50 - 4.06 + 4.49 + 4.05 13.84 ± 1.95 / 13.12 ± 1.60 9.47 ± 3.30 / 25.66 ± 3.36 -0.36 ± 1.60 / 39.52 ± 3.19 @@ -11386,9 +11426,9 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 False 2,556 ± 580 / 681 ± 214 4.21 - 4.16 + 4.15 4.41 - 4.07 + 4.06 0.24 ± 0.25 / 0.25 ± 0.21 27.80 ± 6.39 / 43.80 ± 5.16 0.56 ± 0.51 / 34.04 ± 1.28 @@ -11507,8 +11547,8 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 21,777 ± 6,115 / 3,617 ± 1,211 4.24 4.21 - 4.46 - 4.05 + 4.47 + 4.04 8.97 ± 3.18 / 8.62 ± 2.72 2.66 ± 2.70 / 16.29 ± 2.34 1.65 ± 1.38 / 44.50 ± 3.21 @@ -11588,7 +11628,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 4.31 4.25 4.55 - 4.13 + 4.12 12.82 ± 2.64 / 12.37 ± 1.95 3.55 ± 3.64 / 22.75 ± 4.02 0.68 ± 1.41 / 35.13 ± 0.98 @@ -11628,7 +11668,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 4.38 4.33 4.61 - 4.20 + 4.19 12.11 ± 1.07 / 11.48 ± 1.07 2.61 ± 3.22 / 18.95 ± 3.93 0.25 ± 1.87 / 39.65 ± 4.00 @@ -11668,7 +11708,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 4.39 4.30 4.46 - 4.41 + 4.40 13.72 ± 1.83 / 13.41 ± 1.52 3.79 ± 3.11 / 21.06 ± 4.74 -0.45 ± 0.70 / 39.69 ± 4.95 @@ -11705,10 +11745,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 2048 True 8,958 ± 1,815 / 2,240 ± 696 - 4.41 + 4.40 4.25 - 4.65 - 4.33 + 4.64 + 4.32 3.43 ± 2.66 / 5.56 ± 1.90 9.18 ± 4.25 / 26.36 ± 3.94 -0.22 ± 1.53 / 34.20 ± 0.84 @@ -11746,8 +11786,8 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 True 2,630 ± 605 / 684 ± 217 4.41 - 4.44 - 4.54 + 4.43 + 4.53 4.26 0.36 ± 0.40 / 1.82 ± 1.16 11.00 ± 7.09 / 26.09 ± 6.96 @@ -11865,10 +11905,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 1024 True 11,734 ± 3,124 / 2,174 ± 720 - 4.75 + 4.74 4.68 - 4.85 - 4.72 + 4.84 + 4.71 0.65 ± 0.68 / 0.59 ± 0.63 2.61 ± 2.75 / 20.51 ± 2.48 -0.73 ± 1.72 / 41.15 ± 3.71 @@ -11905,10 +11945,10 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 1024 True 19,896 ± 5,099 / 3,848 ± 1,251 - 4.76 + 4.75 4.70 4.80 - 4.77 + 4.76 1.13 ± 1.19 / 0.97 ± 1.03 2.06 ± 2.30 / 20.38 ± 2.71 -0.36 ± 0.97 / 41.52 ± 4.00 @@ -11947,7 +11987,7 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 6,025 ± 1,442 / 1,342 ± 431 4.82 4.78 - 4.94 + 4.93 4.74 0.64 ± 0.89 / 0.52 ± 0.69 -0.52 ± 1.72 / 28.55 ± 1.60 @@ -12025,8 +12065,8 @@ title: Mainland Scandinavian NLU 🇩🇰🇳🇴🇸🇪 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 4.85 - 4.80 + 4.84 + 4.79 4.92 4.82 0.00 ± 0.00 / 0.00 ± 0.00 diff --git a/norwegian-nlg.csv b/norwegian-nlg.csv index b9eb6e82..88a41211 100644 --- a/norwegian-nlg.csv +++ b/norwegian-nlg.csv @@ -1,97 +1,97 @@ model_id,num_model_parameters,vocabulary_size,max_sequence_length,commercially_licensed,merge,speed,rank,norne_nb,norne_nn,norec,no_sammendrag,scala_nb,scala_nn,norquad,mmlu_no,hellaswag_no "gpt-4-0613 (few-shot, val)",-1,100,8191,True,False,597,1.23,81.16,75.75,72.72,65.92,77.3,57.18,47.5,68.77,88.3 "gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.38,79.07,81.56,66.66,63.25,64.53,54.7,43.51,73.81,89.91 -"gpt-4-1106-preview (few-shot, 
val)",-1,100,128000,True,False,576,1.39,77.48,78.7,62.55,63.6,74.45,56.31,44.67,70.84,86.3 +"gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,1.4,77.48,78.7,62.55,63.6,74.45,56.31,44.67,70.84,86.3 "meta-llama/Meta-Llama-3-70B (few-shot, val)",70554,128,8192,True,False,312,1.46,75.31,75.94,66.74,65.78,59.82,47.56,60.87,62.45,65.29 meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,1.61,82.44,82.17,40.55,66.76,63.91,45.93,45.33,73.55,81.37 -google/gemma-2-27b-it (few-shot),27227,256,8193,True,False,1516,1.62,67.35,66.61,67.14,66.24,64.66,52.49,44.85,58.34,70.87 +google/gemma-2-27b-it (few-shot),27227,256,8193,True,False,1516,1.63,67.35,66.61,67.14,66.24,64.66,52.49,44.85,58.34,70.87 +meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.77,80.05,80.67,40.65,66.56,56.42,38.21,49.22,65.19,69.47 "gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.78,77.72,71.7,36.27,62.76,71.7,58.79,40.95,69.22,76.4 -meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.78,80.05,80.67,40.65,66.56,56.42,38.21,49.22,65.19,69.47 +meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.78,79.84,79.93,41.11,65.38,57.84,43.52,40.92,66.69,75.66 abhishek/autotrain-llama3-oh-sft-v0-2 (few-shot),70554,128,8192,False,False,2668,1.79,80.97,79.69,63.91,65.47,39.82,26.86,47.06,60.88,57.46 -meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.79,79.84,79.93,41.11,65.38,57.84,43.52,40.92,66.69,75.66 "meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8192,True,False,1673,1.81,80.5,76.47,59.29,65.7,47.28,32.76,39.71,63.58,63.41 google/gemma-2-9b-it (few-shot),9242,256,8193,True,False,2062,1.83,61.82,60.41,61.06,65.94,62.46,52.99,39.1,49.45,67.96 Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.85,72.21,70.24,39.85,62.86,63.14,43.24,43.41,70.2,76.79 google/gemma-2-27b (few-shot),27227,256,8193,True,False,1531,1.9,42.58,40.43,64.15,66.84,51.59,42.41,58.55,57.29,53.73 +"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.92,72.74,69.17,67.45,63.0,74.27,54.83,3.67,59.93,76.01 ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633,1.93,76.25,77.91,40.54,64.53,59.75,47.82,40.99,56.11,67.72 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,1.93,72.74,69.17,67.45,63.0,74.27,54.83,3.67,59.93,76.01 "gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,1.95,77.7,73.92,58.88,64.18,54.29,32.82,45.35,40.26,59.02 Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,2.01,75.68,75.89,38.41,62.79,56.42,39.34,44.35,61.53,67.36 google/gemma-2-9b (few-shot),9242,256,8193,True,False,2038,2.03,54.08,54.33,62.54,65.67,50.88,41.01,46.25,50.81,53.37 "152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32764,True,False,2126,2.08,66.75,66.81,60.58,64.64,47.53,17.14,41.92,51.01,58.23 "meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4096,True,False,1892,2.11,62.46,64.68,59.68,66.09,27.34,3.95,57.44,43.88,53.85 "gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,2.12,60.16,48.74,39.62,62.75,71.38,42.94,36.04,68.89,59.02 -timpal0l/sol (few-shot),10732,32,4096,False,False,3701,2.15,65.14,65.88,57.06,66.21,26.41,19.58,51.6,36.06,64.97 -mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024001,True,False,7095,2.2,70.14,68.74,60.64,64.61,35.59,29.22,49.87,39.07,37.95 +timpal0l/sol 
(few-shot),10732,32,4096,False,False,3701,2.14,65.14,65.88,57.06,66.21,26.41,19.58,51.6,36.06,64.97 "gpt-4o-mini-2024-07-18 (zero-shot, val)",-1,200,8191,True,False,908,2.22,60.43,55.59,39.82,62.77,54.84,33.8,36.55,56.96,68.7 +mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024001,True,False,7095,2.22,70.14,68.74,60.64,64.61,35.59,29.22,49.87,39.07,37.95 mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.25,56.41,55.6,25.18,59.28,62.56,53.09,42.57,45.67,73.86 upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,2.25,68.11,68.19,55.33,65.51,10.15,7.51,55.33,35.57,62.76 nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,2.27,69.21,70.45,39.87,47.52,57.8,40.31,40.97,63.81,73.01 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,8192,False,False,4136,2.36,66.22,64.14,55.48,65.32,26.13,17.32,49.75,29.72,46.78 -"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.39,74.23,70.5,50.92,62.57,76.1,72.03,40.57,24.04,-0.8 -"birgermoell/Munin-NeuralBeagle-NorskGPT (few-shot, val)",7242,32,32768,False,True,2903,2.44,63.33,68.84,58.28,65.94,18.65,10.72,44.39,26.61,46.64 -"birgermoell/WestLake-Munin-Cat-NorskGPT (few-shot, val)",7242,32,32768,False,True,2856,2.44,63.33,68.84,58.28,65.94,18.65,10.72,44.39,26.61,46.64 -NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.45,58.05,59.65,57.94,65.07,51.36,42.84,14.72,43.18,37.58 +"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.38,74.23,70.5,50.92,62.57,76.1,72.03,40.57,24.04,-0.8 +"birgermoell/Munin-NeuralBeagle-NorskGPT (few-shot, val)",7242,32,32768,False,True,2903,2.45,63.33,68.84,58.28,65.94,18.65,10.72,44.39,26.61,46.64 +"birgermoell/WestLake-Munin-Cat-NorskGPT (few-shot, val)",7242,32,32768,False,True,2856,2.45,63.33,68.84,58.28,65.94,18.65,10.72,44.39,26.61,46.64 +NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.46,58.05,59.65,57.94,65.07,51.36,42.84,14.72,43.18,37.58 +skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.49,62.52,61.55,52.09,64.7,21.99,16.84,47.3,39.99,32.16 "RJuro/munin-neuralbeagle-7b (few-shot, val)",7242,32,32768,False,True,2493,2.5,61.18,65.16,55.61,65.99,20.84,9.12,42.92,27.77,39.67 -skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.5,62.52,61.55,52.09,64.7,21.99,16.84,47.3,39.99,32.16 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.52,73.2,72.26,35.8,63.37,36.86,23.4,40.32,38.62,40.5 four-two-labs/orpo-llama-3-swe (few-shot),8030,128,8192,False,False,4974,2.54,61.63,61.3,48.85,63.67,24.15,21.33,53.66,33.52,26.04 "timpal0l/BeagleCatMunin2 (few-shot, val)",7242,32,32768,False,True,2477,2.56,61.17,65.44,58.69,66.08,15.03,5.95,42.42,27.31,41.63 NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.59,58.53,60.26,59.48,54.54,51.85,41.89,25.62,43.35,26.91 -"meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,3843,True,False,1979,2.6,60.21,62.99,55.12,65.11,27.12,6.82,38.5,32.3,34.43 -"timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val)",8030,128,8192,False,True,5018,2.6,64.51,65.66,52.9,64.12,29.34,17.42,38.49,25.77,31.8 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131073,True,False,1005,2.62,64.55,66.44,35.17,64.48,27.41,15.6,43.11,38.1,39.3 -senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.62,64.37,62.77,50.6,65.09,18.09,12.25,38.34,27.33,41.59 -meta-llama/Meta-Llama-3-8B-Instruct 
(few-shot),8030,128,8192,True,False,1007,2.64,74.47,72.93,34.44,63.98,27.77,20.35,42.9,33.44,30.91 -"mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.65,62.47,66.69,54.04,65.74,16.75,13.0,34.48,28.39,35.19 -CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.66,66.55,63.63,38.61,64.48,15.8,12.3,43.26,36.48,35.85 -utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.66,40.91,42.91,52.62,65.77,9.7,11.98,47.36,36.97,37.64 +"timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val)",8030,128,8192,False,True,5018,2.59,64.51,65.66,52.9,64.12,29.34,17.42,38.49,25.77,31.8 +"meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,3843,True,False,1979,2.61,60.21,62.99,55.12,65.11,27.12,6.82,38.5,32.3,34.43 +"mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.63,62.47,66.69,54.04,65.74,16.75,13.0,34.48,28.39,35.19 +senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.63,64.37,62.77,50.6,65.09,18.09,12.25,38.34,27.33,41.59 +CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.65,66.55,63.63,38.61,64.48,15.8,12.3,43.26,36.48,35.85 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.65,64.55,66.44,35.17,64.48,27.41,15.6,43.11,35.02,37.61 +utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.67,40.91,42.91,52.62,65.77,9.7,11.98,47.36,36.97,37.64 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.68,66.56,68.29,34.47,63.8,28.22,18.21,47.34,30.78,31.49 mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.68,63.92,62.15,46.68,63.04,33.38,19.99,31.87,38.91,20.29 -Mabeck/Heidrun-Mistral-7B-chat (few-shot),7242,32,32768,False,False,1419,2.69,61.41,59.49,49.19,64.22,15.17,10.78,48.99,27.64,25.74 +"birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2890,2.69,53.96,63.45,52.7,65.23,14.87,2.48,41.43,27.42,36.05 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.69,55.02,57.37,36.76,61.59,30.73,18.96,41.01,39.07,40.48 -"birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2890,2.7,53.96,63.45,52.7,65.23,14.87,2.48,41.43,27.42,36.05 -"timpal0l/BeagleCatMunin (few-shot, val)",7242,32,32768,False,True,2495,2.7,54.04,62.21,54.74,65.6,14.51,5.38,42.83,25.82,32.01 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.71,61.48,61.58,32.94,63.38,21.2,19.65,53.35,33.02,24.93 -nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.71,64.15,62.16,55.29,60.17,32.3,22.82,32.62,35.37,15.39 -mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.73,67.24,66.08,31.41,62.89,28.72,20.55,40.6,30.39,38.46 -"mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.73,61.9,66.92,48.8,64.72,19.53,9.83,30.27,28.18,36.2 -"birgermoell/Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2887,2.74,56.44,66.56,53.24,64.96,11.96,2.5,39.21,26.64,31.14 -"birgermoell/Rapid-Cycling (few-shot, val)",7242,32,32768,False,True,2346,2.74,55.93,63.85,50.41,65.1,15.74,2.23,39.81,26.34,34.85 -"KennethEnevoldsen/munin_mistral-7b (few-shot, val)",7242,32,32768,False,True,2543,2.75,51.82,62.55,56.37,63.74,6.04,-0.02,48.85,28.43,20.49 +"timpal0l/BeagleCatMunin (few-shot, val)",7242,32,32768,False,True,2495,2.69,54.04,62.21,54.74,65.6,14.51,5.38,42.83,25.82,32.01 +Mabeck/Heidrun-Mistral-7B-chat 
(few-shot),7242,32,32768,False,False,1419,2.7,61.41,59.49,49.19,64.22,15.17,10.78,48.99,27.64,25.74 +"mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.71,61.9,66.92,48.8,64.72,19.53,9.83,30.27,28.18,36.2 +nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.72,64.15,62.16,55.29,60.17,32.3,22.82,32.62,35.37,15.39 +"birgermoell/Rapid-Cycling (few-shot, val)",7242,32,32768,False,True,2346,2.73,55.93,63.85,50.41,65.1,15.74,2.23,39.81,26.34,34.85 +mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.74,67.24,66.08,31.41,62.89,28.72,20.55,40.6,30.39,38.46 "RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val)",7242,32,32768,False,True,3008,2.75,53.68,61.92,47.78,64.23,0.91,1.24,47.76,28.59,42.57 +"KennethEnevoldsen/munin_mistral-7b (few-shot, val)",7242,32,32768,False,True,2543,2.76,51.82,62.55,56.37,63.74,6.04,-0.02,48.85,28.43,20.49 +"birgermoell/Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2887,2.76,56.44,66.56,53.24,64.96,11.96,2.5,39.21,26.64,31.14 ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4096,True,False,2515,2.76,49.94,52.17,53.27,63.01,17.22,12.01,45.04,24.31,30.34 "merge-crew/da-sv-dare-ties-density-0.9 (few-shot, val)",7242,32,32768,True,True,2443,2.76,48.24,61.5,49.4,64.56,24.12,13.2,47.93,26.21,17.0 google/gemma-7b (few-shot),8538,256,8067,True,False,1378,2.77,26.43,32.66,41.82,64.02,25.82,20.16,52.68,39.96,27.82 danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.78,50.43,54.2,39.21,65.46,20.51,11.66,51.57,28.97,25.41 "merge-crew/da-sv-slerp (few-shot, val)",7242,32,32768,True,True,2467,2.78,49.67,61.11,56.07,64.97,3.81,-1.29,44.98,28.63,25.43 "merge-crew/da-sv-task-arithmetic (few-shot, val)",7242,32,32768,True,True,2500,2.78,49.69,61.78,55.87,64.94,2.99,-1.29,44.62,28.26,25.83 -timpal0l/njord-alpha (few-shot),7242,32,32768,True,False,5431,2.78,50.47,51.97,48.03,65.48,22.65,17.1,44.72,25.82,21.35 -bineric/NorskGPT-Llama-13B-v0.1 (few-shot),13016,32,4096,False,False,2856,2.8,56.72,57.62,48.86,64.86,9.87,6.9,41.27,24.51,31.41 -AI-Sweden-Models/Llama-3-8B (few-shot),8030,128,8192,True,False,4141,2.82,44.53,47.02,41.84,65.29,19.97,15.61,50.91,30.85,18.61 +timpal0l/njord-alpha (few-shot),7242,32,32768,True,False,5431,2.79,50.47,51.97,48.03,65.48,22.65,17.1,44.72,25.82,21.35 +bineric/NorskGPT-Llama-13B-v0.1 (few-shot),13016,32,4096,False,False,2856,2.81,56.72,57.62,48.86,64.86,9.87,6.9,41.27,24.51,31.41 +AI-Sweden-Models/Llama-3-8B (few-shot),8030,128,8192,True,False,4141,2.83,44.53,47.02,41.84,65.29,19.97,15.61,50.91,30.85,18.61 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.83,53.79,56.13,51.36,62.4,6.83,8.09,48.01,24.55,26.71 "merge-crew/da-sv-ties (few-shot, val)",7242,32,32768,True,True,2457,2.83,47.61,60.57,44.46,64.59,23.99,11.6,47.02,27.13,15.65 meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.83,65.17,60.22,34.02,62.16,32.48,18.38,33.06,32.4,24.89 "AI-Sweden-Models/tyr (few-shot, val)",7242,32,32768,False,True,6079,2.84,58.6,63.15,51.85,64.57,0.66,0.53,43.22,26.09,23.43 -AI-Sweden-Models/Llama-3-8B-instruct (few-shot),8030,128,8192,False,False,1472,2.85,81.96,78.42,35.3,63.15,12.11,5.94,22.15,28.72,44.75 +AI-Sweden-Models/Llama-3-8B-instruct (few-shot),8030,128,8192,False,False,1472,2.86,81.96,78.42,35.3,63.15,12.11,5.94,22.15,28.72,44.75 bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,2.86,56.18,56.96,50.94,64.21,8.19,5.55,41.35,21.27,26.81 
"merge-crew/da-sv-dare-ties-density-0.6 (few-shot, val)",7242,32,32768,True,True,2515,2.86,47.26,59.35,54.93,64.25,9.0,5.26,45.95,21.89,15.32 ThatsGroes/munin-SkoleGPTOpenOrca-7b-16bit (few-shot),7242,32,32768,False,False,3006,2.88,51.99,52.74,50.39,64.2,0.99,1.27,47.95,25.74,26.36 bineric/NorskGPT-Mistral-7b (few-shot),7242,32,4096,False,False,1440,2.9,62.98,60.12,35.47,64.52,15.15,9.49,27.06,29.91,37.75 -mhenrichsen/hestenettetLM (few-shot),7242,32,32768,True,False,1151,2.9,52.52,55.6,48.23,63.53,8.53,6.65,46.89,27.67,14.2 timpal0l/Llama-3-8B-flashback-v1 (few-shot),8030,128,8192,True,False,3004,2.9,59.09,60.02,47.58,58.88,10.52,6.67,49.89,29.17,17.46 +timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.9,48.97,51.52,49.05,63.32,14.37,9.96,44.07,25.07,15.56 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.91,61.54,60.94,35.73,63.77,21.33,13.2,32.36,27.87,30.99 -timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.91,48.97,51.52,49.05,63.32,14.37,9.96,44.07,25.07,15.56 -NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.92,65.75,70.12,41.9,53.29,47.88,35.66,0.03,46.91,33.14 +NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.91,65.75,70.12,41.9,53.29,47.88,35.66,0.03,46.91,33.14 +mhenrichsen/hestenettetLM (few-shot),7242,32,32768,True,False,1151,2.91,52.52,55.6,48.23,63.53,8.53,6.65,46.89,27.67,14.2 mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.94,52.0,55.12,47.25,63.49,8.66,6.8,46.86,27.78,10.88 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.95,57.21,59.62,38.93,65.12,8.65,5.92,42.32,23.88,22.33 RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.96,58.61,60.4,41.36,65.33,6.52,3.95,38.93,23.32,22.3 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.96,62.89,56.18,33.07,61.95,30.73,20.57,30.77,30.95,21.98 alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.98,50.63,52.69,44.05,63.11,11.6,9.26,45.23,28.19,13.65 mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.98,50.56,52.65,44.61,63.13,12.1,9.3,45.15,28.31,13.59 CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,3.0,60.94,59.61,35.73,62.45,6.18,4.0,46.52,20.14,27.5 Mabeck/Heidrun-Mistral-7B-base (few-shot),7242,32,32768,True,False,3823,3.01,50.1,54.81,48.64,62.29,10.31,1.11,42.2,27.39,11.76 +google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,3.02,35.56,37.7,46.84,64.58,17.15,14.38,29.75,23.02,33.13 mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,3.02,53.42,54.34,38.79,64.43,17.06,11.0,35.74,20.37,21.16 bineric/NorskGPT-Llama3-8b (few-shot),8030,128,8192,False,False,3382,3.03,64.34,59.5,35.6,61.95,31.69,24.58,0.1,32.03,41.45 -google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,3.03,35.56,37.7,46.84,64.58,17.15,14.38,29.75,23.02,33.13 occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,3.04,45.5,45.96,44.46,63.95,0.0,0.0,52.19,20.61,16.18 google/gemma-7b-it (few-shot),8538,256,8192,False,False,1792,3.05,59.77,60.98,28.14,60.86,14.01,10.15,51.08,19.07,16.52 meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,3.06,51.12,55.35,23.75,64.56,14.0,7.61,49.24,23.6,16.55 @@ -104,11 +104,11 @@ ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,3.17 neph1/bellman-7b-mistral-instruct-v0.2 
(few-shot),7242,32,32768,False,False,2518,3.18,57.01,56.77,38.81,62.13,14.16,9.29,32.75,17.08,10.52 ibm-granite/granite-3.0-2b-instruct (few-shot),2634,49,4097,True,False,10194,3.19,52.68,53.17,39.87,62.24,12.08,7.18,36.0,15.78,9.98 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,3.19,49.66,51.98,44.13,60.5,0.67,1.11,28.62,26.82,20.98 -neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,3.19,35.54,36.61,32.18,60.89,0.0,0.0,43.47,24.47,33.51 +neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,3.2,35.54,36.61,32.18,60.89,0.0,0.0,43.47,24.47,33.51 abhishek/autotrain-llama3-orpo-v2 (few-shot),8030,128,8192,False,False,1471,3.21,62.25,59.5,22.79,61.69,8.51,5.1,43.34,17.58,13.68 01-ai/Yi-1.5-6B (few-shot),6061,64,4097,True,False,2867,3.22,46.02,48.72,27.86,60.92,2.41,2.5,44.7,23.93,22.39 ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,3.23,66.91,62.82,40.71,60.59,9.5,6.74,32.83,11.35,6.21 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,3.24,56.33,54.68,37.18,61.44,6.76,6.79,30.11,15.54,17.55 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,3.25,56.33,54.68,37.18,61.44,6.76,6.79,30.11,15.54,17.55 meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,3.26,44.99,49.09,41.56,63.59,3.04,4.03,33.77,14.81,12.69 emillykkejensen/Phi-3-mini-4k-instruct-dansk (few-shot),3821,32,4096,False,False,1360,3.29,56.41,53.95,42.27,60.58,0.0,0.21,29.35,18.57,13.36 microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,130819,True,False,7312,3.29,52.18,50.53,33.3,60.69,2.63,4.0,37.08,17.34,17.43 @@ -121,24 +121,24 @@ norallm/normistral-7b-warm-instruct (few-shot),7248,33,2048,True,False,6194,3.33 ibm-granite/granite-3.0-2b-base (few-shot),2534,49,4097,True,False,10187,3.35,43.0,45.08,35.36,62.0,2.79,1.95,37.33,15.76,12.98 "merge-crew/da-sv-dare-ties-density-0.3 (few-shot, val)",7242,32,32768,True,True,2461,3.36,35.98,47.39,38.98,61.99,11.54,5.2,37.54,10.4,2.52 AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,3.37,30.82,39.56,34.5,63.1,15.17,12.46,42.81,4.51,5.27 -tollefj/nordavind-7b-instruct-warm (few-shot),7248,33,2048,False,False,6450,3.38,38.82,43.28,38.05,64.04,8.45,7.5,40.47,2.6,3.83 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,3.39,69.39,62.76,3.97,62.53,31.65,5.86,36.65,16.62,2.63 -NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,3.4,31.45,33.85,36.06,63.06,8.34,6.84,48.31,3.28,1.87 +tollefj/nordavind-7b-instruct-warm (few-shot),7248,33,2048,False,False,6450,3.39,38.82,43.28,38.05,64.04,8.45,7.5,40.47,2.6,3.83 +NorwAI/NorwAI-Llama2-7B (few-shot),7033,68,4096,True,False,4438,3.41,31.45,33.85,36.06,63.06,8.34,6.84,48.31,3.28,1.87 allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,3.42,42.78,42.85,36.68,59.58,2.39,1.91,39.16,13.41,7.83 Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.44,32.12,36.86,36.97,57.64,5.27,1.4,40.0,16.5,13.27 TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,3.44,42.77,45.69,37.79,61.05,8.77,8.47,44.24,-1.34,-0.94 AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot),7111,64,2048,True,False,1473,3.45,24.67,29.03,34.39,64.07,2.42,5.11,42.52,6.89,12.81 -AI-Sweden-Models/gpt-sw3-40b (few-shot),39927,64,1795,True,False,409,3.46,24.07,26.67,31.05,61.58,10.8,8.89,48.78,6.67,6.25 +AI-Sweden-Models/gpt-sw3-40b 
(few-shot),39927,64,1795,True,False,409,3.47,24.07,26.67,31.05,61.58,10.8,8.89,48.78,6.67,6.25 AI-Sweden-Models/gpt-sw3-6.7b-v2 (few-shot),7111,64,2048,True,False,2351,3.47,29.62,32.3,34.67,61.61,8.37,7.76,44.62,3.03,5.57 google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.51,20.47,24.18,32.61,60.17,3.22,3.91,41.16,19.03,7.35 timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,32768,False,False,5172,3.52,50.34,52.06,32.19,58.71,-0.22,0.0,20.57,22.27,11.71 ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.53,44.89,48.08,32.29,59.77,7.49,4.65,26.37,11.54,3.42 -LumiOpen/Viking-33B@1000B (few-shot),33119,131,4099,True,False,2080,3.56,40.4,44.45,40.79,56.55,5.91,2.98,37.75,1.16,-0.29 +LumiOpen/Viking-33B@1000B (few-shot),33119,131,4099,True,False,2080,3.57,40.4,44.45,40.79,56.55,5.91,2.98,37.75,1.16,-0.29 norallm/normistral-7b-warm (few-shot),7248,33,2048,True,False,3175,3.58,42.29,46.29,27.05,62.81,1.63,2.57,39.18,1.5,-0.05 AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot),1445,64,2048,True,False,4544,3.59,33.08,38.28,35.58,63.11,0.82,1.43,36.06,-0.68,-0.32 ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.62,40.08,43.96,31.9,59.98,-0.07,1.27,23.32,11.78,5.48 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.62,53.78,55.14,26.21,57.11,3.9,2.42,24.86,10.36,5.85 -MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.63,36.96,39.38,32.67,51.44,2.18,5.33,45.23,9.35,4.85 +MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.64,36.96,39.38,32.67,51.44,2.18,5.33,45.23,9.35,4.85 openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.67,37.36,42.83,16.02,61.93,-0.08,2.29,31.6,8.67,9.8 openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.69,34.78,39.0,10.69,62.73,6.17,5.9,31.25,7.97,10.39 ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.72,37.73,40.07,21.5,59.05,0.86,2.01,27.03,9.41,7.04 @@ -159,38 +159,39 @@ ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.9,32.21,36 mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.9,28.74,30.34,27.49,60.01,-2.17,0.26,19.1,3.21,0.18 AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.95,27.37,31.22,34.21,54.28,0.92,1.25,18.52,0.33,0.11 HPLT/gpt-13b-nordic-prerelease (few-shot),14030,131,4099,True,False,3520,3.95,28.94,33.83,27.32,54.05,1.46,-0.59,25.62,0.32,0.92 -HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.97,26.7,28.23,23.25,56.31,-0.47,0.26,13.4,11.12,2.53 -meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,3.97,30.54,31.34,29.5,53.31,-0.13,0.02,19.59,2.49,2.53 +HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,3.98,26.7,28.23,23.25,56.31,-0.47,0.26,13.4,11.12,2.53 ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.98,53.93,54.04,23.83,50.59,3.91,1.55,2.37,8.68,6.19 +meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,3.98,30.54,31.34,29.5,53.31,-0.13,0.02,19.59,2.49,2.53 HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,4.01,37.6,38.38,24.05,48.55,3.56,2.61,13.58,9.52,3.62 Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,4.05,26.99,25.74,19.85,55.08,1.96,-0.01,16.33,7.79,5.61 NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,4.06,31.65,31.54,10.64,60.02,1.81,1.72,16.32,0.61,-1.4 
state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,4.06,26.9,34.59,31.06,53.77,0.21,-0.17,10.35,-0.16,1.32 Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,4.09,12.1,13.42,22.82,54.48,2.7,2.21,16.31,9.57,6.02 ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,4.1,36.99,37.27,19.55,56.69,1.95,2.31,7.33,-0.29,0.99 -ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,4.15,31.16,29.73,17.59,55.43,1.07,1.59,6.92,0.05,-0.1 -mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,4.17,27.37,27.59,18.09,56.77,-0.19,-0.8,5.84,-0.5,0.07 +ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,4.16,31.16,29.73,17.59,55.43,1.07,1.59,6.92,0.05,-0.1 +mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,4.16,27.37,27.59,18.09,56.77,-0.19,-0.8,5.84,-0.5,0.07 RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,4.18,21.04,18.71,12.22,53.49,-1.18,0.36,26.86,0.21,-0.3 HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,4.19,20.25,28.99,17.44,49.59,3.2,2.61,21.5,0.86,0.0 PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,4.2,28.82,27.81,18.74,53.53,-0.46,-0.84,12.66,-1.29,1.29 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.22,38.96,40.42,19.42,49.86,-0.13,0.77,4.7,-0.21,-0.56 AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4.23,27.66,30.88,5.13,58.91,0.0,0.0,7.55,-0.68,0.32 +PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.23,38.96,40.42,19.42,49.86,-0.13,0.77,4.7,-0.21,-0.56 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,4.23,29.25,25.45,11.28,55.58,1.52,0.52,8.47,2.62,-0.06 PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.31,27.47,23.82,22.22,49.84,-2.06,-0.77,2.48,0.39,-1.07 Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.32,34.46,33.41,6.31,49.88,-1.59,0.61,5.95,2.81,2.92 allenai/OLMo-7B (few-shot),6888,50,2051,True,False,5403,4.34,34.42,35.17,21.46,45.34,0.34,0.26,0.12,2.61,0.96 HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.39,24.37,24.69,8.84,53.61,-1.2,-0.5,0.16,-0.81,-0.71 HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.47,20.89,19.62,2.78,53.93,-0.98,0.93,0.15,-0.48,-0.53 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.48,26.6,23.7,6.21,48.59,-0.39,0.21,4.65,-1.13,-0.51 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.49,20.37,21.27,7.6,49.27,1.31,0.51,4.8,-0.9,-1.0 -NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,4.5,10.6,22.63,19.76,44.3,2.8,0.17,3.99,0.29,0.57 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.49,26.6,23.7,6.21,48.59,-0.39,0.21,4.65,-1.13,-0.51 +NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,4.49,10.6,22.63,19.76,44.3,2.8,0.17,3.99,0.29,0.57 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.5,20.37,21.27,7.6,49.27,1.31,0.51,4.8,-0.9,-1.0 NbAiLab/nb-llama-3.2-3B (few-shot),3213,128,131072,True,False,1880,4.5,2.73,0.26,15.1,45.63,13.58,7.78,0.15,11.57,1.19 -AI-Sweden-Models/gpt-sw3-126m (few-shot),186,64,2048,True,False,8958,4.52,13.55,9.38,7.78,51.68,-1.46,-2.97,2.32,0.39,-0.8 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2051,True,False,5484,4.52,9.06,17.16,25.52,40.94,0.68,0.17,0.46,2.43,2.35 +AI-Sweden-Models/gpt-sw3-126m 
(few-shot),186,64,2048,True,False,8958,4.53,13.55,9.38,7.78,51.68,-1.46,-2.97,2.32,0.39,-0.8 NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,4096,True,False,3024,4.53,12.77,10.51,8.7,49.76,0.0,0.82,1.85,-2.42,1.23 -PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.53,26.59,26.78,7.91,45.83,0.28,0.04,0.65,0.57,-0.66 +PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.54,26.59,26.78,7.91,45.83,0.28,0.04,0.65,0.57,-0.66 NbAiLab/nb-gpt-j-6B-v2 (few-shot),6051,50,1024,False,False,2556,4.57,5.29,6.77,20.84,44.23,0.45,0.48,2.43,0.17,-0.49 -RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.57,0.0,0.0,0.95,59.32,0.0,0.0,0.0,0.18,0.3 RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.57,0.0,0.0,1.27,59.1,0.0,0.0,0.0,0.83,0.09 +RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.58,0.0,0.0,0.95,59.32,0.0,0.0,0.0,0.18,0.3 allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,4.6,30.79,31.12,9.95,40.45,-0.95,-0.04,0.0,0.32,0.12 PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.62,25.02,21.59,8.05,43.81,-0.15,-0.97,0.37,-0.31,-0.61 NbAiLab/nb-gpt-j-6B@sharded (few-shot),-1,50,1024,True,False,2630,4.76,0.22,0.24,20.64,38.55,-0.99,-0.15,0.53,0.63,-0.09 diff --git a/norwegian-nlg.md b/norwegian-nlg.md index 5158fa28..c869f6a9 100644 --- a/norwegian-nlg.md +++ b/norwegian-nlg.md @@ -3,7 +3,7 @@ layout: leaderboard title: Norwegian NLG 🇳🇴 --- -
Last updated: 10/01/2025 12:30:15 CET
+
Last updated: 11/01/2025 11:03:25 CET
@@ -105,7 +105,7 @@ title: Norwegian NLG 🇳🇴 128000 True 576 ± 221 / 81 ± 28 - 1.39 + 1.40 77.48 ± 2.32 / 55.87 ± 2.83 78.70 ± 2.78 / 57.58 ± 4.23 62.55 ± 2.82 / 72.41 ± 2.42 @@ -186,7 +186,7 @@ title: Norwegian NLG 🇳🇴 8193 True 1,516 ± 257 / 480 ± 148 - 1.62 + 1.63 67.35 ± 2.33 / 56.75 ± 3.04 66.61 ± 1.81 / 57.24 ± 4.35 67.14 ± 1.22 / 78.63 ± 0.96 @@ -206,6 +206,33 @@ title: Norwegian NLG 🇳🇴 13.0.0 13.0.0 + + meta-llama/Llama-3.1-70B-Instruct (few-shot) + 70554 + 128 + 131072 + True + 1,409 ± 457 / 186 ± 63 + 1.77 + 80.05 ± 0.87 / 67.79 ± 1.90 + 80.67 ± 0.92 / 69.11 ± 3.29 + 40.65 ± 0.45 / 40.56 ± 0.29 + 66.56 ± 0.47 / 19.70 ± 1.10 + 56.42 ± 1.70 / 76.10 ± 1.29 + 38.21 ± 3.25 / 64.61 ± 2.89 + 49.22 ± 2.20 / 74.92 ± 1.35 + 65.19 ± 0.76 / 73.71 ± 0.58 + 69.47 ± 1.66 / 76.67 ± 1.30 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + 14.0.3 + gpt-4o-2024-05-13 (zero-shot, val) unknown @@ -234,22 +261,22 @@ title: Norwegian NLG 🇳🇴 14.0.3 - meta-llama/Llama-3.1-70B-Instruct (few-shot) + meta-llama/Llama-3.3-70B-Instruct (few-shot) 70554 128 131072 True - 1,409 ± 457 / 186 ± 63 + 1,353 ± 443 / 180 ± 61 1.78 - 80.05 ± 0.87 / 67.79 ± 1.90 - 80.67 ± 0.92 / 69.11 ± 3.29 - 40.65 ± 0.45 / 40.56 ± 0.29 - 66.56 ± 0.47 / 19.70 ± 1.10 - 56.42 ± 1.70 / 76.10 ± 1.29 - 38.21 ± 3.25 / 64.61 ± 2.89 - 49.22 ± 2.20 / 74.92 ± 1.35 - 65.19 ± 0.76 / 73.71 ± 0.58 - 69.47 ± 1.66 / 76.67 ± 1.30 + 79.84 ± 1.02 / 67.96 ± 2.10 + 79.93 ± 0.81 / 68.93 ± 2.65 + 41.11 ± 0.41 / 40.48 ± 0.38 + 65.38 ± 0.46 / 16.49 ± 1.02 + 57.84 ± 1.77 / 77.97 ± 1.19 + 43.52 ± 2.51 / 69.59 ± 2.20 + 40.92 ± 1.45 / 69.52 ± 1.07 + 66.69 ± 0.64 / 74.99 ± 0.47 + 75.66 ± 1.08 / 81.60 ± 0.80 14.0.3 14.0.3 14.0.3 @@ -287,33 +314,6 @@ title: Norwegian NLG 🇳🇴 12.9.1 12.9.1 - - meta-llama/Llama-3.3-70B-Instruct (few-shot) - 70554 - 128 - 131072 - True - 1,353 ± 443 / 180 ± 61 - 1.79 - 79.84 ± 1.02 / 67.96 ± 2.10 - 79.93 ± 0.81 / 68.93 ± 2.65 - 41.11 ± 0.41 / 40.48 ± 0.38 - 65.38 ± 0.46 / 16.49 ± 1.02 - 57.84 ± 1.77 / 77.97 ± 1.19 - 43.52 ± 2.51 / 69.59 ± 2.20 - 40.92 ± 1.45 / 69.52 ± 1.07 - 66.69 ± 0.64 / 74.99 ± 0.47 - 75.66 ± 1.08 / 81.60 ± 0.80 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - 14.0.3 - meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val) 70554 @@ -422,6 +422,33 @@ title: Norwegian NLG 🇳🇴 13.0.0 13.0.0 + + gpt-4o-mini-2024-07-18 (few-shot, val) + unknown + 200 + 8191 + True + 784 ± 310 / 95 ± 28 + 1.92 + 72.74 ± 1.75 / 57.58 ± 4.14 + 69.17 ± 2.61 / 56.34 ± 3.95 + 67.45 ± 4.03 / 77.50 ± 2.90 + 63.00 ± 0.19 / 11.59 ± 0.22 + 74.27 ± 2.57 / 86.86 ± 1.43 + 54.83 ± 3.96 / 76.91 ± 1.98 + 3.67 ± 1.64 / 43.45 ± 1.90 + 59.93 ± 2.64 / 69.69 ± 1.97 + 76.01 ± 1.56 / 81.33 ± 1.30 + 14.0.0 + 14.0.0 + 14.0.0 + 14.0.1 + 14.0.0 + 14.0.0 + 14.0.0 + 14.0.1 + 14.0.1 + ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot) 28411 @@ -449,33 +476,6 @@ title: Norwegian NLG 🇳🇴 14.0.4 14.0.4 - - gpt-4o-mini-2024-07-18 (few-shot, val) - unknown - 200 - 8191 - True - 784 ± 310 / 95 ± 28 - 1.93 - 72.74 ± 1.75 / 57.58 ± 4.14 - 69.17 ± 2.61 / 56.34 ± 3.95 - 67.45 ± 4.03 / 77.50 ± 2.90 - 63.00 ± 0.19 / 11.59 ± 0.22 - 74.27 ± 2.57 / 86.86 ± 1.43 - 54.83 ± 3.96 / 76.91 ± 1.98 - 3.67 ± 1.64 / 43.45 ± 1.90 - 59.93 ± 2.64 / 69.69 ± 1.97 - 76.01 ± 1.56 / 81.33 ± 1.30 - 14.0.0 - 14.0.0 - 14.0.0 - 14.0.1 - 14.0.0 - 14.0.0 - 14.0.0 - 14.0.1 - 14.0.1 - gpt-3.5-turbo-0613 (few-shot, val) unknown @@ -645,7 +645,7 @@ title: Norwegian NLG 🇳🇴 4096 False 3,701 ± 876 / 771 ± 247 - 2.15 + 2.14 65.14 ± 1.62 / 52.85 ± 3.03 65.88 ± 1.53 / 52.89 ± 
2.29 57.06 ± 1.54 / 71.09 ± 1.17 @@ -665,33 +665,6 @@ title: Norwegian NLG 🇳🇴 12.7.0 12.7.0 - - mistralai/Mistral-Nemo-Instruct-2407 (few-shot) - 12248 - 131 - 1024001 - True - 7,095 ± 2,193 / 1,063 ± 344 - 2.20 - 70.14 ± 1.60 / 51.03 ± 3.02 - 68.74 ± 1.21 / 51.48 ± 2.96 - 60.64 ± 2.52 / 73.91 ± 1.86 - 64.61 ± 0.40 / 16.10 ± 0.86 - 35.59 ± 2.29 / 61.96 ± 3.72 - 29.22 ± 2.86 / 60.98 ± 4.02 - 49.87 ± 4.84 / 76.01 ± 3.10 - 39.07 ± 0.84 / 53.89 ± 0.69 - 37.95 ± 3.53 / 51.35 ± 3.17 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - gpt-4o-mini-2024-07-18 (zero-shot, val) unknown @@ -719,6 +692,33 @@ title: Norwegian NLG 🇳🇴 14.0.3 14.0.3 + + mistralai/Mistral-Nemo-Instruct-2407 (few-shot) + 12248 + 131 + 1024001 + True + 7,095 ± 2,193 / 1,063 ± 344 + 2.22 + 70.14 ± 1.60 / 51.03 ± 3.02 + 68.74 ± 1.21 / 51.48 ± 2.96 + 60.64 ± 2.52 / 73.91 ± 1.86 + 64.61 ± 0.40 / 16.10 ± 0.86 + 35.59 ± 2.29 / 61.96 ± 3.72 + 29.22 ± 2.86 / 60.98 ± 4.02 + 49.87 ± 4.84 / 76.01 ± 3.10 + 39.07 ± 0.84 / 53.89 ± 0.69 + 37.95 ± 3.53 / 51.35 ± 3.17 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot) 341029 @@ -834,7 +834,7 @@ title: Norwegian NLG 🇳🇴 200000 True 193 ± 87 / 55 ± 19 - 2.39 + 2.38 74.23 ± 1.86 / 70.66 ± 2.97 70.50 ± 2.19 / 65.71 ± 2.34 50.92 ± 2.29 / 63.00 ± 1.71 @@ -861,7 +861,7 @@ title: Norwegian NLG 🇳🇴 32768 False 2,903 ± 407 / 1,157 ± 350 - 2.44 + 2.45 63.33 ± 2.69 / 53.24 ± 3.41 68.84 ± 1.87 / 53.85 ± 3.78 58.28 ± 3.11 / 69.79 ± 2.39 @@ -888,7 +888,7 @@ title: Norwegian NLG 🇳🇴 32768 False 2,856 ± 391 / 1,142 ± 342 - 2.44 + 2.45 63.33 ± 2.69 / 53.24 ± 3.41 68.84 ± 1.87 / 53.85 ± 3.78 58.28 ± 3.11 / 69.79 ± 2.39 @@ -915,7 +915,7 @@ title: Norwegian NLG 🇳🇴 32768 True 9,015 ± 2,966 / 1,121 ± 510 - 2.45 + 2.46 58.05 ± 2.37 / 42.27 ± 2.34 59.65 ± 1.56 / 42.70 ± 2.35 57.94 ± 2.51 / 71.75 ± 2.43 @@ -935,6 +935,33 @@ title: Norwegian NLG 🇳🇴 14.0.4 14.0.4 + + skole-gpt-mixtral (few-shot) + unknown + 32 + 32768 + False + 3,583 ± 977 / 686 ± 231 + 2.49 + 62.52 ± 2.14 / 49.42 ± 3.83 + 61.55 ± 1.68 / 47.53 ± 3.65 + 52.09 ± 3.90 / 65.72 ± 4.14 + 64.70 ± 0.79 / 17.10 ± 1.35 + 21.99 ± 2.17 / 58.21 ± 2.38 + 16.84 ± 2.07 / 51.76 ± 4.30 + 47.30 ± 4.17 / 72.60 ± 3.24 + 39.99 ± 1.01 / 54.90 ± 0.76 + 32.16 ± 1.91 / 48.12 ± 1.74 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + RJuro/munin-neuralbeagle-7b (few-shot, val) 7242 @@ -962,33 +989,6 @@ title: Norwegian NLG 🇳🇴 9.3.2 9.3.2 - - skole-gpt-mixtral (few-shot) - unknown - 32 - 32768 - False - 3,583 ± 977 / 686 ± 231 - 2.50 - 62.52 ± 2.14 / 49.42 ± 3.83 - 61.55 ± 1.68 / 47.53 ± 3.65 - 52.09 ± 3.90 / 65.72 ± 4.14 - 64.70 ± 0.79 / 17.10 ± 1.35 - 21.99 ± 2.17 / 58.21 ± 2.38 - 16.84 ± 2.07 / 51.76 ± 4.30 - 47.30 ± 4.17 / 72.60 ± 3.24 - 39.99 ± 1.01 / 54.90 ± 0.76 - 32.16 ± 1.91 / 48.12 ± 1.74 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - CohereForAI/c4ai-command-r-08-2024 (few-shot) 32296 @@ -1097,33 +1097,6 @@ title: Norwegian NLG 🇳🇴 14.0.4 14.0.4 - - meta-llama/Llama-2-70b-chat-hf (few-shot, val) - 68977 - 32 - 3843 - True - 1,979 ± 621 / 320 ± 105 - 2.60 - 60.21 ± 1.86 / 47.06 ± 3.08 - 62.99 ± 2.66 / 48.82 ± 5.49 - 55.12 ± 5.10 / 66.55 ± 5.07 - 65.11 ± 0.66 / 17.82 ± 1.21 - 27.12 ± 4.90 / 54.26 ± 6.80 - 6.82 ± 5.06 / 46.18 ± 4.14 - 38.50 ± 3.93 / 69.99 ± 2.23 - 32.30 ± 3.42 / 48.32 ± 2.61 - 34.43 ± 2.91 / 49.65 ± 2.23 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 
- timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val) 8030 @@ -1131,7 +1104,7 @@ title: Norwegian NLG 🇳🇴 8192 False 5,018 ± 1,216 / 996 ± 324 - 2.60 + 2.59 64.51 ± 3.28 / 51.06 ± 4.78 65.66 ± 3.82 / 53.90 ± 4.32 52.90 ± 4.31 / 65.38 ± 3.73 @@ -1152,85 +1125,31 @@ title: Norwegian NLG 🇳🇴 12.7.0 - meta-llama/Llama-3.1-8B-Instruct (few-shot) - 8030 - 128 - 131073 - True - 1,005 ± 330 / 196 ± 74 - 2.62 - 64.55 ± 1.69 / 56.81 ± 2.50 - 66.44 ± 1.38 / 60.02 ± 3.36 - 35.17 ± 0.32 / 38.11 ± 0.29 - 64.48 ± 0.14 / 15.14 ± 0.38 - 27.41 ± 1.97 / 54.94 ± 2.06 - 15.60 ± 2.05 / 46.51 ± 2.41 - 43.11 ± 2.22 / 69.74 ± 1.60 - 38.10 ± 0.57 / 52.64 ± 0.47 - 39.30 ± 1.01 / 54.03 ± 0.82 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 13.0.0 - 13.0.0 - - - senseable/WestLake-7B-v2 (few-shot) - 7242 + meta-llama/Llama-2-70b-chat-hf (few-shot, val) + 68977 32 - 32768 - False - 5,993 ± 1,028 / 1,742 ± 561 - 2.62 - 64.37 ± 2.17 / 52.81 ± 2.48 - 62.77 ± 0.83 / 51.80 ± 2.77 - 50.60 ± 4.90 / 66.76 ± 3.04 - 65.09 ± 0.31 / 17.27 ± 0.66 - 18.09 ± 2.04 / 52.56 ± 2.60 - 12.25 ± 2.18 / 50.79 ± 2.42 - 38.34 ± 2.39 / 69.54 ± 1.96 - 27.33 ± 0.72 / 45.16 ± 0.55 - 41.59 ± 2.61 / 56.02 ± 2.08 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - - - meta-llama/Meta-Llama-3-8B-Instruct (few-shot) - 8030 - 128 - 8192 + 3843 True - 1,007 ± 316 / 162 ± 45 - 2.64 - 74.47 ± 1.47 / 65.57 ± 2.39 - 72.93 ± 1.00 / 65.44 ± 2.55 - 34.44 ± 0.42 / 37.94 ± 0.39 - 63.98 ± 0.50 / 14.75 ± 0.79 - 27.77 ± 1.63 / 61.75 ± 1.77 - 20.35 ± 1.92 / 57.74 ± 2.28 - 42.90 ± 3.57 / 69.90 ± 3.17 - 33.44 ± 0.67 / 48.76 ± 0.58 - 30.91 ± 1.88 / 45.85 ± 1.93 - 12.6.1 - 12.6.1 - 14.0.4 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 + 1,979 ± 621 / 320 ± 105 + 2.61 + 60.21 ± 1.86 / 47.06 ± 3.08 + 62.99 ± 2.66 / 48.82 ± 5.49 + 55.12 ± 5.10 / 66.55 ± 5.07 + 65.11 ± 0.66 / 17.82 ± 1.21 + 27.12 ± 4.90 / 54.26 ± 6.80 + 6.82 ± 5.06 / 46.18 ± 4.14 + 38.50 ± 3.93 / 69.99 ± 2.23 + 32.30 ± 3.42 / 48.32 ± 2.61 + 34.43 ± 2.91 / 49.65 ± 2.23 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 mlabonne/NeuralBeagle14-7B (few-shot, val) @@ -1239,7 +1158,7 @@ title: Norwegian NLG 🇳🇴 8192 False 2,549 ± 472 / 784 ± 245 - 2.65 + 2.63 62.47 ± 2.56 / 57.71 ± 3.02 66.69 ± 2.91 / 58.83 ± 3.70 54.04 ± 2.91 / 66.46 ± 2.59 @@ -1259,6 +1178,33 @@ title: Norwegian NLG 🇳🇴 9.3.2 9.3.2 + + senseable/WestLake-7B-v2 (few-shot) + 7242 + 32 + 32768 + False + 5,993 ± 1,028 / 1,742 ± 561 + 2.63 + 64.37 ± 2.17 / 52.81 ± 2.48 + 62.77 ± 0.83 / 51.80 ± 2.77 + 50.60 ± 4.90 / 66.76 ± 3.04 + 65.09 ± 0.31 / 17.27 ± 0.66 + 18.09 ± 2.04 / 52.56 ± 2.60 + 12.25 ± 2.18 / 50.79 ± 2.42 + 38.34 ± 2.39 / 69.54 ± 1.96 + 27.33 ± 0.72 / 45.16 ± 0.55 + 41.59 ± 2.61 / 56.02 ± 2.08 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + 12.6.1 + CohereForAI/aya-expanse-8b (few-shot) 8028 @@ -1266,7 +1212,7 @@ title: Norwegian NLG 🇳🇴 8192 False 2,686 ± 685 / 491 ± 164 - 2.66 + 2.65 66.55 ± 2.12 / 39.28 ± 3.45 63.63 ± 1.62 / 37.25 ± 3.49 38.61 ± 2.28 / 51.46 ± 2.62 @@ -1286,6 +1232,33 @@ title: Norwegian NLG 🇳🇴 13.0.0 13.0.0 + + meta-llama/Llama-3.1-8B-Instruct (few-shot) + 8030 + 128 + 131072 + True + 1,473 ± 377 / 283 ± 96 + 2.65 + 64.55 ± 1.69 / 56.81 ± 2.50 + 66.44 ± 1.38 / 60.02 ± 3.36 + 35.17 ± 0.32 / 38.11 ± 0.29 + 64.48 ± 0.14 / 15.14 ± 0.38 + 27.41 ± 1.97 / 54.94 ± 2.06 + 15.60 ± 2.05 / 46.51 ± 2.41 + 43.11 ± 2.22 / 69.74 ± 1.60 + 35.02 ± 1.24 / 51.13 ± 0.94 + 37.61 ± 0.88 / 53.12 ± 0.65 + 14.1.2 + 14.1.2 + 14.1.2 + 
14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + utter-project/EuroLLM-9B-Instruct (few-shot) 9152 @@ -1293,7 +1266,7 @@ title: Norwegian NLG 🇳🇴 4096 False 1,483 ± 321 / 379 ± 158 - 2.66 + 2.67 40.91 ± 2.56 / 33.22 ± 1.88 42.91 ± 2.34 / 35.61 ± 2.22 52.62 ± 2.01 / 68.10 ± 2.25 @@ -1313,6 +1286,33 @@ title: Norwegian NLG 🇳🇴 13.1.0 13.1.0 + + meta-llama/Meta-Llama-3-8B-Instruct (few-shot) + 8030 + 128 + 8192 + True + 1,483 ± 377 / 287 ± 97 + 2.68 + 66.56 ± 1.70 / 58.47 ± 2.40 + 68.29 ± 1.47 / 61.22 ± 2.25 + 34.47 ± 0.44 / 37.96 ± 0.40 + 63.80 ± 0.09 / 13.37 ± 0.15 + 28.22 ± 1.37 / 59.51 ± 1.44 + 18.21 ± 2.21 / 52.84 ± 2.30 + 47.34 ± 1.99 / 73.23 ± 1.29 + 30.78 ± 0.81 / 47.33 ± 0.75 + 31.49 ± 1.29 / 48.15 ± 0.99 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + mistralai/Mixtral-8x7B-v0.1 (few-shot) 46703 @@ -1340,32 +1340,32 @@ title: Norwegian NLG 🇳🇴 14.0.4 14.0.4 - - Mabeck/Heidrun-Mistral-7B-chat (few-shot) + + birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val) 7242 32 32768 False - 1,419 ± 349 / 286 ± 97 + 2,890 ± 401 / 1,155 ± 348 2.69 - 61.41 ± 1.71 / 52.32 ± 2.63 - 59.49 ± 1.26 / 49.45 ± 3.31 - 49.19 ± 1.64 / 63.36 ± 1.52 - 64.22 ± 0.52 / 16.72 ± 0.66 - 15.17 ± 2.64 / 50.25 ± 4.51 - 10.78 ± 1.99 / 50.08 ± 4.20 - 48.99 ± 2.91 / 73.08 ± 2.26 - 27.64 ± 1.39 / 45.78 ± 1.03 - 25.74 ± 1.87 / 43.95 ± 1.58 - 10.0.1 - 10.0.1 - 10.0.1 - 12.5.0 - 10.0.1 - 10.0.1 - 12.5.0 - 10.0.1 - 10.0.1 + 53.96 ± 3.37 / 49.84 ± 3.30 + 63.45 ± 2.27 / 53.13 ± 3.43 + 52.70 ± 4.58 / 66.82 ± 3.41 + 65.23 ± 0.55 / 18.64 ± 0.86 + 14.87 ± 3.37 / 40.83 ± 1.91 + 2.48 ± 3.31 / 35.61 ± 1.83 + 41.43 ± 3.34 / 67.26 ± 2.73 + 27.42 ± 2.13 / 45.20 ± 1.58 + 36.05 ± 3.95 / 51.68 ± 2.96 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 12.5.2 + 9.3.1 + 9.3.1 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot) @@ -1394,33 +1394,6 @@ title: Norwegian NLG 🇳🇴 14.0.4 14.0.4 - - birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val) - 7242 - 32 - 32768 - False - 2,890 ± 401 / 1,155 ± 348 - 2.70 - 53.96 ± 3.37 / 49.84 ± 3.30 - 63.45 ± 2.27 / 53.13 ± 3.43 - 52.70 ± 4.58 / 66.82 ± 3.41 - 65.23 ± 0.55 / 18.64 ± 0.86 - 14.87 ± 3.37 / 40.83 ± 1.91 - 2.48 ± 3.31 / 35.61 ± 1.83 - 41.43 ± 3.34 / 67.26 ± 2.73 - 27.42 ± 2.13 / 45.20 ± 1.58 - 36.05 ± 3.95 / 51.68 ± 2.96 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 12.5.2 - 9.3.1 - 9.3.1 - timpal0l/BeagleCatMunin (few-shot, val) 7242 @@ -1428,7 +1401,7 @@ title: Norwegian NLG 🇳🇴 32768 False 2,495 ± 458 / 775 ± 244 - 2.70 + 2.69 54.04 ± 2.86 / 48.50 ± 2.85 62.21 ± 3.31 / 50.38 ± 4.32 54.74 ± 3.71 / 67.81 ± 2.80 @@ -1449,31 +1422,58 @@ title: Norwegian NLG 🇳🇴 9.3.2 - meta-llama/Meta-Llama-3-8B (few-shot) - 8030 - 128 + Mabeck/Heidrun-Mistral-7B-chat (few-shot) + 7242 + 32 + 32768 + False + 1,419 ± 349 / 286 ± 97 + 2.70 + 61.41 ± 1.71 / 52.32 ± 2.63 + 59.49 ± 1.26 / 49.45 ± 3.31 + 49.19 ± 1.64 / 63.36 ± 1.52 + 64.22 ± 0.52 / 16.72 ± 0.66 + 15.17 ± 2.64 / 50.25 ± 4.51 + 10.78 ± 1.99 / 50.08 ± 4.20 + 48.99 ± 2.91 / 73.08 ± 2.26 + 27.64 ± 1.39 / 45.78 ± 1.03 + 25.74 ± 1.87 / 43.95 ± 1.58 + 10.0.1 + 10.0.1 + 10.0.1 + 12.5.0 + 10.0.1 + 10.0.1 + 12.5.0 + 10.0.1 + 10.0.1 + + + mlabonne/AlphaMonarch-7B (few-shot, val) + 7242 + 32 8192 - True - 1,335 ± 338 / 260 ± 88 + False + 5,340 ± 1,262 / 1,157 ± 375 2.71 - 61.48 ± 1.83 / 47.65 ± 2.94 - 61.58 ± 2.21 / 50.10 ± 2.68 - 32.94 ± 0.86 / 37.52 ± 0.43 - 63.38 ± 1.15 / 15.74 ± 1.68 - 21.20 ± 6.57 / 52.29 ± 7.43 - 19.65 ± 4.32 / 56.66 ± 4.40 - 53.35 ± 4.33 / 74.98 ± 3.70 - 33.02 ± 1.35 / 49.25 ± 1.04 - 24.93 ± 
3.13 / 42.47 ± 2.74 - 12.6.1 - 12.6.1 - 14.1.2 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 - 12.6.1 + 61.90 ± 2.57 / 57.16 ± 2.81 + 66.92 ± 2.52 / 57.81 ± 3.54 + 48.80 ± 4.56 / 63.38 ± 3.06 + 64.72 ± 0.39 / 17.40 ± 0.60 + 19.53 ± 5.49 / 51.96 ± 4.90 + 9.83 ± 4.57 / 47.95 ± 2.22 + 30.27 ± 2.28 / 62.04 ± 2.19 + 28.18 ± 1.89 / 45.23 ± 1.44 + 36.20 ± 3.97 / 50.74 ± 3.38 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 + 12.5.2 nvidia/mistral-nemo-minitron-8b-base (few-shot) @@ -1482,7 +1482,7 @@ title: Norwegian NLG 🇳🇴 8192 True 2,470 ± 836 / 326 ± 111 - 2.71 + 2.72 64.15 ± 1.20 / 47.04 ± 3.09 62.16 ± 1.91 / 46.60 ± 2.86 55.29 ± 1.82 / 69.90 ± 1.64 @@ -1502,6 +1502,33 @@ title: Norwegian NLG 🇳🇴 14.1.1 14.1.1 + + birgermoell/Rapid-Cycling (few-shot, val) + 7242 + 32 + 32768 + False + 2,346 ± 450 / 666 ± 249 + 2.73 + 55.93 ± 2.70 / 50.51 ± 3.15 + 63.85 ± 2.45 / 53.11 ± 4.11 + 50.41 ± 5.49 / 64.49 ± 4.37 + 65.10 ± 0.51 / 18.12 ± 0.74 + 15.74 ± 4.15 / 41.16 ± 2.21 + 2.23 ± 4.69 / 34.70 ± 1.39 + 39.81 ± 2.81 / 65.65 ± 2.64 + 26.34 ± 1.48 / 44.69 ± 1.13 + 34.85 ± 4.33 / 50.23 ± 3.39 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 9.3.2 + 12.5.2 + 9.3.2 + 9.3.2 + mistralai/Ministral-8B-Instruct-2410 (few-shot) 8020 @@ -1509,7 +1536,7 @@ title: Norwegian NLG 🇳🇴 32768 True 1,302 ± 323 / 253 ± 86 - 2.73 + 2.74 67.24 ± 1.06 / 45.38 ± 2.20 66.08 ± 1.05 / 47.58 ± 3.27 31.41 ± 0.86 / 33.56 ± 0.97 @@ -1530,76 +1557,22 @@ title: Norwegian NLG 🇳🇴 14.1.2 - mlabonne/AlphaMonarch-7B (few-shot, val) - 7242 - 32 - 8192 - False - 5,340 ± 1,262 / 1,157 ± 375 - 2.73 - 61.90 ± 2.57 / 57.16 ± 2.81 - 66.92 ± 2.52 / 57.81 ± 3.54 - 48.80 ± 4.56 / 63.38 ± 3.06 - 64.72 ± 0.39 / 17.40 ± 0.60 - 19.53 ± 5.49 / 51.96 ± 4.90 - 9.83 ± 4.57 / 47.95 ± 2.22 - 30.27 ± 2.28 / 62.04 ± 2.19 - 28.18 ± 1.89 / 45.23 ± 1.44 - 36.20 ± 3.97 / 50.74 ± 3.38 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - 12.5.2 - - - birgermoell/Flashback-Bellman (few-shot, val) - 7242 - 32 - 32768 - False - 2,887 ± 403 / 1,144 ± 345 - 2.74 - 56.44 ± 3.14 / 50.10 ± 4.61 - 66.56 ± 2.40 / 54.48 ± 4.93 - 53.24 ± 4.75 / 67.94 ± 3.75 - 64.96 ± 0.56 / 17.92 ± 0.82 - 11.96 ± 2.46 / 37.26 ± 1.15 - 2.50 ± 4.21 / 35.26 ± 1.79 - 39.21 ± 3.48 / 64.09 ± 3.49 - 26.64 ± 1.95 / 44.88 ± 1.41 - 31.14 ± 2.64 / 48.01 ± 2.14 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 12.5.2 - 9.3.1 - 9.3.1 - - - birgermoell/Rapid-Cycling (few-shot, val) + RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val) 7242 32 32768 False - 2,346 ± 450 / 666 ± 249 - 2.74 - 55.93 ± 2.70 / 50.51 ± 3.15 - 63.85 ± 2.45 / 53.11 ± 4.11 - 50.41 ± 5.49 / 64.49 ± 4.37 - 65.10 ± 0.51 / 18.12 ± 0.74 - 15.74 ± 4.15 / 41.16 ± 2.21 - 2.23 ± 4.69 / 34.70 ± 1.39 - 39.81 ± 2.81 / 65.65 ± 2.64 - 26.34 ± 1.48 / 44.69 ± 1.13 - 34.85 ± 4.33 / 50.23 ± 3.39 + 3,008 ± 429 / 991 ± 323 + 2.75 + 53.68 ± 2.01 / 49.22 ± 2.67 + 61.92 ± 4.06 / 49.03 ± 3.97 + 47.78 ± 3.19 / 57.76 ± 2.55 + 64.23 ± 0.75 / 16.90 ± 0.94 + 0.91 ± 1.78 / 33.51 ± 0.85 + 1.24 ± 1.66 / 33.71 ± 0.94 + 47.76 ± 2.93 / 70.99 ± 2.39 + 28.59 ± 2.31 / 46.48 ± 1.80 + 42.57 ± 2.86 / 56.64 ± 2.17 9.3.2 9.3.2 9.3.2 @@ -1617,7 +1590,7 @@ title: Norwegian NLG 🇳🇴 32768 False 2,543 ± 466 / 787 ± 247 - 2.75 + 2.76 51.82 ± 4.16 / 44.64 ± 4.66 62.55 ± 3.84 / 49.66 ± 5.87 56.37 ± 4.27 / 69.55 ± 4.52 @@ -1638,31 +1611,31 @@ title: Norwegian NLG 🇳🇴 12.3.2 - RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val) + birgermoell/Flashback-Bellman (few-shot, val) 7242 32 32768 False - 3,008 ± 429 / 991 ± 323 - 2.75 - 53.68 ± 2.01 
/ 49.22 ± 2.67 - 61.92 ± 4.06 / 49.03 ± 3.97 - 47.78 ± 3.19 / 57.76 ± 2.55 - 64.23 ± 0.75 / 16.90 ± 0.94 - 0.91 ± 1.78 / 33.51 ± 0.85 - 1.24 ± 1.66 / 33.71 ± 0.94 - 47.76 ± 2.93 / 70.99 ± 2.39 - 28.59 ± 2.31 / 46.48 ± 1.80 - 42.57 ± 2.86 / 56.64 ± 2.17 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 - 9.3.2 + 2,887 ± 403 / 1,144 ± 345 + 2.76 + 56.44 ± 3.14 / 50.10 ± 4.61 + 66.56 ± 2.40 / 54.48 ± 4.93 + 53.24 ± 4.75 / 67.94 ± 3.75 + 64.96 ± 0.56 / 17.92 ± 0.82 + 11.96 ± 2.46 / 37.26 ± 1.15 + 2.50 ± 4.21 / 35.26 ± 1.79 + 39.21 ± 3.48 / 64.09 ± 3.49 + 26.64 ± 1.95 / 44.88 ± 1.41 + 31.14 ± 2.64 / 48.01 ± 2.14 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 12.5.2 - 9.3.2 - 9.3.2 + 9.3.1 + 9.3.1 ibm-granite/granite-3.0-8b-base (few-shot) @@ -1833,7 +1806,7 @@ title: Norwegian NLG 🇳🇴 32768 True 5,431 ± 1,267 / 1,139 ± 365 - 2.78 + 2.79 50.47 ± 2.96 / 43.31 ± 2.54 51.97 ± 3.83 / 42.66 ± 5.03 48.03 ± 1.71 / 65.89 ± 1.68 @@ -1860,7 +1833,7 @@ title: Norwegian NLG 🇳🇴 4096 False 2,856 ± 645 / 709 ± 243 - 2.80 + 2.81 56.72 ± 1.90 / 43.75 ± 2.72 57.62 ± 1.35 / 43.81 ± 2.43 48.86 ± 2.54 / 64.32 ± 2.46 @@ -1887,7 +1860,7 @@ title: Norwegian NLG 🇳🇴 8192 True 4,141 ± 994 / 905 ± 299 - 2.82 + 2.83 44.53 ± 2.58 / 36.59 ± 1.71 47.02 ± 2.08 / 39.99 ± 2.76 41.84 ± 2.46 / 56.79 ± 2.95 @@ -2022,7 +1995,7 @@ title: Norwegian NLG 🇳🇴 8192 False 1,472 ± 376 / 284 ± 96 - 2.85 + 2.86 81.96 ± 1.06 / 76.25 ± 1.32 78.42 ± 1.01 / 75.40 ± 0.97 35.30 ± 0.64 / 38.72 ± 0.50 @@ -2130,52 +2103,25 @@ title: Norwegian NLG 🇳🇴 4096 False 1,440 ± 352 / 293 ± 99 - 2.90 - 62.98 ± 1.81 / 46.61 ± 3.01 - 60.12 ± 1.27 / 43.49 ± 2.87 - 35.47 ± 0.80 / 38.01 ± 0.57 - 64.52 ± 0.21 / 16.96 ± 0.28 - 15.15 ± 1.99 / 45.37 ± 3.71 - 9.49 ± 1.78 / 43.92 ± 3.35 - 27.06 ± 2.88 / 49.25 ± 4.63 - 29.91 ± 1.13 / 47.03 ± 0.81 - 37.75 ± 1.52 / 52.97 ± 1.19 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - - - mhenrichsen/hestenettetLM (few-shot) - 7242 - 32 - 32768 - True - 1,151 ± 294 / 227 ± 76 - 2.90 - 52.52 ± 1.85 / 43.46 ± 2.21 - 55.60 ± 3.22 / 45.25 ± 4.20 - 48.23 ± 3.31 / 65.51 ± 3.01 - 63.53 ± 1.47 / 16.54 ± 1.59 - 8.53 ± 3.72 / 38.61 ± 3.22 - 6.65 ± 1.40 / 39.32 ± 2.51 - 46.89 ± 3.29 / 70.96 ± 2.84 - 27.67 ± 0.91 / 45.77 ± 0.66 - 14.20 ± 3.45 / 34.89 ± 2.57 - 12.5.2 - 12.5.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 - 12.3.2 + 2.90 + 62.98 ± 1.81 / 46.61 ± 3.01 + 60.12 ± 1.27 / 43.49 ± 2.87 + 35.47 ± 0.80 / 38.01 ± 0.57 + 64.52 ± 0.21 / 16.96 ± 0.28 + 15.15 ± 1.99 / 45.37 ± 3.71 + 9.49 ± 1.78 / 43.92 ± 3.35 + 27.06 ± 2.88 / 49.25 ± 4.63 + 29.91 ± 1.13 / 47.03 ± 0.81 + 37.75 ± 1.52 / 52.97 ± 1.19 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 timpal0l/Llama-3-8B-flashback-v1 (few-shot) @@ -2204,6 +2150,33 @@ title: Norwegian NLG 🇳🇴 12.7.0 12.7.0 + + timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot) + 7242 + 32 + 32768 + True + 5,054 ± 1,200 / 1,056 ± 339 + 2.90 + 48.97 ± 2.42 / 39.15 ± 2.78 + 51.52 ± 2.96 / 40.17 ± 3.62 + 49.05 ± 2.73 / 63.94 ± 2.42 + 63.32 ± 1.58 / 16.33 ± 1.63 + 14.37 ± 2.18 / 47.80 ± 4.36 + 9.96 ± 1.34 / 48.97 ± 3.77 + 44.07 ± 3.40 / 68.49 ± 2.97 + 25.07 ± 1.48 / 43.13 ± 1.15 + 15.56 ± 3.55 / 35.85 ± 2.56 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + 12.5.3 + CohereForAI/c4ai-command-r-v01 (few-shot) 34981 @@ -2231,33 +2204,6 @@ title: Norwegian NLG 🇳🇴 14.1.1 14.1.1 - - timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot) - 7242 - 32 - 32768 - True - 5,054 ± 1,200 / 1,056 ± 339 - 2.91 - 48.97 ± 2.42 / 39.15 ± 2.78 - 51.52 ± 2.96 / 40.17 ± 
3.62 - 49.05 ± 2.73 / 63.94 ± 2.42 - 63.32 ± 1.58 / 16.33 ± 1.63 - 14.37 ± 2.18 / 47.80 ± 4.36 - 9.96 ± 1.34 / 48.97 ± 3.77 - 44.07 ± 3.40 / 68.49 ± 2.97 - 25.07 ± 1.48 / 43.13 ± 1.15 - 15.56 ± 3.55 / 35.85 ± 2.56 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - 12.5.3 - NbAiLab/nb-llama-3.1-70B (few-shot) 70554 @@ -2265,7 +2211,7 @@ title: Norwegian NLG 🇳🇴 131072 True 1,220 ± 411 / 158 ± 53 - 2.92 + 2.91 65.75 ± 2.00 / 59.69 ± 2.98 70.12 ± 2.22 / 63.49 ± 3.83 41.90 ± 3.43 / 49.94 ± 4.59 @@ -2285,6 +2231,33 @@ title: Norwegian NLG 🇳🇴 14.0.4 14.0.4 + + mhenrichsen/hestenettetLM (few-shot) + 7242 + 32 + 32768 + True + 1,151 ± 294 / 227 ± 76 + 2.91 + 52.52 ± 1.85 / 43.46 ± 2.21 + 55.60 ± 3.22 / 45.25 ± 4.20 + 48.23 ± 3.31 / 65.51 ± 3.01 + 63.53 ± 1.47 / 16.54 ± 1.59 + 8.53 ± 3.72 / 38.61 ± 3.22 + 6.65 ± 1.40 / 39.32 ± 2.51 + 46.89 ± 3.29 / 70.96 ± 2.84 + 27.67 ± 0.91 / 45.77 ± 0.66 + 14.20 ± 3.45 / 34.89 ± 2.57 + 12.5.2 + 12.5.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 + 12.3.2 + mistralai/Mistral-7B-v0.1 (few-shot) 7242 @@ -2366,6 +2339,33 @@ title: Norwegian NLG 🇳🇴 9.3.1 9.3.1 + + meta-llama/Meta-Llama-3-8B (few-shot) + 8030 + 128 + 8192 + True + 1,477 ± 376 / 285 ± 97 + 2.96 + 62.89 ± 2.01 / 50.34 ± 1.92 + 56.18 ± 3.26 / 47.30 ± 2.62 + 33.07 ± 0.73 / 37.60 ± 0.49 + 61.95 ± 0.69 / 14.09 ± 0.61 + 30.73 ± 3.19 / 64.06 ± 2.41 + 20.57 ± 3.62 / 54.22 ± 5.15 + 30.77 ± 4.33 / 54.66 ± 4.41 + 30.95 ± 1.33 / 47.50 ± 0.99 + 21.98 ± 2.12 / 41.07 ± 1.52 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + alpindale/Mistral-7B-v0.2-hf (few-shot) 7242 @@ -2474,6 +2474,33 @@ title: Norwegian NLG 🇳🇴 11.0.0 11.0.0 + + google/gemma-2-2b-it (few-shot) + 2614 + 256 + 8193 + True + 5,374 ± 1,233 / 1,193 ± 377 + 3.02 + 35.56 ± 1.33 / 28.84 ± 2.19 + 37.70 ± 3.04 / 30.22 ± 2.39 + 46.84 ± 2.20 / 63.22 ± 1.89 + 64.58 ± 0.33 / 17.00 ± 0.45 + 17.15 ± 2.01 / 55.59 ± 2.04 + 14.38 ± 1.96 / 54.95 ± 1.73 + 29.75 ± 1.52 / 63.79 ± 1.50 + 23.02 ± 1.05 / 41.85 ± 0.86 + 33.13 ± 1.06 / 49.47 ± 0.83 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + mistralai/Mistral-7B-Instruct-v0.2 (few-shot) 7242 @@ -2528,33 +2555,6 @@ title: Norwegian NLG 🇳🇴 14.1.2 14.1.2 - - google/gemma-2-2b-it (few-shot) - 2614 - 256 - 8193 - True - 5,374 ± 1,233 / 1,193 ± 377 - 3.03 - 35.56 ± 1.33 / 28.84 ± 2.19 - 37.70 ± 3.04 / 30.22 ± 2.39 - 46.84 ± 2.20 / 63.22 ± 1.89 - 64.58 ± 0.33 / 17.00 ± 0.45 - 17.15 ± 2.01 / 55.59 ± 2.04 - 14.38 ± 1.96 / 54.95 ± 1.73 - 29.75 ± 1.52 / 63.79 ± 1.50 - 23.02 ± 1.05 / 41.85 ± 0.86 - 33.13 ± 1.06 / 49.47 ± 0.83 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - occiglot/occiglot-7b-eu5-instruct (few-shot) 7242 @@ -2886,7 +2886,7 @@ title: Norwegian NLG 🇳🇴 131072 True 2,996 ± 817 / 284 ± 96 - 3.19 + 3.20 35.54 ± 2.35 / 35.62 ± 2.50 36.61 ± 1.95 / 37.38 ± 1.88 32.18 ± 6.27 / 50.19 ± 5.62 @@ -2993,8 +2993,8 @@ title: Norwegian NLG 🇳🇴 32 4096 True - 3,194 ± 687 / 650 ± 216 - 3.24 + 8,681 ± 1,650 / 2,177 ± 717 + 3.25 56.33 ± 1.63 / 36.68 ± 3.27 54.68 ± 1.29 / 37.85 ± 3.79 37.18 ± 1.30 / 55.44 ± 1.46 @@ -3338,33 +3338,6 @@ title: Norwegian NLG 🇳🇴 9.3.1 9.3.1 - - tollefj/nordavind-7b-instruct-warm (few-shot) - 7248 - 33 - 2048 - False - 6,450 ± 961 / 2,082 ± 658 - 3.38 - 38.82 ± 5.36 / 30.48 ± 1.91 - 43.28 ± 3.13 / 33.87 ± 3.30 - 38.05 ± 1.85 / 47.06 ± 3.97 - 64.04 ± 1.06 / 17.41 ± 1.19 - 8.45 ± 2.47 / 46.75 ± 3.97 - 7.50 ± 1.65 / 48.14 ± 4.65 - 40.47 ± 2.77 / 64.21 ± 2.94 - 2.60 ± 1.14 / 25.13 ± 
0.64 - 3.83 ± 0.86 / 26.37 ± 0.88 - 12.5.2 - 12.5.2 - 12.3.2 - 12.4.0 - 12.3.2 - 12.3.2 - 12.4.0 - 12.3.2 - 12.3.2 - claude-3-5-haiku-20241022 (zero-shot, val) unknown @@ -3392,6 +3365,33 @@ title: Norwegian NLG 🇳🇴 14.0.3 14.0.3 + + tollefj/nordavind-7b-instruct-warm (few-shot) + 7248 + 33 + 2048 + False + 6,450 ± 961 / 2,082 ± 658 + 3.39 + 38.82 ± 5.36 / 30.48 ± 1.91 + 43.28 ± 3.13 / 33.87 ± 3.30 + 38.05 ± 1.85 / 47.06 ± 3.97 + 64.04 ± 1.06 / 17.41 ± 1.19 + 8.45 ± 2.47 / 46.75 ± 3.97 + 7.50 ± 1.65 / 48.14 ± 4.65 + 40.47 ± 2.77 / 64.21 ± 2.94 + 2.60 ± 1.14 / 25.13 ± 0.64 + 3.83 ± 0.86 / 26.37 ± 0.88 + 12.5.2 + 12.5.2 + 12.3.2 + 12.4.0 + 12.3.2 + 12.3.2 + 12.4.0 + 12.3.2 + 12.3.2 + NorwAI/NorwAI-Llama2-7B (few-shot) 7033 @@ -3399,7 +3399,7 @@ title: Norwegian NLG 🇳🇴 4096 True 4,438 ± 1,128 / 1,028 ± 346 - 3.40 + 3.41 31.45 ± 1.64 / 31.64 ± 1.89 33.85 ± 1.95 / 34.29 ± 1.91 36.06 ± 3.96 / 52.59 ± 4.67 @@ -3534,7 +3534,7 @@ title: Norwegian NLG 🇳🇴 1795 True 409 ± 53 / 182 ± 54 - 3.46 + 3.47 24.07 ± 5.59 / 19.09 ± 2.55 26.67 ± 6.24 / 21.18 ± 2.80 31.05 ± 7.03 / 45.69 ± 8.29 @@ -3669,7 +3669,7 @@ title: Norwegian NLG 🇳🇴 4099 True 2,080 ± 700 / 331 ± 117 - 3.56 + 3.57 40.40 ± 2.29 / 30.41 ± 2.07 44.45 ± 3.61 / 34.06 ± 3.27 40.79 ± 1.70 / 57.84 ± 2.77 @@ -3804,7 +3804,7 @@ title: Norwegian NLG 🇳🇴 4096 True 6,275 ± 1,193 / 1,755 ± 578 - 3.63 + 3.64 36.96 ± 3.10 / 34.68 ± 3.19 39.38 ± 3.30 / 37.06 ± 3.49 32.67 ± 2.52 / 44.37 ± 3.06 @@ -4371,7 +4371,7 @@ title: Norwegian NLG 🇳🇴 8192 True 16,249 ± 3,690 / 3,689 ± 1,226 - 3.97 + 3.98 26.70 ± 4.42 / 24.56 ± 2.10 28.23 ± 3.78 / 28.27 ± 2.80 23.25 ± 4.16 / 36.07 ± 4.20 @@ -4388,35 +4388,8 @@ title: Norwegian NLG 🇳🇴 13.1.0 13.1.0 13.1.0 - 13.1.0 - 13.1.0 - - - meta-llama/Llama-3.2-1B (few-shot) - 1236 - 128 - 131073 - True - 7,577 ± 1,884 / 1,555 ± 492 - 3.97 - 30.54 ± 3.75 / 29.88 ± 3.25 - 31.34 ± 4.72 / 30.46 ± 4.56 - 29.50 ± 4.18 / 49.19 ± 4.59 - 53.31 ± 0.98 / 7.37 ± 0.54 - -0.13 ± 1.28 / 37.46 ± 3.25 - 0.02 ± 1.75 / 39.49 ± 4.41 - 19.59 ± 5.61 / 34.02 ± 8.33 - 2.49 ± 1.09 / 25.63 ± 0.78 - 2.53 ± 1.16 / 26.30 ± 0.82 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 + 13.1.0 + 13.1.0 ibm-granite/granite-3b-code-base-2k (few-shot) @@ -4445,6 +4418,33 @@ title: Norwegian NLG 🇳🇴 13.0.0 13.0.0 + + meta-llama/Llama-3.2-1B (few-shot) + 1236 + 128 + 131073 + True + 7,577 ± 1,884 / 1,555 ± 492 + 3.98 + 30.54 ± 3.75 / 29.88 ± 3.25 + 31.34 ± 4.72 / 30.46 ± 4.56 + 29.50 ± 4.18 / 49.19 ± 4.59 + 53.31 ± 0.98 / 7.37 ± 0.54 + -0.13 ± 1.28 / 37.46 ± 3.25 + 0.02 ± 1.75 / 39.49 ± 4.41 + 19.59 ± 5.61 / 34.02 ± 8.33 + 2.49 ± 1.09 / 25.63 ± 0.78 + 2.53 ± 1.16 / 26.30 ± 0.82 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot) 1711 @@ -4614,7 +4614,7 @@ title: Norwegian NLG 🇳🇴 4096 True 7,808 ± 2,183 / 1,289 ± 428 - 4.15 + 4.16 31.16 ± 1.45 / 30.20 ± 1.07 29.73 ± 2.46 / 28.89 ± 2.68 17.59 ± 2.80 / 35.34 ± 4.36 @@ -4641,7 +4641,7 @@ title: Norwegian NLG 🇳🇴 2048 True 8,597 ± 1,983 / 1,926 ± 600 - 4.17 + 4.16 27.37 ± 6.89 / 27.19 ± 7.19 27.59 ± 6.34 / 28.03 ± 6.94 18.09 ± 6.14 / 31.83 ± 6.77 @@ -4742,6 +4742,33 @@ title: Norwegian NLG 🇳🇴 14.0.4 14.0.4 + + AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot) + 186 + 64 + 2048 + True + 7,717 ± 1,553 / 2,013 ± 625 + 4.23 + 27.66 ± 2.00 / 28.61 ± 2.15 + 30.88 ± 2.13 / 31.97 ± 2.10 + 5.13 ± 3.33 / 20.41 ± 3.12 + 58.91 ± 0.95 / 9.74 ± 0.45 + 0.00 ± 0.00 / 33.25 ± 0.30 + 0.00 ± 0.00 / 32.79 ± 0.34 + 7.55 ± 1.17 / 15.63 ± 2.64 + 
-0.68 ± 1.27 / 22.92 ± 0.65 + 0.32 ± 0.59 / 25.12 ± 0.69 + 11.0.0 + 11.0.0 + 9.3.2 + 12.4.0 + 11.0.0 + 11.0.0 + 12.4.0 + 11.0.0 + 11.0.0 + PleIAs/Pleias-1.2b-Preview (few-shot) 1195 @@ -4749,7 +4776,7 @@ title: Norwegian NLG 🇳🇴 2048 True 10,756 ± 3,589 / 1,157 ± 670 - 4.22 + 4.23 38.96 ± 2.67 / 36.82 ± 3.28 40.42 ± 2.43 / 38.81 ± 2.83 19.42 ± 3.09 / 26.64 ± 4.19 @@ -4770,31 +4797,31 @@ title: Norwegian NLG 🇳🇴 14.0.4 - AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot) - 186 - 64 - 2048 + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 True - 7,717 ± 1,553 / 2,013 ± 625 + 9,270 ± 2,690 / 1,434 ± 437 4.23 - 27.66 ± 2.00 / 28.61 ± 2.15 - 30.88 ± 2.13 / 31.97 ± 2.10 - 5.13 ± 3.33 / 20.41 ± 3.12 - 58.91 ± 0.95 / 9.74 ± 0.45 - 0.00 ± 0.00 / 33.25 ± 0.30 - 0.00 ± 0.00 / 32.79 ± 0.34 - 7.55 ± 1.17 / 15.63 ± 2.64 - -0.68 ± 1.27 / 22.92 ± 0.65 - 0.32 ± 0.59 / 25.12 ± 0.69 - 11.0.0 - 11.0.0 - 9.3.2 - 12.4.0 - 11.0.0 - 11.0.0 - 12.4.0 - 11.0.0 - 11.0.0 + 29.25 ± 2.15 / 28.34 ± 2.25 + 25.45 ± 1.96 / 24.62 ± 1.86 + 11.28 ± 0.71 / 22.46 ± 2.54 + 55.58 ± 1.17 / 8.04 ± 0.65 + 1.52 ± 1.26 / 40.82 ± 4.08 + 0.52 ± 1.60 / 41.23 ± 4.21 + 8.47 ± 0.94 / 20.43 ± 2.10 + 2.62 ± 1.32 / 26.23 ± 0.92 + -0.06 ± 0.97 / 24.96 ± 0.73 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 PleIAs/Pleias-Nano (few-shot) @@ -4938,7 +4965,7 @@ title: Norwegian NLG 🇳🇴 8192 True 22,023 ± 6,203 / 3,675 ± 1,231 - 4.48 + 4.49 26.60 ± 1.99 / 23.60 ± 2.05 23.70 ± 1.58 / 23.04 ± 2.17 6.21 ± 2.55 / 23.74 ± 3.28 @@ -4958,33 +4985,6 @@ title: Norwegian NLG 🇳🇴 13.1.0 13.1.0 - - HuggingFaceTB/SmolLM2-360M-Instruct (few-shot) - 362 - 49 - 8192 - True - 21,777 ± 6,115 / 3,617 ± 1,211 - 4.49 - 20.37 ± 5.55 / 21.57 ± 3.57 - 21.27 ± 5.10 / 22.34 ± 4.41 - 7.60 ± 2.24 / 26.47 ± 2.89 - 49.27 ± 1.15 / 7.39 ± 0.36 - 1.31 ± 1.92 / 45.75 ± 3.36 - 0.51 ± 1.81 / 38.71 ± 2.95 - 4.80 ± 1.18 / 10.53 ± 2.12 - -0.90 ± 0.97 / 21.85 ± 0.55 - -1.00 ± 0.70 / 24.21 ± 0.55 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - 13.1.0 - NbAiLab/nb-llama-3.2-1B (few-shot) 1236 @@ -4992,7 +4992,7 @@ title: Norwegian NLG 🇳🇴 131072 True 3,424 ± 1,080 / 464 ± 158 - 4.50 + 4.49 10.60 ± 5.84 / 10.10 ± 5.81 22.63 ± 4.50 / 21.78 ± 4.61 19.76 ± 6.05 / 25.69 ± 5.67 @@ -5012,6 +5012,33 @@ title: Norwegian NLG 🇳🇴 14.0.4 14.0.4 + + HuggingFaceTB/SmolLM2-360M-Instruct (few-shot) + 362 + 49 + 8192 + True + 21,777 ± 6,115 / 3,617 ± 1,211 + 4.50 + 20.37 ± 5.55 / 21.57 ± 3.57 + 21.27 ± 5.10 / 22.34 ± 4.41 + 7.60 ± 2.24 / 26.47 ± 2.89 + 49.27 ± 1.15 / 7.39 ± 0.36 + 1.31 ± 1.92 / 45.75 ± 3.36 + 0.51 ± 1.81 / 38.71 ± 2.95 + 4.80 ± 1.18 / 10.53 ± 2.12 + -0.90 ± 0.97 / 21.85 ± 0.55 + -1.00 ± 0.70 / 24.21 ± 0.55 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + 13.1.0 + NbAiLab/nb-llama-3.2-3B (few-shot) 3213 @@ -5039,33 +5066,6 @@ title: Norwegian NLG 🇳🇴 14.0.4 14.0.4 - - AI-Sweden-Models/gpt-sw3-126m (few-shot) - 186 - 64 - 2048 - True - 8,958 ± 1,815 / 2,240 ± 696 - 4.52 - 13.55 ± 6.73 / 15.90 ± 5.66 - 9.38 ± 4.88 / 11.18 ± 4.52 - 7.78 ± 3.76 / 21.70 ± 5.02 - 51.68 ± 2.20 / 7.39 ± 0.89 - -1.46 ± 1.07 / 43.30 ± 2.30 - -2.97 ± 1.29 / 44.41 ± 3.18 - 2.32 ± 0.68 / 6.65 ± 1.90 - 0.39 ± 1.28 / 23.22 ± 0.56 - -0.80 ± 0.71 / 24.77 ± 0.62 - 9.2.0 - 9.2.0 - 9.2.0 - 11.0.0 - 9.2.0 - 9.2.0 - 12.5.1 - 9.2.0 - 9.2.0 - allenai/OLMo-7B-Twin-2T (few-shot) 6888 @@ -5093,6 +5093,33 @@ title: Norwegian NLG 🇳🇴 12.5.2 12.5.2 + + AI-Sweden-Models/gpt-sw3-126m (few-shot) + 186 + 64 + 2048 + True + 8,958 ± 1,815 / 2,240 ± 696 + 4.53 + 
13.55 ± 6.73 / 15.90 ± 5.66 + 9.38 ± 4.88 / 11.18 ± 4.52 + 7.78 ± 3.76 / 21.70 ± 5.02 + 51.68 ± 2.20 / 7.39 ± 0.89 + -1.46 ± 1.07 / 43.30 ± 2.30 + -2.97 ± 1.29 / 44.41 ± 3.18 + 2.32 ± 0.68 / 6.65 ± 1.90 + 0.39 ± 1.28 / 23.22 ± 0.56 + -0.80 ± 0.71 / 24.77 ± 0.62 + 9.2.0 + 9.2.0 + 9.2.0 + 11.0.0 + 9.2.0 + 9.2.0 + 12.5.1 + 9.2.0 + 9.2.0 + NorwAI/NorwAI-Mistral-7B-pretrain (few-shot) 7537 @@ -5127,7 +5154,7 @@ title: Norwegian NLG 🇳🇴 2048 True 10,242 ± 3,432 / 1,335 ± 484 - 4.53 + 4.54 26.59 ± 2.14 / 26.61 ± 2.49 26.78 ± 1.46 / 26.94 ± 1.92 7.91 ± 2.20 / 17.44 ± 3.42 @@ -5174,33 +5201,6 @@ title: Norwegian NLG 🇳🇴 11.0.0 11.0.0 - - RJuro/kanelsnegl-v0.1 (few-shot) - 7242 - 32 - 512 - True - 5,847 ± 1,029 / 1,640 ± 525 - 4.57 - 0.00 ± 0.00 / 0.00 ± 0.00 - 0.00 ± 0.00 / 0.00 ± 0.00 - 0.95 ± 0.80 / 9.68 ± 0.28 - 59.32 ± 0.11 / 9.47 ± 0.12 - 0.00 ± 0.00 / 33.25 ± 0.30 - 0.00 ± 0.00 / 32.79 ± 0.34 - 0.00 ± 0.00 / 33.45 ± 0.27 - 0.18 ± 0.35 / 21.91 ± 0.52 - 0.30 ± 0.40 / 25.03 ± 0.88 - 9.3.1 - 9.3.1 - 9.3.1 - 11.0.0 - 9.3.1 - 9.3.1 - 12.5.1 - 9.3.1 - 9.3.1 - RJuro/kanelsnegl-v0.2 (few-shot) 7242 @@ -5228,6 +5228,33 @@ title: Norwegian NLG 🇳🇴 10.0.1 11.0.0 + + RJuro/kanelsnegl-v0.1 (few-shot) + 7242 + 32 + 512 + True + 5,847 ± 1,029 / 1,640 ± 525 + 4.58 + 0.00 ± 0.00 / 0.00 ± 0.00 + 0.00 ± 0.00 / 0.00 ± 0.00 + 0.95 ± 0.80 / 9.68 ± 0.28 + 59.32 ± 0.11 / 9.47 ± 0.12 + 0.00 ± 0.00 / 33.25 ± 0.30 + 0.00 ± 0.00 / 32.79 ± 0.34 + 0.00 ± 0.00 / 33.45 ± 0.27 + 0.18 ± 0.35 / 21.91 ± 0.52 + 0.30 ± 0.40 / 25.03 ± 0.88 + 9.3.1 + 9.3.1 + 9.3.1 + 11.0.0 + 9.3.1 + 9.3.1 + 12.5.1 + 9.3.1 + 9.3.1 + allenai/OLMo-1B (few-shot) 1177 diff --git a/norwegian-nlu.csv b/norwegian-nlu.csv index 66355b09..12cc0935 100644 --- a/norwegian-nlu.csv +++ b/norwegian-nlu.csv @@ -14,7 +14,7 @@ NbAiLab/nb-roberta-base-scandi,278,250,512,True,False,15079,1.58,92.24,87.58,59. 
"meta-llama/Meta-Llama-3-70B (few-shot, val)",70554,128,8192,True,False,312,1.58,75.31,75.94,66.74,59.82,47.56,60.87 pere/roberta-base-exp-8,278,250,512,True,False,15112,1.65,88.99,82.99,57.37,69.92,70.05,41.98 "gpt-4-1106-preview (few-shot, val)",-1,100,128000,True,False,576,1.67,77.48,78.7,62.55,74.45,56.31,44.67 -microsoft/mdeberta-v3-base,279,251,512,True,False,20637,1.68,91.9,86.81,53.69,70.55,61.21,48.82 +microsoft/mdeberta-v3-base,279,251,512,True,False,20637,1.67,91.9,86.81,53.69,70.55,61.21,48.82 NbAiLab/nb-bert-base,178,120,512,True,False,14050,1.7,93.01,88.43,60.84,73.89,72.1,33.01 sentence-transformers/use-cmlm-multilingual,471,501,512,True,False,30231,1.7,90.08,86.04,56.35,59.38,46.54,55.05 KennethEnevoldsen/dfm-sentence-encoder-medium-3,178,120,512,True,False,14050,1.71,91.17,87.3,59.1,74.32,72.94,34.06 @@ -30,7 +30,7 @@ FacebookAI/xlm-roberta-large,560,250,512,True,False,17897,1.77,91.66,86.19,50.25 google/gemma-2-27b-it (few-shot),27227,256,8193,True,False,1516,1.83,67.35,66.61,67.14,64.66,52.49,44.85 pere/roberta-debug-32,278,250,512,True,False,14958,1.86,89.07,83.27,53.23,70.06,66.81,34.17 setu4993/LaBSE,471,501,512,True,False,25418,1.87,90.58,85.21,54.26,59.44,49.3,46.42 -"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,1.89,74.23,70.5,50.92,76.1,72.03,40.57 +"claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,1.88,74.23,70.5,50.92,76.1,72.03,40.57 abhishek/autotrain-llama3-oh-sft-v0-2 (few-shot),70554,128,8192,False,False,2668,1.95,80.97,79.69,63.91,39.82,26.86,47.06 "gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,2.04,77.7,73.92,58.88,54.29,32.82,45.35 google/gemma-2-27b (few-shot),27227,256,8193,True,False,1531,2.05,42.58,40.43,64.15,51.59,42.41,58.55 @@ -50,7 +50,7 @@ ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633, vesteinn/DanskBERT,124,50,512,True,False,15749,2.25,86.82,79.91,47.84,51.99,30.57,36.75 AI-Sweden-Models/bert-large-nordic-pile-1M-steps,369,64,512,True,False,6571,2.26,87.5,80.57,47.11,52.62,25.06,38.4 intfloat/multilingual-e5-base,278,250,512,True,False,14965,2.28,88.26,81.37,54.61,50.35,22.15,31.77 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.3,72.74,69.17,67.45,74.27,54.83,3.67 +"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.29,72.74,69.17,67.45,74.27,54.83,3.67 microsoft/xlm-align-base,278,250,512,True,False,14744,2.31,90.07,85.65,54.46,12.16,8.99,49.24 timpal0l/sol (few-shot),10732,32,4096,False,False,3701,2.31,65.14,65.88,57.06,26.41,19.58,51.6 Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,2.32,75.68,75.89,38.41,56.42,39.34,44.35 @@ -72,16 +72,16 @@ Geotrend/bert-base-en-fr-de-no-da-cased,118,42,512,True,False,13973,2.48,88.05,8 ZurichNLP/unsup-simcse-xlm-roberta-base,278,250,512,True,False,34520,2.49,86.56,80.57,49.62,38.45,11.38,31.5 Geotrend/bert-base-en-da-cased,111,33,512,True,False,14062,2.52,88.55,83.09,35.16,31.82,32.94,39.46 microsoft/infoxlm-base,278,250,512,True,False,34735,2.52,90.14,84.12,44.42,11.2,7.12,47.69 -"timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val)",8030,128,8192,False,True,5018,2.52,64.51,65.66,52.9,29.34,17.42,38.49 microsoft/infoxlm-large,560,250,512,True,False,6696,2.53,91.9,86.59,30.56,9.79,6.36,60.47 +"timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val)",8030,128,8192,False,True,5018,2.53,64.51,65.66,52.9,29.34,17.42,38.49 AI-Nordics/bert-large-swedish-cased,335,31,512,True,False,7199,2.54,83.32,77.97,38.44,37.54,23.1,39.97 skole-gpt-mixtral 
(few-shot),-1,32,32768,False,False,3583,2.54,62.52,61.55,52.09,21.99,16.84,47.3 "RJuro/munin-neuralbeagle-7b (few-shot, val)",7242,32,32768,False,True,2493,2.56,61.18,65.16,55.61,20.84,9.12,42.92 "gpt-4-1106-preview (zero-shot, val)",-1,100,8191,True,False,436,2.56,60.16,48.74,39.62,71.38,42.94,36.04 nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.56,64.15,62.16,55.29,32.3,22.82,32.62 +Geotrend/bert-base-da-cased,104,23,512,True,False,15432,2.57,87.52,82.66,32.73,36.41,30.37,37.71 KBLab/megatron-bert-large-swedish-cased-165k,370,64,512,True,False,7138,2.57,85.99,79.47,39.53,27.39,23.56,39.01 "timpal0l/BeagleCatMunin2 (few-shot, val)",7242,32,32768,False,True,2477,2.57,61.17,65.44,58.69,15.03,5.95,42.42 -Geotrend/bert-base-da-cased,104,23,512,True,False,15432,2.58,87.52,82.66,32.73,36.41,30.37,37.71 NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.59,58.05,59.65,57.94,51.36,42.84,14.72 "meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4096,True,False,1979,2.61,60.21,62.99,55.12,27.12,6.82,38.5 mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.62,56.41,55.6,25.18,62.56,53.09,42.57 @@ -96,16 +96,15 @@ ibm-granite/granite-3.0-8b-base (few-shot),8171,49,4097,True,False,2515,2.69,49. "mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.69,62.47,66.69,54.04,16.75,13.0,34.48 senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.69,64.37,62.77,50.6,18.09,12.25,38.34 "merge-crew/da-sv-dare-ties-density-0.6 (few-shot, val)",7242,32,32768,True,True,2515,2.71,47.26,59.35,54.93,9.0,5.26,45.95 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.71,74.47,72.93,34.44,27.77,20.35,42.9 mhenrichsen/danskgpt-chat-v2.1 (few-shot),-1,32,32768,True,False,5085,2.71,62.43,60.68,53.41,-1.16,0.3,49.15 KBLab/megatron-bert-base-swedish-cased-600k,135,64,512,True,False,15726,2.72,82.2,76.64,40.2,24.45,19.18,30.69 clips/mfaq,278,250,128,True,False,5591,2.72,89.46,79.71,52.91,27.55,15.2,12.36 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.72,66.56,68.29,34.47,28.22,18.21,47.34 KBLab/megatron-bert-large-swedish-cased-110k,370,64,512,True,False,7075,2.73,84.03,77.98,39.15,21.39,17.1,35.32 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.73,61.48,61.58,32.94,21.2,19.65,53.35 mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.73,63.92,62.15,46.68,33.38,19.99,31.87 +timpal0l/njord-alpha (few-shot),7242,32,32768,True,False,5431,2.73,50.47,51.97,48.03,22.65,17.1,44.72 "birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2890,2.74,53.96,63.45,52.7,14.87,2.48,41.43 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.74,53.79,56.13,51.36,6.83,8.09,48.01 -timpal0l/njord-alpha (few-shot),7242,32,32768,True,False,5431,2.74,50.47,51.97,48.03,22.65,17.1,44.72 "merge-crew/da-sv-ties (few-shot, val)",7242,32,32768,True,True,2457,2.75,47.61,60.57,44.46,23.99,11.6,47.02 KB/bert-base-swedish-cased,125,50,512,True,False,16181,2.76,85.91,79.67,38.7,39.13,24.13,19.04 "birgermoell/Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2887,2.76,56.44,66.56,53.24,11.96,2.5,39.21 @@ -118,11 +117,11 @@ utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.78 facebook/xlm-v-base,778,902,512,True,False,25396,2.79,89.99,78.6,17.93,43.46,10.97,43.74 CohereForAI/aya-expanse-8b 
(few-shot),8028,256,8192,False,False,2686,2.8,66.55,63.63,38.61,15.8,12.3,43.26 danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.8,50.43,54.2,39.21,20.51,11.66,51.57 +AI-Sweden-Models/Llama-3-8B (few-shot),8030,128,8192,True,False,4141,2.81,44.53,47.02,41.84,19.97,15.61,50.91 "AI-Sweden-Models/tyr (few-shot, val)",7242,32,32768,False,True,6079,2.81,58.6,63.15,51.85,0.66,0.53,43.22 KBLab/bert-base-swedish-cased,125,50,512,True,False,16164,2.81,85.33,79.44,38.17,39.49,22.17,19.04 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.81,64.55,66.44,35.17,27.41,15.6,43.11 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.81,64.55,66.44,35.17,27.41,15.6,43.11 "mlabonne/AlphaMonarch-7B (few-shot, val)",7242,32,8192,False,True,5340,2.81,61.9,66.92,48.8,19.53,9.83,30.27 -AI-Sweden-Models/Llama-3-8B (few-shot),8030,128,8192,True,False,4141,2.82,44.53,47.02,41.84,19.97,15.61,50.91 bineric/NorskGPT-Llama-13B-v0.1 (few-shot),-1,32,4096,False,False,2856,2.82,56.72,57.62,48.86,9.87,6.9,41.27 bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,2.82,56.18,56.96,50.94,8.19,5.55,41.35 timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.82,48.97,51.52,49.05,14.37,9.96,44.07 @@ -137,30 +136,31 @@ Geotrend/distilbert-base-en-no-cased,69,33,512,True,False,26597,2.87,83.93,79.39 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.87,55.02,57.37,36.76,30.73,18.96,41.01 sentence-transformers/paraphrase-multilingual-mpnet-base-v2,278,250,512,True,False,15100,2.87,81.94,75.56,55.53,36.01,14.99,0.0 ThatsGroes/munin-SkoleGPTOpenOrca-7b-16bit (few-shot),7242,32,32768,False,False,3006,2.88,51.99,52.74,50.39,0.99,1.27,47.95 +jhu-clsp/bernice,278,250,128,True,False,5567,2.88,84.11,77.82,39.63,45.75,33.74,5.35 Geotrend/distilbert-base-en-fr-de-no-da-cased,76,42,512,True,False,26081,2.89,83.49,80.23,32.66,33.65,29.07,19.29 Mabeck/Heidrun-Mistral-7B-base (few-shot),7242,32,32768,True,False,3823,2.89,50.1,54.81,48.64,10.31,1.11,42.2 -jhu-clsp/bernice,278,250,128,True,False,5567,2.89,84.11,77.82,39.63,45.75,33.74,5.35 alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.91,50.63,52.69,44.05,11.6,9.26,45.23 mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.91,50.56,52.65,44.61,12.1,9.3,45.15 sentence-transformers/paraphrase-xlm-r-multilingual-v1,278,250,512,True,False,20154,2.91,81.26,74.05,49.93,38.26,25.17,0.0 google/gemma-7b-it (few-shot),8538,256,8192,False,False,1792,2.92,59.77,60.98,28.14,14.01,10.15,51.08 +google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.93,26.43,32.66,41.82,25.82,20.16,52.68 meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.94,65.17,60.22,34.02,32.48,18.38,33.06 -google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.95,26.43,32.66,41.82,25.82,20.16,52.68 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.96,68.4,65.15,42.0,5.2,3.32,37.51 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.96,57.21,59.62,38.93,8.65,5.92,42.32 CohereForAI/aya-23-8B (few-shot),8028,256,8192,False,False,2707,2.98,60.94,59.61,35.73,6.18,4.0,46.52 Geotrend/distilbert-base-en-da-cased,69,33,512,True,False,26196,2.98,83.27,79.59,29.37,31.5,24.06,18.62 Geotrend/distilbert-base-da-cased,61,23,512,True,False,28950,2.99,82.84,78.83,30.7,34.24,27.2,16.44 
flax-community/nordic-roberta-wiki,125,50,512,True,False,16227,2.99,85.42,78.92,36.27,48.07,29.81,0.44 +occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.99,45.5,45.96,44.46,0.0,0.0,52.19 sentence-transformers/stsb-xlm-r-multilingual,278,250,512,True,False,15040,2.99,80.08,74.59,52.16,36.3,14.21,0.0 -occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,3.0,45.5,45.96,44.46,0.0,0.0,52.19 NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,3.01,65.75,70.12,41.9,47.88,35.66,0.03 RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,3.01,58.61,60.4,41.36,6.52,3.95,38.93 KBLab/megatron-bert-base-swedish-cased-125k,135,64,512,True,False,15763,3.02,77.98,75.0,33.88,24.23,18.18,20.56 ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,3.02,66.91,62.82,40.71,9.5,6.74,32.83 jannikskytt/MeDa-Bert,111,32,511,True,False,16114,3.02,71.69,60.0,38.94,30.32,7.99,24.02 meta-llama/Llama-3.2-3B (few-shot),3213,128,131073,True,False,3713,3.02,49.57,52.13,39.96,3.2,3.72,45.54 -mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,3.03,50.08,51.27,43.65,14.09,8.28,37.23 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,3.02,62.89,56.18,33.07,30.73,20.57,30.77 +mistralai/Mistral-7B-Instruct-v0.1 (few-shot),7242,32,32768,False,False,634,3.02,50.08,51.27,43.65,14.09,8.28,37.23 CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,3.04,61.54,60.94,35.73,21.33,13.2,32.36 mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,3.04,53.42,54.34,38.79,17.06,11.0,35.74 occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,3.06,45.28,46.0,44.95,0.0,0.0,43.88 @@ -173,7 +173,7 @@ neph1/bellman-7b-mistral-instruct-v0.2 (few-shot),7242,32,32768,False,False,2518 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,3.1,78.31,72.13,47.53,26.92,14.63,0.0 danish-foundation-models/encoder-medium-v1,111,32,512,True,False,16130,3.11,68.66,61.77,36.56,31.23,5.4,22.56 meta-llama/Llama-2-13b-hf (few-shot),13016,32,4096,True,False,2898,3.11,51.12,55.35,23.75,14.0,7.61,49.24 -sarnikowski/convbert-medium-small-da-cased,24,29,512,True,False,13821,3.12,79.5,73.03,32.4,41.65,25.53,5.41 +sarnikowski/convbert-medium-small-da-cased,24,29,512,True,False,13821,3.11,79.5,73.03,32.4,41.65,25.53,5.41 TrustLLMeu/baseline-7-8b_1t-tokens_llama (few-shot),7800,100,4096,True,False,6197,3.13,42.77,45.69,37.79,8.77,8.47,44.24 bineric/NorskGPT-Mistral-7b (few-shot),7242,32,4096,False,False,1440,3.14,62.98,60.12,35.47,15.15,9.49,27.06 google/gemma-2-2b-it (few-shot),2614,256,8193,True,False,5374,3.16,35.56,37.7,46.84,17.15,14.38,29.75 @@ -181,7 +181,7 @@ Addedk/mbert-swedish-distilled-cased,135,120,512,True,False,26091,3.17,82.98,76. 
meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,3.19,42.13,43.8,41.74,0.0,0.02,44.19 LumiOpen/Viking-33B@1000B (few-shot),33119,131,4099,True,False,2080,3.2,40.4,44.45,40.79,5.91,2.98,37.75 emillykkejensen/Phi-3-mini-4k-instruct-dansk (few-shot),3821,32,4096,False,False,1360,3.2,56.41,53.95,42.27,0.0,0.21,29.35 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,3.2,56.33,54.68,37.18,6.76,6.79,30.11 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,3.2,56.33,54.68,37.18,6.76,6.79,30.11 tollefj/nordavind-7b-instruct-warm (few-shot),7248,33,2048,False,False,6450,3.2,38.82,43.28,38.05,8.45,7.5,40.47 AI-Sweden-Models/gpt-sw3-20b (few-shot),20918,64,2048,True,False,1875,3.21,30.82,39.56,34.5,15.17,12.46,42.81 Twitter/twhin-bert-large,561,250,512,True,False,9707,3.21,86.26,80.1,34.17,12.11,4.28,11.74 @@ -193,11 +193,11 @@ flax-community/swe-roberta-wiki-oscar,125,50,512,True,False,15437,3.23,79.25,75. norallm/normistral-7b-warm-instruct (few-shot),-1,33,2048,True,False,6194,3.23,46.49,51.46,37.98,7.86,7.23,33.31 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,3.24,49.66,51.98,44.13,0.67,1.11,28.62 sarnikowski/convbert-small-da-cased,13,29,512,True,False,14273,3.24,76.07,70.94,32.49,35.43,21.11,1.84 +microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,3.25,52.18,50.53,33.3,2.63,4.0,37.08 DDSC/roberta-base-danish,125,50,512,True,False,15004,3.26,76.14,72.88,32.29,0.45,-0.08,23.91 -microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,131072,True,False,7312,3.26,52.18,50.53,33.3,2.63,4.0,37.08 +Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,3.27,44.83,46.29,32.7,3.57,1.61,42.55 AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2048,True,False,1831,3.28,23.95,26.55,40.89,9.45,8.32,43.19 AI-Sweden-Models/gpt-sw3-6.7b-v2 (few-shot),7111,64,2048,True,False,2351,3.28,29.62,32.3,34.67,8.37,7.76,44.62 -Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,3.28,44.83,46.29,32.7,3.57,1.61,42.55 MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,3.29,36.96,39.38,32.67,2.18,5.33,45.23 Maltehb/danish-bert-botxo,111,32,512,True,False,16091,3.29,72.62,58.73,40.65,29.47,12.95,0.91 allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,3.29,42.78,42.85,36.68,2.39,1.91,39.16 @@ -214,20 +214,20 @@ Maltehb/aelaectra-danish-electra-small-cased,14,32,128,True,False,4593,3.36,71.8 Addedk/kbbert-distilled-cased,82,50,512,True,False,29698,3.37,81.82,75.89,33.42,14.99,13.63,0.0 dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,3.38,68.63,67.7,25.68,6.73,3.35,22.57 nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,3.38,63.7,62.53,34.35,31.53,22.71,0.06 +NbAiLab/nb-llama-3.1-8B (few-shot),8030,128,131072,True,False,1297,3.4,51.85,54.79,31.84,36.3,32.19,0.71 +"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,3.4,69.39,62.76,3.97,31.65,5.86,36.65 neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot),8030,128,131072,True,False,2996,3.4,35.54,36.61,32.18,0.0,0.0,43.47 AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot),7111,64,2048,True,False,1473,3.41,24.67,29.03,34.39,2.42,5.11,42.52 -NbAiLab/nb-llama-3.1-8B (few-shot),8030,128,131072,True,False,1297,3.41,51.85,54.79,31.84,36.3,32.19,0.71 -"claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,3.41,69.39,62.76,3.97,31.65,5.86,36.65 AI-Sweden-Models/gpt-sw3-1.3b-instruct 
(few-shot),1445,64,2048,True,False,4544,3.42,33.08,38.28,35.58,0.82,1.43,36.06 -HPLT/gpt-33b-nordic-prerelease (few-shot),33119,131,4099,True,False,501,3.44,31.38,37.84,38.88,3.41,3.11,30.39 -dbmdz/bert-medium-historic-multilingual-cased,42,32,512,True,False,24291,3.44,69.65,66.78,26.33,6.62,5.16,15.75 +HPLT/gpt-33b-nordic-prerelease (few-shot),33119,131,4099,True,False,501,3.43,31.38,37.84,38.88,3.41,3.11,30.39 +dbmdz/bert-medium-historic-multilingual-cased,42,32,512,True,False,24291,3.43,69.65,66.78,26.33,6.62,5.16,15.75 ibm-granite/granite-3.0-3b-a800m-instruct (few-shot),3374,49,4096,True,False,10246,3.44,44.89,48.08,32.29,7.49,4.65,26.37 norallm/normistral-7b-warm (few-shot),7248,33,2048,True,False,3175,3.44,42.29,46.29,27.05,1.63,2.57,39.18 Maltehb/aelaectra-danish-electra-small-uncased,14,32,128,True,False,5995,3.47,59.76,51.44,33.41,32.87,20.09,0.0 stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.48,41.64,42.37,33.71,-0.19,-0.01,30.14 birgermoell/roberta-swedish-scandi,125,50,512,True,False,15385,3.5,72.74,69.74,29.68,15.83,8.7,1.04 +google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.52,20.47,24.18,32.61,3.22,3.91,41.16 LumiOpen/Viking-7B (few-shot),7550,131,4096,True,False,1431,3.53,22.37,29.9,35.86,1.03,2.92,34.39 -google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.53,20.47,24.18,32.61,3.22,3.91,41.16 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.55,53.78,55.14,26.21,3.9,2.42,24.86 jjzha/dajobbert-base-uncased,110,32,512,True,False,16243,3.56,65.95,55.29,33.31,20.34,8.07,0.0 timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,32768,False,False,5172,3.57,50.34,52.06,32.19,-0.22,0.0,20.57 @@ -237,58 +237,59 @@ ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3 google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.62,39.78,43.58,22.01,2.76,1.45,32.42 google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.67,15.53,19.78,32.89,1.18,0.0,33.33 meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131073,False,False,7436,3.71,44.66,47.78,27.43,0.07,1.14,18.0 +sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.71,63.79,60.96,32.83,1.09,0.18,0.0 +sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.71,63.79,60.96,32.83,1.09,0.18,0.0 openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,3.72,37.36,42.83,16.02,-0.08,2.29,31.6 -sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.72,63.79,60.96,32.83,1.09,0.18,0.0 -sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.72,63.79,60.96,32.83,1.09,0.18,0.0 ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.73,37.73,40.07,21.5,0.86,2.01,27.03 NorwAI/NorwAI-Mistral-7B-instruct (few-shot),7537,68,4096,False,False,3027,3.74,27.49,32.33,47.78,3.92,4.27,2.46 -AI-Sweden-Models/gpt-sw3-6.7b (few-shot),7111,64,2048,True,False,2285,3.76,22.35,21.98,18.23,1.68,2.49,41.8 -dbmdz/bert-mini-historic-multilingual-cased,12,32,512,True,False,47122,3.76,61.55,59.9,24.59,3.45,2.72,3.99 +AI-Sweden-Models/gpt-sw3-6.7b (few-shot),7111,64,2048,True,False,2285,3.75,22.35,21.98,18.23,1.68,2.49,41.8 +dbmdz/bert-mini-historic-multilingual-cased,12,32,512,True,False,47122,3.75,61.55,59.9,24.59,3.45,2.72,3.99 AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.77,27.37,31.22,34.21,0.92,1.25,18.52 HPLT/gpt-13b-nordic-prerelease 
(few-shot),14030,131,4099,True,False,3520,3.77,28.94,33.83,27.32,1.46,-0.59,25.62 AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot),471,64,2048,True,False,5855,3.78,24.38,31.28,30.88,-0.3,0.45,23.99 AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,3.79,13.49,14.74,27.28,3.09,1.86,34.91 NbAiLab/nb-gpt-j-6B-alpaca (few-shot),6055,50,1024,False,False,2607,3.79,23.82,26.04,32.6,0.34,2.26,21.33 sentence-transformers/distiluse-base-multilingual-cased-v1,135,120,512,True,False,34042,3.8,60.76,59.62,25.98,2.65,3.47,0.2 -EuropeanParliament/EUBERT,94,66,512,True,False,20070,3.82,49.92,44.37,19.81,8.64,3.11,15.89 +EuropeanParliament/EUBERT,94,66,512,True,False,20070,3.81,49.92,44.37,19.81,8.64,3.11,15.89 HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.82,37.6,38.38,24.05,3.56,2.61,13.58 meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,3.82,30.54,31.34,29.5,-0.13,0.02,19.59 openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.83,34.78,39.0,10.69,6.17,5.9,31.25 KBLab/albert-base-swedish-cased-alpha,14,50,512,True,False,15925,3.84,66.97,63.9,18.85,5.83,4.02,0.0 ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.84,53.93,54.04,23.83,3.91,1.55,2.37 ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.85,32.21,36.62,16.98,1.57,0.97,26.28 -mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.9,28.74,30.34,27.49,-2.17,0.26,19.1 +mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.89,28.74,30.34,27.49,-2.17,0.26,19.1 state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,3.94,26.9,34.59,31.06,0.21,-0.17,10.35 HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,3.98,20.25,28.99,17.44,3.2,2.61,21.5 HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,4.01,26.7,28.23,23.25,-0.47,0.26,13.4 jannesg/bertsson,124,50,512,True,False,15314,4.01,49.3,46.11,23.21,2.26,-0.66,0.68 alexanderfalk/danbert-small-cased,83,52,512,True,False,30013,4.02,42.18,37.39,24.39,7.29,2.57,0.0 3ebdola/Dialectal-Arabic-XLM-R-Base,278,250,512,True,False,12783,4.05,55.55,53.53,12.69,2.79,1.66,0.0 +ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,4.07,36.99,37.27,19.55,1.95,2.31,7.33 Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,4.08,26.99,25.74,19.85,1.96,-0.01,16.33 -ibm-granite/granite-3.0-1b-a400m-instruct (few-shot),1335,49,4096,True,False,7964,4.08,36.99,37.27,19.55,1.95,2.31,7.33 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.09,38.96,40.42,19.42,-0.13,0.77,4.7 RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,4.09,21.04,18.71,12.22,-1.18,0.36,26.86 +PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.1,38.96,40.42,19.42,-0.13,0.77,4.7 PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,4.1,28.82,27.81,18.74,-0.46,-0.84,12.66 dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,4.13,46.11,35.18,19.19,2.76,0.42,0.0 NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,4.14,31.65,31.54,10.64,1.81,1.72,16.32 -ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,4.14,31.16,29.73,17.59,1.07,1.59,6.92 Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,4.15,12.1,13.42,22.82,2.7,2.21,16.31 allenai/OLMo-7B (few-shot),6888,50,2051,True,False,5403,4.15,34.42,35.17,21.46,0.34,0.26,0.12 
-mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,4.21,27.37,27.59,18.09,-0.19,-0.8,5.84 +ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,4.15,31.16,29.73,17.59,1.07,1.59,6.92 +mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,4.19,27.37,27.59,18.09,-0.19,-0.8,5.84 PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,4.23,27.47,23.82,22.22,-2.06,-0.77,2.48 +Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.3,29.52,31.27,11.49,0.29,-0.12,7.8 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2051,True,False,5484,4.3,9.06,17.16,25.52,0.68,0.17,0.46 -Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,4.31,29.52,31.27,11.49,0.29,-0.12,7.8 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,4.31,29.25,25.45,11.28,1.52,0.52,8.47 +NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,4.32,10.6,22.63,19.76,2.8,0.17,3.99 RabotaRu/HRBert-mini,80,200,512,True,False,54951,4.32,31.87,32.47,15.07,1.26,0.49,0.0 -NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,4.33,10.6,22.63,19.76,2.8,0.17,3.99 -Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.35,34.46,33.41,6.31,-1.59,0.61,5.95 -AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4.37,27.66,30.88,5.13,0.0,0.0,7.55 +Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,4.34,34.46,33.41,6.31,-1.59,0.61,5.95 +AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4.36,27.66,30.88,5.13,0.0,0.0,7.55 fresh-xlm-roberta-base,278,250,512,True,False,2214,4.37,25.49,25.94,12.6,0.5,1.83,0.0 NbAiLab/nb-gpt-j-6B-v2 (few-shot),6051,50,1024,False,False,2556,4.4,5.29,6.77,20.84,0.45,0.48,2.43 -allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,4.41,30.79,31.12,9.95,-0.95,-0.04,0.0 +allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,4.4,30.79,31.12,9.95,-0.95,-0.04,0.0 fresh-electra-small,14,31,512,True,False,7840,4.42,18.38,12.76,15.29,0.17,0.37,0.0 -HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.44,26.6,23.7,6.21,-0.39,0.21,4.65 +HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.45,26.6,23.7,6.21,-0.39,0.21,4.65 HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.46,24.37,24.69,8.84,-1.2,-0.5,0.16 -HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.46,20.37,21.27,7.6,1.31,0.51,4.8 +HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.48,20.37,21.27,7.6,1.31,0.51,4.8 PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.49,26.59,26.78,7.91,0.28,0.04,0.65 NbAiLab/nb-llama-3.2-3B (few-shot),3213,128,131072,True,False,1880,4.51,2.73,0.26,15.1,13.58,7.78,0.15 PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.52,25.02,21.59,8.05,-0.15,-0.97,0.37 diff --git a/norwegian-nlu.md b/norwegian-nlu.md index 8526a1ae..4454d742 100644 --- a/norwegian-nlu.md +++ b/norwegian-nlu.md @@ -3,7 +3,7 @@ layout: leaderboard title: Norwegian NLU 🇳🇴 --- -
Last updated: 10/01/2025 12:30:09 CET
+ Last updated: 11/01/2025 11:03:19 CET
@@ -360,7 +360,7 @@ title: Norwegian NLU 🇳🇴 512 True 20,637 ± 3,925 / 4,497 ± 1,502 - 1.68 + 1.67 91.90 ± 0.54 / 89.55 ± 0.57 86.81 ± 1.35 / 83.46 ± 1.68 53.69 ± 4.28 / 63.69 ± 6.95 @@ -696,7 +696,7 @@ title: Norwegian NLU 🇳🇴 200000 True 193 ± 87 / 55 ± 19 - 1.89 + 1.88 74.23 ± 1.86 / 70.66 ± 2.97 70.50 ± 2.19 / 65.71 ± 2.34 50.92 ± 2.29 / 63.00 ± 1.71 @@ -1116,7 +1116,7 @@ title: Norwegian NLU 🇳🇴 8191 True 784 ± 310 / 95 ± 28 - 2.30 + 2.29 72.74 ± 1.75 / 57.58 ± 4.14 69.17 ± 2.61 / 56.34 ± 3.95 67.45 ± 4.03 / 77.50 ± 2.90 @@ -1571,27 +1571,6 @@ title: Norwegian NLU 🇳🇴 0.0.0 0.0.0 - - timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val) - 8030 - 128 - 8192 - False - 5,018 ± 1,216 / 996 ± 324 - 2.52 - 64.51 ± 3.28 / 51.06 ± 4.78 - 65.66 ± 3.82 / 53.90 ± 4.32 - 52.90 ± 4.31 / 65.38 ± 3.73 - 29.34 ± 4.34 / 59.36 ± 4.64 - 17.42 ± 4.38 / 52.01 ± 3.50 - 38.49 ± 4.41 / 67.16 ± 3.41 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - microsoft/infoxlm-large 560 @@ -1613,6 +1592,27 @@ title: Norwegian NLU 🇳🇴 0.0.0 0.0.0 + + timpal0l/dolphin-2.9-llama3-8b-flashback (few-shot, val) + 8030 + 128 + 8192 + False + 5,018 ± 1,216 / 996 ± 324 + 2.53 + 64.51 ± 3.28 / 51.06 ± 4.78 + 65.66 ± 3.82 / 53.90 ± 4.32 + 52.90 ± 4.31 / 65.38 ± 3.73 + 29.34 ± 4.34 / 59.36 ± 4.64 + 17.42 ± 4.38 / 52.01 ± 3.50 + 38.49 ± 4.41 / 67.16 ± 3.41 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + AI-Nordics/bert-large-swedish-cased 335 @@ -1718,6 +1718,27 @@ title: Norwegian NLU 🇳🇴 14.1.1 14.1.1 + + Geotrend/bert-base-da-cased + 104 + 23 + 512 + True + 15,432 ± 2,838 / 3,642 ± 1,189 + 2.57 + 87.52 ± 0.63 / 83.86 ± 0.68 + 82.66 ± 1.64 / 78.65 ± 2.01 + 32.73 ± 1.37 / 46.52 ± 1.86 + 36.41 ± 8.89 / 65.20 ± 6.41 + 30.37 ± 5.50 / 62.12 ± 5.66 + 37.71 ± 1.11 / 49.90 ± 1.47 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + KBLab/megatron-bert-large-swedish-cased-165k 370 @@ -1760,27 +1781,6 @@ title: Norwegian NLU 🇳🇴 9.3.1 12.5.2 - - Geotrend/bert-base-da-cased - 104 - 23 - 512 - True - 15,432 ± 2,838 / 3,642 ± 1,189 - 2.58 - 87.52 ± 0.63 / 83.86 ± 0.68 - 82.66 ± 1.64 / 78.65 ± 2.01 - 32.73 ± 1.37 / 46.52 ± 1.86 - 36.41 ± 8.89 / 65.20 ± 6.41 - 30.37 ± 5.50 / 62.12 ± 5.66 - 37.71 ± 1.11 / 49.90 ± 1.47 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot) 46998 @@ -2075,27 +2075,6 @@ title: Norwegian NLU 🇳🇴 10.0.1 10.0.1 - - meta-llama/Meta-Llama-3-8B-Instruct (few-shot) - 8030 - 128 - 8192 - True - 1,007 ± 316 / 162 ± 45 - 2.71 - 74.47 ± 1.47 / 65.57 ± 2.39 - 72.93 ± 1.00 / 65.44 ± 2.55 - 34.44 ± 0.42 / 37.94 ± 0.39 - 27.77 ± 1.63 / 61.75 ± 1.77 - 20.35 ± 1.92 / 57.74 ± 2.28 - 42.90 ± 3.57 / 69.90 ± 3.17 - 12.6.1 - 12.6.1 - 14.0.4 - 12.6.1 - 12.6.1 - 12.6.1 - mhenrichsen/danskgpt-chat-v2.1 (few-shot) unknown @@ -2159,6 +2138,27 @@ title: Norwegian NLU 🇳🇴 0.0.0 0.0.0 + + meta-llama/Meta-Llama-3-8B-Instruct (few-shot) + 8030 + 128 + 8192 + True + 1,483 ± 377 / 287 ± 97 + 2.72 + 66.56 ± 1.70 / 58.47 ± 2.40 + 68.29 ± 1.47 / 61.22 ± 2.25 + 34.47 ± 0.44 / 37.96 ± 0.40 + 28.22 ± 1.37 / 59.51 ± 1.44 + 18.21 ± 2.21 / 52.84 ± 2.30 + 47.34 ± 1.99 / 73.23 ± 1.29 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + KBLab/megatron-bert-large-swedish-cased-110k 370 @@ -2180,27 +2180,6 @@ title: Norwegian NLU 🇳🇴 0.0.0 0.0.0 - - meta-llama/Meta-Llama-3-8B (few-shot) - 8030 - 128 - 8192 - True - 1,335 ± 338 / 260 ± 88 - 2.73 - 61.48 ± 1.83 / 47.65 ± 2.94 - 61.58 ± 2.21 / 50.10 ± 2.68 - 32.94 ± 0.86 / 37.52 ± 0.43 - 21.20 ± 6.57 / 52.29 ± 7.43 - 19.65 ± 4.32 / 56.66 ± 4.40 - 53.35 ± 4.33 / 74.98 
± 3.70 - 12.6.1 - 12.6.1 - 14.1.2 - 12.6.1 - 12.6.1 - 12.6.1 - mistralai/Mixtral-8x7B-v0.1 (few-shot) 46703 @@ -2222,6 +2201,27 @@ title: Norwegian NLU 🇳🇴 14.0.4 14.0.4 + + timpal0l/njord-alpha (few-shot) + 7242 + 32 + 32768 + True + 5,431 ± 1,267 / 1,139 ± 365 + 2.73 + 50.47 ± 2.96 / 43.31 ± 2.54 + 51.97 ± 3.83 / 42.66 ± 5.03 + 48.03 ± 1.71 / 65.89 ± 1.68 + 22.65 ± 3.80 / 51.83 ± 5.03 + 17.10 ± 4.78 / 49.03 ± 6.45 + 44.72 ± 4.47 / 68.08 ± 4.22 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val) 7242 @@ -2264,27 +2264,6 @@ title: Norwegian NLU 🇳🇴 13.0.0 13.0.0 - - timpal0l/njord-alpha (few-shot) - 7242 - 32 - 32768 - True - 5,431 ± 1,267 / 1,139 ± 365 - 2.74 - 50.47 ± 2.96 / 43.31 ± 2.54 - 51.97 ± 3.83 / 42.66 ± 5.03 - 48.03 ± 1.71 / 65.89 ± 1.68 - 22.65 ± 3.80 / 51.83 ± 5.03 - 17.10 ± 4.78 / 49.03 ± 6.45 - 44.72 ± 4.47 / 68.08 ± 4.22 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - merge-crew/da-sv-ties (few-shot, val) 7242 @@ -2537,6 +2516,27 @@ title: Norwegian NLU 🇳🇴 12.4.0 12.4.0 + + AI-Sweden-Models/Llama-3-8B (few-shot) + 8030 + 128 + 8192 + True + 4,141 ± 994 / 905 ± 299 + 2.81 + 44.53 ± 2.58 / 36.59 ± 1.71 + 47.02 ± 2.08 / 39.99 ± 2.76 + 41.84 ± 2.46 / 56.79 ± 2.95 + 19.97 ± 3.99 / 47.40 ± 4.84 + 15.61 ± 4.20 / 43.40 ± 4.90 + 50.91 ± 4.42 / 73.43 ± 3.55 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + 12.10.5 + AI-Sweden-Models/tyr (few-shot, val) 7242 @@ -2585,7 +2585,7 @@ title: Norwegian NLU 🇳🇴 128 131072 True - 1,005 ± 330 / 196 ± 74 + 1,473 ± 377 / 283 ± 96 2.81 64.55 ± 1.69 / 56.81 ± 2.50 66.44 ± 1.38 / 60.02 ± 3.36 @@ -2621,27 +2621,6 @@ title: Norwegian NLU 🇳🇴 12.5.2 12.5.2 - - AI-Sweden-Models/Llama-3-8B (few-shot) - 8030 - 128 - 8192 - True - 4,141 ± 994 / 905 ± 299 - 2.82 - 44.53 ± 2.58 / 36.59 ± 1.71 - 47.02 ± 2.08 / 39.99 ± 2.76 - 41.84 ± 2.46 / 56.79 ± 2.95 - 19.97 ± 3.99 / 47.40 ± 4.84 - 15.61 ± 4.20 / 43.40 ± 4.90 - 50.91 ± 4.42 / 73.43 ± 3.55 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - 12.10.5 - bineric/NorskGPT-Llama-13B-v0.1 (few-shot) unknown @@ -2936,6 +2915,27 @@ title: Norwegian NLU 🇳🇴 11.0.0 12.4.0 + + jhu-clsp/bernice + 278 + 250 + 128 + True + 5,567 ± 450 / 2,483 ± 798 + 2.88 + 84.11 ± 1.13 / 81.19 ± 1.37 + 77.82 ± 1.28 / 73.93 ± 1.46 + 39.63 ± 1.06 / 49.23 ± 2.13 + 45.75 ± 3.27 / 71.33 ± 1.67 + 33.74 ± 2.91 / 63.89 ± 3.31 + 5.35 ± 3.79 / 7.65 ± 5.41 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + Geotrend/distilbert-base-en-fr-de-no-da-cased 76 @@ -2978,27 +2978,6 @@ title: Norwegian NLU 🇳🇴 11.0.0 11.0.0 - - jhu-clsp/bernice - 278 - 250 - 128 - True - 5,567 ± 450 / 2,483 ± 798 - 2.89 - 84.11 ± 1.13 / 81.19 ± 1.37 - 77.82 ± 1.28 / 73.93 ± 1.46 - 39.63 ± 1.06 / 49.23 ± 2.13 - 45.75 ± 3.27 / 71.33 ± 1.67 - 33.74 ± 2.91 / 63.89 ± 3.31 - 5.35 ± 3.79 / 7.65 ± 5.41 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - alpindale/Mistral-7B-v0.2-hf (few-shot) 7242 @@ -3083,27 +3062,6 @@ title: Norwegian NLU 🇳🇴 12.10.0 12.10.0 - - meta-llama/Llama-3.1-8B (few-shot) - 8030 - 128 - 131072 - True - 2,986 ± 823 / 276 ± 94 - 2.94 - 65.17 ± 2.02 / 52.91 ± 2.25 - 60.22 ± 2.29 / 50.51 ± 3.15 - 34.02 ± 0.93 / 37.19 ± 0.86 - 32.48 ± 3.31 / 63.48 ± 2.97 - 18.38 ± 4.77 / 49.87 ± 5.55 - 33.06 ± 4.39 / 58.56 ± 4.32 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - 14.1.2 - google/gemma-7b (few-shot) 8538 @@ -3111,7 +3069,7 @@ title: Norwegian NLU 🇳🇴 8192 True 1,378 ± 260 / 387 ± 119 - 2.95 + 2.93 26.43 ± 3.36 / 26.32 ± 2.35 32.66 ± 3.42 / 29.43 ± 1.74 41.82 ± 3.69 / 53.06 ± 5.15 @@ -3125,6 +3083,27 @@ 
title: Norwegian NLU 🇳🇴 12.9.1 12.9.1 + + meta-llama/Llama-3.1-8B (few-shot) + 8030 + 128 + 131072 + True + 2,986 ± 823 / 276 ± 94 + 2.94 + 65.17 ± 2.02 / 52.91 ± 2.25 + 60.22 ± 2.29 / 50.51 ± 3.15 + 34.02 ± 0.93 / 37.19 ± 0.86 + 32.48 ± 3.31 / 63.48 ± 2.97 + 18.38 ± 4.77 / 49.87 ± 5.55 + 33.06 ± 4.39 / 58.56 ± 4.32 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + ibm-granite/granite-8b-code-base-4k (few-shot) 8055 @@ -3251,6 +3230,27 @@ title: Norwegian NLU 🇳🇴 0.0.0 0.0.0 + + occiglot/occiglot-7b-eu5-instruct (few-shot) + 7242 + 32 + 32768 + False + 2,088 ± 352 / 706 ± 214 + 2.99 + 45.50 ± 2.71 / 40.02 ± 3.16 + 45.96 ± 2.67 / 41.28 ± 2.25 + 44.46 ± 3.40 / 62.00 ± 2.71 + 0.00 ± 0.00 / 33.41 ± 0.30 + 0.00 ± 0.00 / 33.86 ± 0.33 + 52.19 ± 2.88 / 74.97 ± 2.11 + 12.5.2 + 12.5.2 + 12.2.0 + 12.3.1 + 12.3.1 + 12.4.0 + sentence-transformers/stsb-xlm-r-multilingual 278 @@ -3272,27 +3272,6 @@ title: Norwegian NLU 🇳🇴 0.0.0 0.0.0 - - occiglot/occiglot-7b-eu5-instruct (few-shot) - 7242 - 32 - 32768 - False - 2,088 ± 352 / 706 ± 214 - 3.00 - 45.50 ± 2.71 / 40.02 ± 3.16 - 45.96 ± 2.67 / 41.28 ± 2.25 - 44.46 ± 3.40 / 62.00 ± 2.71 - 0.00 ± 0.00 / 33.41 ± 0.30 - 0.00 ± 0.00 / 33.86 ± 0.33 - 52.19 ± 2.88 / 74.97 ± 2.11 - 12.5.2 - 12.5.2 - 12.2.0 - 12.3.1 - 12.3.1 - 12.4.0 - NbAiLab/nb-llama-3.1-70B (few-shot) 70554 @@ -3419,6 +3398,27 @@ title: Norwegian NLU 🇳🇴 13.0.0 13.0.0 + + meta-llama/Meta-Llama-3-8B (few-shot) + 8030 + 128 + 8192 + True + 1,477 ± 376 / 285 ± 97 + 3.02 + 62.89 ± 2.01 / 50.34 ± 1.92 + 56.18 ± 3.26 / 47.30 ± 2.62 + 33.07 ± 0.73 / 37.60 ± 0.49 + 30.73 ± 3.19 / 64.06 ± 2.41 + 20.57 ± 3.62 / 54.22 ± 5.15 + 30.77 ± 4.33 / 54.66 ± 4.41 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + mistralai/Mistral-7B-Instruct-v0.1 (few-shot) 7242 @@ -3426,7 +3426,7 @@ title: Norwegian NLU 🇳🇴 32768 False 634 ± 179 / 110 ± 35 - 3.03 + 3.02 50.08 ± 1.54 / 34.52 ± 1.17 51.27 ± 1.52 / 33.37 ± 2.37 43.65 ± 1.98 / 60.88 ± 1.36 @@ -3699,7 +3699,7 @@ title: Norwegian NLU 🇳🇴 512 True 13,821 ± 2,209 / 3,547 ± 1,184 - 3.12 + 3.11 79.50 ± 0.70 / 76.09 ± 0.70 73.03 ± 1.28 / 68.84 ± 1.39 32.40 ± 1.48 / 44.59 ± 1.66 @@ -3866,7 +3866,7 @@ title: Norwegian NLU 🇳🇴 32 4096 True - 3,194 ± 687 / 650 ± 216 + 8,681 ± 1,650 / 2,177 ± 717 3.20 56.33 ± 1.63 / 36.68 ± 3.27 54.68 ± 1.29 / 37.85 ± 3.79 @@ -4112,6 +4112,27 @@ title: Norwegian NLU 🇳🇴 0.0.0 0.0.0 + + microsoft/Phi-3-mini-128k-instruct (few-shot) + 3821 + 32 + 131072 + True + 7,312 ± 1,668 / 1,609 ± 525 + 3.25 + 52.18 ± 2.03 / 29.83 ± 3.23 + 50.53 ± 1.49 / 31.94 ± 4.20 + 33.30 ± 2.01 / 51.15 ± 2.93 + 2.63 ± 2.56 / 40.21 ± 3.98 + 4.00 ± 1.87 / 44.87 ± 3.17 + 37.08 ± 2.44 / 61.14 ± 2.01 + 12.9.1 + 12.9.1 + 12.9.1 + 12.9.1 + 12.9.1 + 12.9.1 + DDSC/roberta-base-danish 125 @@ -4134,25 +4155,25 @@ title: Norwegian NLU 🇳🇴 0.0.0 - microsoft/Phi-3-mini-128k-instruct (few-shot) - 3821 - 32 - 131072 - True - 7,312 ± 1,668 / 1,609 ± 525 - 3.26 - 52.18 ± 2.03 / 29.83 ± 3.23 - 50.53 ± 1.49 / 31.94 ± 4.20 - 33.30 ± 2.01 / 51.15 ± 2.93 - 2.63 ± 2.56 / 40.21 ± 3.98 - 4.00 ± 1.87 / 44.87 ± 3.17 - 37.08 ± 2.44 / 61.14 ± 2.01 - 12.9.1 - 12.9.1 - 12.9.1 - 12.9.1 - 12.9.1 - 12.9.1 + Qwen/Qwen1.5-4B-Chat (few-shot) + 3950 + 152 + 32768 + False + 4,347 ± 893 / 1,135 ± 365 + 3.27 + 44.83 ± 1.58 / 40.11 ± 2.00 + 46.29 ± 1.65 / 41.63 ± 3.45 + 32.70 ± 1.59 / 45.73 ± 2.82 + 3.57 ± 1.55 / 37.05 ± 2.34 + 1.61 ± 2.11 / 37.85 ± 3.99 + 42.55 ± 3.36 / 67.11 ± 2.50 + 12.5.2 + 12.5.2 + 10.0.1 + 12.1.0 + 12.1.0 + 12.5.2 AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot) @@ -4196,27 +4217,6 @@ title: 
Norwegian NLU 🇳🇴 9.2.0 12.5.1 - - Qwen/Qwen1.5-4B-Chat (few-shot) - 3950 - 152 - 32768 - False - 4,347 ± 893 / 1,135 ± 365 - 3.28 - 44.83 ± 1.58 / 40.11 ± 2.00 - 46.29 ± 1.65 / 41.63 ± 3.45 - 32.70 ± 1.59 / 45.73 ± 2.82 - 3.57 ± 1.55 / 37.05 ± 2.34 - 1.61 ± 2.11 / 37.85 ± 3.99 - 42.55 ± 3.36 / 67.11 ± 2.50 - 12.5.2 - 12.5.2 - 10.0.1 - 12.1.0 - 12.1.0 - 12.5.2 - MaLA-LM/emma-500-llama2-7b (few-shot) 6738 @@ -4553,6 +4553,48 @@ title: Norwegian NLU 🇳🇴 14.1.1 14.1.1 + + NbAiLab/nb-llama-3.1-8B (few-shot) + 8030 + 128 + 131072 + True + 1,297 ± 338 / 245 ± 83 + 3.40 + 51.85 ± 2.62 / 39.77 ± 2.84 + 54.79 ± 2.20 / 44.97 ± 2.37 + 31.84 ± 1.39 / 35.10 ± 1.48 + 36.30 ± 2.59 / 64.51 ± 1.65 + 32.19 ± 2.36 / 62.68 ± 2.41 + 0.71 ± 0.32 / 21.70 ± 1.90 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 + + + claude-3-5-haiku-20241022 (zero-shot, val) + unknown + unknown + 200000 + True + 277 ± 77 / 70 ± 25 + 3.40 + 69.39 ± 2.58 / 47.10 ± 3.14 + 62.76 ± 2.10 / 42.75 ± 2.27 + 3.97 ± 1.70 / 32.77 ± 0.95 + 31.65 ± 2.14 / 54.96 ± 1.11 + 5.86 ± 3.28 / 38.81 ± 1.69 + 36.65 ± 1.08 / 66.51 ± 0.83 + 14.0.3 + 14.0.3 + 14.0.2 + 14.0.3 + 14.0.3 + 14.0.3 + neuralmagic/Sparse-Llama-3.1-8B-2of4 (few-shot) 8030 @@ -4596,67 +4638,25 @@ title: Norwegian NLU 🇳🇴 12.4.0 - NbAiLab/nb-llama-3.1-8B (few-shot) - 8030 - 128 - 131072 + AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot) + 1445 + 64 + 2048 True - 1,297 ± 338 / 245 ± 83 - 3.41 - 51.85 ± 2.62 / 39.77 ± 2.84 - 54.79 ± 2.20 / 44.97 ± 2.37 - 31.84 ± 1.39 / 35.10 ± 1.48 - 36.30 ± 2.59 / 64.51 ± 1.65 - 32.19 ± 2.36 / 62.68 ± 2.41 - 0.71 ± 0.32 / 21.70 ± 1.90 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.1.2 - 14.0.4 - - - claude-3-5-haiku-20241022 (zero-shot, val) - unknown - unknown - 200000 - True - 277 ± 77 / 70 ± 25 - 3.41 - 69.39 ± 2.58 / 47.10 ± 3.14 - 62.76 ± 2.10 / 42.75 ± 2.27 - 3.97 ± 1.70 / 32.77 ± 0.95 - 31.65 ± 2.14 / 54.96 ± 1.11 - 5.86 ± 3.28 / 38.81 ± 1.69 - 36.65 ± 1.08 / 66.51 ± 0.83 - 14.0.3 - 14.0.3 - 14.0.2 - 14.0.3 - 14.0.3 - 14.0.3 - - - AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot) - 1445 - 64 - 2048 - True - 4,544 ± 1,000 / 1,106 ± 359 - 3.42 - 33.08 ± 2.22 / 34.51 ± 2.24 - 38.28 ± 2.63 / 40.50 ± 2.72 - 35.58 ± 2.13 / 44.49 ± 2.52 - 0.82 ± 1.46 / 34.78 ± 1.54 - 1.43 ± 1.70 / 34.19 ± 1.10 - 36.06 ± 1.76 / 58.71 ± 1.63 - 12.5.2 - 12.5.2 - 9.3.1 - 12.1.0 - 12.1.0 - 12.4.0 + 4,544 ± 1,000 / 1,106 ± 359 + 3.42 + 33.08 ± 2.22 / 34.51 ± 2.24 + 38.28 ± 2.63 / 40.50 ± 2.72 + 35.58 ± 2.13 / 44.49 ± 2.52 + 0.82 ± 1.46 / 34.78 ± 1.54 + 1.43 ± 1.70 / 34.19 ± 1.10 + 36.06 ± 1.76 / 58.71 ± 1.63 + 12.5.2 + 12.5.2 + 9.3.1 + 12.1.0 + 12.1.0 + 12.4.0 HPLT/gpt-33b-nordic-prerelease (few-shot) @@ -4665,7 +4665,7 @@ title: Norwegian NLU 🇳🇴 4099 True 501 ± 50 / 238 ± 69 - 3.44 + 3.43 31.38 ± 5.44 / 24.32 ± 2.75 37.84 ± 4.65 / 29.35 ± 4.10 38.88 ± 4.12 / 54.92 ± 4.66 @@ -4686,7 +4686,7 @@ title: Norwegian NLU 🇳🇴 512 True 24,291 ± 4,887 / 5,096 ± 1,655 - 3.44 + 3.43 69.65 ± 1.48 / 66.15 ± 1.66 66.78 ± 1.28 / 62.75 ± 1.40 26.33 ± 1.84 / 40.67 ± 0.71 @@ -4805,6 +4805,27 @@ title: Norwegian NLU 🇳🇴 0.0.0 0.0.0 + + google/gemma-2-2b (few-shot) + 2614 + 256 + 8193 + True + 5,235 ± 1,226 / 1,154 ± 366 + 3.52 + 20.47 ± 4.02 / 21.28 ± 2.58 + 24.18 ± 4.24 / 24.41 ± 3.40 + 32.61 ± 1.86 / 47.91 ± 2.11 + 3.22 ± 1.55 / 36.61 ± 2.49 + 3.91 ± 2.50 / 45.37 ± 4.56 + 41.16 ± 3.73 / 63.31 ± 3.73 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + LumiOpen/Viking-7B (few-shot) 7550 @@ -4826,27 +4847,6 @@ title: Norwegian NLU 🇳🇴 12.7.0 12.7.0 - - google/gemma-2-2b (few-shot) - 2614 - 256 - 
8193 - True - 5,235 ± 1,226 / 1,154 ± 366 - 3.53 - 20.47 ± 4.02 / 21.28 ± 2.58 - 24.18 ± 4.24 / 24.41 ± 3.40 - 32.61 ± 1.86 / 47.91 ± 2.11 - 3.22 ± 1.55 / 36.61 ± 2.49 - 3.91 ± 2.50 / 45.37 ± 4.56 - 41.16 ± 3.73 / 63.31 ± 3.73 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - ibm-granite/granite-3b-code-instruct-2k (few-shot) 3483 @@ -5036,27 +5036,6 @@ title: Norwegian NLU 🇳🇴 13.0.0 13.0.0 - - openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot) - 7453 - 251 - 4096 - False - 1,254 ± 328 / 243 ± 83 - 3.72 - 37.36 ± 1.61 / 24.19 ± 1.65 - 42.83 ± 1.41 / 28.06 ± 1.69 - 16.02 ± 6.40 / 30.81 ± 5.11 - -0.08 ± 0.82 / 33.74 ± 0.41 - 2.29 ± 1.78 / 35.36 ± 1.11 - 31.60 ± 1.36 / 56.15 ± 1.66 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - 14.0.4 - sentence-transformers/distiluse-base-multilingual-cased-v2 135 @@ -5064,7 +5043,7 @@ title: Norwegian NLU 🇳🇴 512 True 33,247 ± 8,123 / 6,017 ± 1,977 - 3.72 + 3.71 63.79 ± 2.11 / 67.14 ± 1.91 60.96 ± 1.11 / 64.65 ± 1.00 32.83 ± 1.48 / 43.32 ± 0.69 @@ -5085,7 +5064,7 @@ title: Norwegian NLU 🇳🇴 512 True 19,206 ± 4,451 / 3,658 ± 1,187 - 3.72 + 3.71 63.79 ± 2.11 / 67.14 ± 1.91 60.96 ± 1.11 / 64.65 ± 1.00 32.83 ± 1.48 / 43.32 ± 0.69 @@ -5099,6 +5078,27 @@ title: Norwegian NLU 🇳🇴 12.6.1 12.6.1 + + openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot) + 7453 + 251 + 4096 + False + 1,254 ± 328 / 243 ± 83 + 3.72 + 37.36 ± 1.61 / 24.19 ± 1.65 + 42.83 ± 1.41 / 28.06 ± 1.69 + 16.02 ± 6.40 / 30.81 ± 5.11 + -0.08 ± 0.82 / 33.74 ± 0.41 + 2.29 ± 1.78 / 35.36 ± 1.11 + 31.60 ± 1.36 / 56.15 ± 1.66 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + 14.0.4 + ibm-granite/granite-7b-instruct (few-shot) 6738 @@ -5148,7 +5148,7 @@ title: Norwegian NLU 🇳🇴 2048 True 2,285 ± 443 / 671 ± 205 - 3.76 + 3.75 22.35 ± 7.84 / 23.89 ± 4.74 21.98 ± 7.52 / 27.22 ± 4.97 18.23 ± 9.28 / 38.93 ± 6.44 @@ -5169,7 +5169,7 @@ title: Norwegian NLU 🇳🇴 512 True 47,122 ± 9,661 / 9,714 ± 3,152 - 3.76 + 3.75 61.55 ± 1.55 / 58.24 ± 1.47 59.90 ± 1.56 / 56.03 ± 1.41 24.59 ± 1.57 / 40.34 ± 0.99 @@ -5316,7 +5316,7 @@ title: Norwegian NLU 🇳🇴 512 True 20,070 ± 3,977 / 4,400 ± 1,435 - 3.82 + 3.81 49.92 ± 0.61 / 49.17 ± 0.71 44.37 ± 1.15 / 43.43 ± 1.21 19.81 ± 2.15 / 40.90 ± 2.60 @@ -5463,7 +5463,7 @@ title: Norwegian NLU 🇳🇴 2048 False 1,745 ± 978 / 686 ± 159 - 3.90 + 3.89 28.74 ± 4.18 / 28.29 ± 4.37 30.34 ± 6.08 / 30.02 ± 6.42 27.49 ± 3.13 / 48.00 ± 3.89 @@ -5603,6 +5603,27 @@ title: Norwegian NLU 🇳🇴 0.0.0 0.0.0 + + ibm-granite/granite-3.0-1b-a400m-instruct (few-shot) + 1335 + 49 + 4096 + True + 7,964 ± 2,255 / 1,299 ± 433 + 4.07 + 36.99 ± 2.08 / 28.69 ± 2.20 + 37.27 ± 1.09 / 27.84 ± 2.23 + 19.55 ± 3.00 / 40.67 ± 3.28 + 1.95 ± 1.15 / 43.75 ± 3.47 + 2.31 ± 0.84 / 46.79 ± 2.89 + 7.33 ± 1.88 / 19.74 ± 2.82 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + Qwen/Qwen1.5-1.8B-Chat (few-shot) 1837 @@ -5625,25 +5646,25 @@ title: Norwegian NLU 🇳🇴 12.5.0 - ibm-granite/granite-3.0-1b-a400m-instruct (few-shot) - 1335 - 49 + RuterNorway/Llama-2-7b-chat-norwegian (few-shot) + unknown + 32 4096 - True - 7,964 ± 2,255 / 1,299 ± 433 - 4.08 - 36.99 ± 2.08 / 28.69 ± 2.20 - 37.27 ± 1.09 / 27.84 ± 2.23 - 19.55 ± 3.00 / 40.67 ± 3.28 - 1.95 ± 1.15 / 43.75 ± 3.47 - 2.31 ± 0.84 / 46.79 ± 2.89 - 7.33 ± 1.88 / 19.74 ± 2.82 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 + False + 10,890 ± 2,686 / 2,186 ± 750 + 4.09 + 21.04 ± 2.63 / 20.44 ± 2.47 + 18.71 ± 2.67 / 19.91 ± 2.89 + 12.22 ± 1.17 / 23.50 ± 3.03 + -1.18 ± 1.40 / 35.70 ± 2.67 + 0.36 ± 1.28 / 37.66 ± 4.07 + 26.86 ± 1.65 / 50.11 ± 1.80 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 + 9.3.1 
+ 12.5.2 PleIAs/Pleias-1.2b-Preview (few-shot) @@ -5652,7 +5673,7 @@ title: Norwegian NLU 🇳🇴 2048 True 10,756 ± 3,589 / 1,157 ± 670 - 4.09 + 4.10 38.96 ± 2.67 / 36.82 ± 3.28 40.42 ± 2.43 / 38.81 ± 2.83 19.42 ± 3.09 / 26.64 ± 4.19 @@ -5666,27 +5687,6 @@ title: Norwegian NLU 🇳🇴 14.1.2 14.0.4 - - RuterNorway/Llama-2-7b-chat-norwegian (few-shot) - unknown - 32 - 4096 - False - 10,890 ± 2,686 / 2,186 ± 750 - 4.09 - 21.04 ± 2.63 / 20.44 ± 2.47 - 18.71 ± 2.67 / 19.91 ± 2.89 - 12.22 ± 1.17 / 23.50 ± 3.03 - -1.18 ± 1.40 / 35.70 ± 2.67 - 0.36 ± 1.28 / 37.66 ± 4.07 - 26.86 ± 1.65 / 50.11 ± 1.80 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 9.3.1 - 12.5.2 - PleIAs/Pleias-3b-Preview (few-shot) 3212 @@ -5750,27 +5750,6 @@ title: Norwegian NLU 🇳🇴 14.0.4 14.0.4 - - ibm-granite/granite-3.0-1b-a400m-base (few-shot) - 1385 - 49 - 4096 - True - 7,808 ± 2,183 / 1,289 ± 428 - 4.14 - 31.16 ± 1.45 / 30.20 ± 1.07 - 29.73 ± 2.46 / 28.89 ± 2.68 - 17.59 ± 2.80 / 35.34 ± 4.36 - 1.07 ± 1.90 / 44.05 ± 3.72 - 1.59 ± 1.56 / 41.39 ± 3.86 - 6.92 ± 0.87 / 14.48 ± 2.08 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - 13.2.0 - Qwen/Qwen1.5-1.8B (few-shot) 1837 @@ -5813,6 +5792,27 @@ title: Norwegian NLU 🇳🇴 12.5.2 12.5.2 + + ibm-granite/granite-3.0-1b-a400m-base (few-shot) + 1385 + 49 + 4096 + True + 7,808 ± 2,183 / 1,289 ± 428 + 4.15 + 31.16 ± 1.45 / 30.20 ± 1.07 + 29.73 ± 2.46 / 28.89 ± 2.68 + 17.59 ± 2.80 / 35.34 ± 4.36 + 1.07 ± 1.90 / 44.05 ± 3.72 + 1.59 ± 1.56 / 41.39 ± 3.86 + 6.92 ± 0.87 / 14.48 ± 2.08 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + 13.2.0 + mhenrichsen/danskgpt-tiny (few-shot) 1100 @@ -5820,7 +5820,7 @@ title: Norwegian NLU 🇳🇴 2048 True 8,597 ± 1,983 / 1,926 ± 600 - 4.21 + 4.19 27.37 ± 6.89 / 27.19 ± 7.19 27.59 ± 6.34 / 28.03 ± 6.94 18.09 ± 6.14 / 31.83 ± 6.77 @@ -5855,6 +5855,27 @@ title: Norwegian NLU 🇳🇴 14.1.2 14.0.4 + + Qwen/Qwen1.5-0.5B-Chat (few-shot) + 620 + 152 + 32768 + False + 11,740 ± 3,000 / 2,209 ± 721 + 4.30 + 29.52 ± 1.48 / 29.79 ± 1.62 + 31.27 ± 1.30 / 31.91 ± 1.31 + 11.49 ± 1.38 / 27.12 ± 1.98 + 0.29 ± 1.58 / 40.21 ± 4.22 + -0.12 ± 1.48 / 39.92 ± 3.90 + 7.80 ± 1.19 / 17.09 ± 2.72 + 12.5.2 + 12.5.2 + 11.0.0 + 12.1.0 + 12.1.0 + 12.4.0 + allenai/OLMo-7B-Twin-2T (few-shot) 6888 @@ -5877,25 +5898,46 @@ title: Norwegian NLU 🇳🇴 12.5.2 - Qwen/Qwen1.5-0.5B-Chat (few-shot) - 620 - 152 - 32768 - False - 11,740 ± 3,000 / 2,209 ± 721 + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 4.31 - 29.52 ± 1.48 / 29.79 ± 1.62 - 31.27 ± 1.30 / 31.91 ± 1.31 - 11.49 ± 1.38 / 27.12 ± 1.98 - 0.29 ± 1.58 / 40.21 ± 4.22 - -0.12 ± 1.48 / 39.92 ± 3.90 - 7.80 ± 1.19 / 17.09 ± 2.72 - 12.5.2 - 12.5.2 - 11.0.0 - 12.1.0 - 12.1.0 - 12.4.0 + 29.25 ± 2.15 / 28.34 ± 2.25 + 25.45 ± 1.96 / 24.62 ± 1.86 + 11.28 ± 0.71 / 22.46 ± 2.54 + 1.52 ± 1.26 / 40.82 ± 4.08 + 0.52 ± 1.60 / 41.23 ± 4.21 + 8.47 ± 0.94 / 20.43 ± 2.10 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + + + NbAiLab/nb-llama-3.2-1B (few-shot) + 1236 + 128 + 131072 + True + 3,424 ± 1,080 / 464 ± 158 + 4.32 + 10.60 ± 5.84 / 10.10 ± 5.81 + 22.63 ± 4.50 / 21.78 ± 4.61 + 19.76 ± 6.05 / 25.69 ± 5.67 + 2.80 ± 1.41 / 41.56 ± 3.36 + 0.17 ± 2.00 / 37.59 ± 2.30 + 3.99 ± 1.17 / 14.63 ± 2.66 + 14.0.4 + 14.0.4 + 14.1.2 + 14.1.2 + 14.1.2 + 14.0.4 RabotaRu/HRBert-mini @@ -5918,27 +5960,6 @@ title: Norwegian NLU 🇳🇴 0.0.0 0.0.0 - - NbAiLab/nb-llama-3.2-1B (few-shot) - 1236 - 128 - 131072 - True - 3,424 ± 1,080 / 464 ± 158 - 4.33 - 10.60 ± 5.84 / 10.10 ± 5.81 - 22.63 ± 4.50 / 21.78 ± 4.61 - 19.76 ± 6.05 / 25.69 ± 5.67 - 2.80 ± 1.41 / 41.56 ± 3.36 - 
0.17 ± 2.00 / 37.59 ± 2.30 - 3.99 ± 1.17 / 14.63 ± 2.66 - 14.0.4 - 14.0.4 - 14.1.2 - 14.1.2 - 14.1.2 - 14.0.4 - Qwen/Qwen1.5-0.5B (few-shot) 620 @@ -5946,7 +5967,7 @@ title: Norwegian NLU 🇳🇴 32768 True 11,371 ± 2,924 / 2,122 ± 692 - 4.35 + 4.34 34.46 ± 2.01 / 33.09 ± 2.32 33.41 ± 2.21 / 33.91 ± 2.33 6.31 ± 3.46 / 20.67 ± 2.69 @@ -5967,7 +5988,7 @@ title: Norwegian NLU 🇳🇴 2048 True 7,717 ± 1,553 / 2,013 ± 625 - 4.37 + 4.36 27.66 ± 2.00 / 28.61 ± 2.15 30.88 ± 2.13 / 31.97 ± 2.10 5.13 ± 3.33 / 20.41 ± 3.12 @@ -6030,7 +6051,7 @@ title: Norwegian NLU 🇳🇴 2051 True 8,536 ± 1,926 / 1,940 ± 619 - 4.41 + 4.40 30.79 ± 1.95 / 32.18 ± 1.98 31.12 ± 2.36 / 33.10 ± 2.68 9.95 ± 3.92 / 29.01 ± 2.80 @@ -6072,7 +6093,7 @@ title: Norwegian NLU 🇳🇴 8192 True 22,023 ± 6,203 / 3,675 ± 1,231 - 4.44 + 4.45 26.60 ± 1.99 / 23.60 ± 2.05 23.70 ± 1.58 / 23.04 ± 2.17 6.21 ± 2.55 / 23.74 ± 3.28 @@ -6114,7 +6135,7 @@ title: Norwegian NLU 🇳🇴 8192 True 21,777 ± 6,115 / 3,617 ± 1,211 - 4.46 + 4.48 20.37 ± 5.55 / 21.57 ± 3.57 21.27 ± 5.10 / 22.34 ± 4.41 7.60 ± 2.24 / 26.47 ± 2.89 diff --git a/swedish-nlg.csv b/swedish-nlg.csv index 08f56f57..14263e46 100644 --- a/swedish-nlg.csv +++ b/swedish-nlg.csv @@ -4,8 +4,8 @@ model_id,num_model_parameters,vocabulary_size,max_sequence_length,commercially_l meta-llama/Llama-3.1-405B-Instruct-FP8 (few-shot),405869,128,131072,True,False,799,1.25,76.27,80.7,68.85,56.41,67.18,75.85,81.49 "gpt-4o-2024-05-13 (few-shot, val)",-1,200,128000,True,False,916,1.28,76.66,77.16,68.99,57.96,66.0,70.7,86.3 meta-llama/Llama-3.1-70B-Instruct (few-shot),70554,128,131072,True,False,1409,1.42,72.16,81.69,63.97,57.99,66.85,69.59,70.4 -"meta-llama/Meta-Llama-3-70B (few-shot, val)",70554,128,8192,True,False,312,1.42,74.61,78.61,63.2,61.98,67.6,61.55,66.21 meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.43,71.98,81.15,64.46,51.22,66.63,70.79,75.48 +"meta-llama/Meta-Llama-3-70B (few-shot, val)",70554,128,8192,True,False,312,1.43,74.61,78.61,63.2,61.98,67.6,61.55,66.21 "gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.44,75.06,74.85,65.23,53.02,65.8,71.87,69.98 Qwen/Qwen2.5-72B-Instruct (few-shot),72706,152,32768,True,False,1219,1.53,62.12,79.89,61.71,54.99,65.38,70.61,76.53 google/gemma-2-27b-it (few-shot),27227,256,8193,True,False,1516,1.59,62.59,80.73,61.37,58.76,65.54,58.15,68.94 @@ -15,7 +15,7 @@ ThatsGroes/gemma-2-27b-it-FP8-Dynamic (few-shot),28411,256,4096,True,False,3633, "152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32764,True,False,2126,1.75,62.96,75.25,53.28,56.42,67.6,53.56,59.7 google/gemma-2-9b-it (few-shot),9242,256,8193,True,False,2062,1.78,52.5,78.51,61.28,55.22,66.3,51.58,66.61 "gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,1.82,73.04,72.77,58.06,58.02,66.92,40.73,50.51 -"meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8192,True,False,1673,1.84,77.06,53.56,47.5,46.86,68.25,61.31,66.73 +"meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val)",70554,128,8192,True,False,1673,1.83,77.06,53.56,47.5,46.86,68.25,61.31,66.73 "meta-llama/Llama-2-70b-hf (few-shot, val)",68977,32,4096,True,False,1892,1.85,64.76,75.46,43.27,63.04,68.43,46.16,50.41 nvidia/Llama-3.1-Nemotron-70B-Instruct-HF (few-shot),70554,128,131072,True,False,1208,1.9,58.65,81.81,63.69,42.29,50.67,68.28,72.36 google/gemma-2-9b (few-shot),9242,256,8193,True,False,2038,1.91,50.43,80.55,50.86,59.35,65.63,52.06,49.8 @@ -24,7 +24,7 @@ AI-Sweden-Models/Llama-3-8B-instruct (few-shot),8030,128,8192,False,False,1472,2 timpal0l/sol 
(few-shot),10732,32,4096,False,False,3701,2.01,57.51,77.31,25.06,60.16,65.22,39.52,70.93 "gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.03,62.45,77.69,68.93,12.11,66.04,55.8,63.61 upstage/SOLAR-10.7B-v1.0 (few-shot),10732,32,4096,True,False,3780,2.06,59.65,77.48,16.94,62.65,65.19,39.82,68.87 -mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.1,48.92,62.08,68.93,36.4,61.39,48.18,56.87 +mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904,2.09,48.92,62.08,68.93,36.4,61.39,48.18,56.87 "claude-3-5-sonnet-20241022 (zero-shot, val)",-1,-1,200000,True,False,193,2.12,70.22,77.7,74.34,49.32,65.36,28.34,13.68 mistralai/Mistral-Nemo-Instruct-2407 (few-shot),12248,131,1024001,True,False,7095,2.13,62.86,70.54,37.5,58.0,65.93,40.58,42.99 CohereForAI/c4ai-command-r-08-2024 (few-shot),32296,256,131072,False,False,1909,2.14,65.01,77.68,34.06,56.78,66.08,39.39,37.81 @@ -34,7 +34,7 @@ skole-gpt-mixtral (few-shot),-1,32,32768,False,False,3583,2.25,54.14,78.27,32.49 NorwAI/NorwAI-Mixtral-8x7B-instruct (few-shot),46998,68,32768,True,False,9015,2.27,57.66,80.04,45.21,52.73,59.91,42.25,28.88 four-two-labs/orpo-llama-3-swe (few-shot),8030,128,8192,False,False,4974,2.28,60.93,79.74,26.02,59.84,64.99,36.35,27.22 "RJuro/munin-neuralbeagle-7b (few-shot, val)",7242,32,32768,False,True,2493,2.3,62.96,77.13,15.73,58.43,67.58,32.54,34.94 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.3,55.8,79.23,32.67,46.88,66.43,36.35,37.89 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.3,55.8,79.23,32.67,46.88,66.43,36.35,37.89 "birgermoell/Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2887,2.33,55.29,78.29,18.45,58.42,67.54,29.44,37.45 google/gemma-7b (few-shot),8538,256,8192,True,False,1378,2.33,43.68,77.72,36.25,58.62,64.44,39.94,25.96 mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.33,60.92,79.78,34.88,50.35,62.82,43.74,19.86 @@ -43,17 +43,17 @@ mistralai/Mixtral-8x7B-v0.1 (few-shot),46703,32,32768,True,False,2363,2.33,60.92 "merge-crew/da-sv-task-arithmetic (few-shot, val)",7242,32,32768,True,True,2500,2.34,47.28,76.62,33.23,60.0,66.68,29.95,31.12 "meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,3843,True,False,1979,2.34,55.91,64.52,23.85,58.88,67.57,37.6,31.78 utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.35,40.59,76.02,33.98,56.98,66.33,40.09,36.27 -CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.36,57.38,78.43,14.52,53.14,65.69,37.32,38.28 bineric/NorskGPT-Llama3-8b (few-shot),8030,128,8192,False,False,3382,2.36,63.19,76.06,5.34,56.7,66.25,36.23,43.6 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.36,46.15,80.33,32.89,46.51,66.04,42.98,35.33 "AI-Sweden-Models/tyr (few-shot, val)",7242,32,32768,False,True,6079,2.37,56.21,78.3,14.35,61.08,67.96,31.74,30.12 +CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.37,57.38,78.43,14.52,53.14,65.69,37.32,38.28 "mlabonne/NeuralBeagle14-7B (few-shot, val)",7242,32,8192,False,True,2549,2.37,61.25,76.03,16.28,50.96,68.35,32.3,38.78 "RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val)",7242,32,32768,False,True,3008,2.38,59.36,72.04,22.38,57.96,65.13,29.81,35.59 "birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2890,2.4,52.96,76.99,14.27,59.92,67.62,27.95,36.11 "birgermoell/Munin-NeuralBeagle-NorskGPT (few-shot, 
val)",7242,32,32768,False,True,2903,2.4,63.85,73.72,-0.56,60.1,68.11,27.79,42.43 "birgermoell/WestLake-Munin-Cat-NorskGPT (few-shot, val)",7242,32,32768,False,True,2856,2.4,63.85,73.72,-0.56,60.1,68.11,27.79,42.43 "merge-crew/da-sv-dare-ties-density-0.9 (few-shot, val)",7242,32,32768,True,True,2443,2.4,46.61,76.38,34.16,58.77,66.77,29.77,25.38 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.41,69.67,59.93,27.63,49.84,66.6,33.54,30.32 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.41,69.67,59.93,27.63,49.84,66.6,33.54,30.32 "birgermoell/Rapid-Cycling (few-shot, val)",7242,32,32768,False,True,2346,2.42,53.66,77.72,16.22,59.75,67.57,27.24,32.04 timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot),7242,32,32768,True,False,5054,2.42,44.14,80.14,34.23,57.07,65.15,33.24,25.5 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.44,56.28,77.51,23.25,47.09,65.58,31.52,39.95 @@ -64,7 +64,7 @@ bineric/NorskGPT-Mistral-7b (few-shot),7242,32,32768,False,False,1440,2.46,58.4, meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.46,62.19,80.31,30.29,42.78,64.14,35.1,23.18 nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2.46,58.75,79.59,33.09,47.28,62.78,36.58,18.78 senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.46,58.9,67.74,16.52,49.41,66.09,31.76,45.84 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.47,59.92,80.91,26.39,47.69,63.94,33.39,20.21 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.47,59.92,80.91,26.39,47.69,63.94,33.39,20.21 timpal0l/Llama-3-8B-flashback-v1 (few-shot),8030,128,8192,True,False,3004,2.48,57.01,81.97,31.16,53.99,63.7,29.77,15.86 Mabeck/Heidrun-Mistral-7B-chat (few-shot),7242,32,32768,False,False,1419,2.49,55.06,77.5,17.47,58.67,64.18,31.04,23.57 NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.49,57.01,80.12,43.04,30.44,63.55,41.92,19.13 @@ -86,9 +86,9 @@ Mabeck/Heidrun-Mistral-7B-base (few-shot),7242,32,32768,True,False,3823,2.6,48.4 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.6,44.94,76.78,16.96,56.83,65.09,26.57,24.62 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.61,49.9,77.19,14.67,57.12,66.25,24.4,19.3 ThatsGroes/munin-SkoleGPTOpenOrca-7b-16bit (few-shot),7242,32,32768,False,False,3006,2.64,44.64,77.98,16.57,57.31,63.23,28.15,23.58 -timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,32768,False,False,5172,2.66,46.74,77.06,14.0,56.74,62.56,30.87,15.79 bineric/NorskGPT-Llama-13B-v0.1 (few-shot),13016,32,4096,False,False,2856,2.67,49.26,79.05,0.22,56.78,65.99,25.56,28.26 danish-foundation-models/munin-7b-alpha (few-shot),7242,32,32768,True,False,6116,2.67,42.23,78.8,15.47,56.75,62.78,30.86,19.11 +timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,32768,False,False,5172,2.67,46.74,77.06,14.0,56.74,62.56,30.87,15.79 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,2.68,43.74,76.98,16.01,48.38,64.98,29.44,22.42 mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.7,47.92,62.9,19.95,52.51,66.11,25.6,21.75 RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.71,50.85,74.17,7.51,57.32,65.2,23.92,17.67 @@ -111,7 +111,7 @@ allenai/OLMo-1.7-7B-hf (few-shot),6888,50,4096,True,False,3371,2.93,41.25,76.6,6 ibm-granite/granite-3.0-2b-instruct 
(few-shot),2634,49,4097,True,False,10194,2.96,45.23,72.76,11.25,52.22,61.56,18.14,6.77 Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,2.97,40.19,64.08,5.43,53.21,61.9,20.95,16.59 ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.97,52.85,73.93,8.27,48.49,60.98,13.69,5.68 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,2.97,46.15,67.17,5.3,51.12,59.2,21.33,16.12 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,2.97,46.15,67.17,5.3,51.12,59.2,21.33,16.12 AI-Sweden-Models/gpt-sw3-40b (few-shot),39927,64,1795,True,False,409,3.0,32.0,80.44,10.73,53.8,65.16,8.35,5.74 emillykkejensen/Phi-3-mini-4k-instruct-dansk (few-shot),3821,32,4096,False,False,1360,3.0,47.81,68.43,3.63,53.03,56.14,23.29,12.06 google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,3.0,30.45,76.36,6.06,55.19,63.12,20.8,6.24 @@ -133,10 +133,10 @@ openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False, AI-Sweden-Models/gpt-sw3-6.7b-v2 (few-shot),7111,64,2048,True,False,2351,3.18,28.73,77.47,8.78,50.57,62.41,5.23,5.39 norallm/normistral-7b-warm (few-shot),7248,33,2048,True,False,3175,3.2,48.78,76.09,2.53,48.93,57.49,1.28,1.27 AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2048,True,False,1831,3.22,15.7,68.23,12.39,52.04,65.44,6.86,6.92 -meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131073,False,False,7436,3.23,41.6,71.86,3.72,43.57,56.69,14.64,3.1 LumiOpen/Viking-13B (few-shot),14030,131,4097,True,False,840,3.24,31.55,78.66,5.69,52.93,60.05,1.32,0.35 -stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.25,38.0,75.15,1.04,53.11,55.63,8.72,3.19 +meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131073,False,False,7436,3.24,41.6,71.86,3.72,43.57,56.69,14.64,3.1 four-two-labs/lynx-micro (few-shot),2506,256,8192,False,False,12062,3.26,69.19,78.58,3.67,0.0,62.85,10.04,15.7 +stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,3.26,38.0,75.15,1.04,53.11,55.63,8.72,3.19 ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,3.28,36.01,57.18,1.52,51.04,58.57,13.42,7.33 AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot),7111,64,2048,True,False,1473,3.31,14.58,56.6,10.92,50.18,64.89,6.16,10.9 google/gemma-2b (few-shot),2506,256,8192,True,False,6087,3.31,14.67,75.45,3.82,51.73,59.72,10.98,4.24 @@ -147,7 +147,7 @@ ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,3.33,33.34,7 HPLT/gpt-13b-nordic-prerelease (few-shot),14030,131,4099,True,False,3520,3.36,32.19,72.26,2.39,48.92,57.46,-0.49,0.5 LumiOpen/Viking-7B (few-shot),7550,131,4096,True,False,1431,3.36,30.64,72.02,1.08,48.72,57.93,1.14,1.13 HuggingFaceTB/SmolLM2-1.7B-Instruct (few-shot),1711,49,8192,True,False,15971,3.37,37.37,64.46,4.49,43.92,54.5,8.61,4.51 -AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot),1445,64,2048,True,False,4544,3.38,19.04,73.34,2.9,47.45,63.33,0.65,-0.18 +AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot),1445,64,2048,True,False,4544,3.39,19.04,73.34,2.9,47.45,63.33,0.65,-0.18 ibm-granite/granite-3b-code-base-2k (few-shot),3483,49,2048,True,False,2732,3.4,51.76,70.61,6.24,44.67,41.31,7.41,5.42 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,3.4,50.1,65.67,4.55,42.83,45.16,7.58,3.79 ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,3.4,29.08,65.51,0.5,46.52,56.02,8.0,5.76 @@ -155,7 +155,7 @@ NorwAI/NorwAI-Mistral-7B-instruct (few-shot),7537,68,4065,False,False,3027,3.42, 
openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.42,37.17,20.2,6.13,46.66,65.28,10.89,9.2 meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,3.46,29.89,74.33,1.06,46.89,52.06,0.93,0.09 Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.48,20.94,52.54,0.34,43.55,61.19,10.74,4.83 -Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.5,37.26,5.2,1.85,54.15,58.24,22.04,14.76 +Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.49,37.26,5.2,1.85,54.15,58.24,22.04,14.76 AI-Sweden-Models/gpt-sw3-6.7b (few-shot),7111,64,2048,True,False,2285,3.51,18.83,53.68,3.49,49.81,61.05,1.22,0.6 AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,3.52,6.08,71.38,1.17,45.55,60.11,2.2,0.67 HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,3.53,27.07,61.96,2.65,46.16,55.11,0.32,-0.0 @@ -167,18 +167,19 @@ allenai/OLMo-7B (few-shot),6888,50,2051,True,False,5403,3.66,37.36,72.08,-0.86,4 AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot),471,64,2048,True,False,5855,3.67,14.84,59.0,0.06,34.37,61.28,0.48,0.33 mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.67,27.31,45.94,-0.97,35.57,55.79,0.14,0.52 ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.71,23.26,55.06,1.81,35.49,54.8,-1.51,0.41 -PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.75,21.42,45.75,-0.25,32.71,57.21,1.57,0.36 +PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.74,21.42,45.75,-0.25,32.71,57.21,1.57,0.36 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2051,True,False,5484,3.77,20.49,70.04,2.28,45.85,39.53,0.69,0.12 RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.79,22.38,31.11,0.09,44.36,55.44,1.12,-0.91 google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.81,33.51,43.97,0.53,39.39,40.55,11.06,1.03 Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.83,28.96,26.58,-1.88,34.59,53.36,6.52,1.91 NbAiLab/nb-gpt-j-6B-alpaca (few-shot),6055,50,1024,False,False,2607,3.87,13.28,60.17,1.52,37.23,46.68,-0.03,0.02 +tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,3.87,26.41,25.99,1.64,21.39,58.09,3.51,1.05 Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.89,18.57,40.23,0.21,29.49,53.29,2.59,-0.84 AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.92,23.77,34.29,1.57,33.7,51.36,-0.96,0.3 mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,3.95,23.92,31.93,0.46,30.81,52.68,-0.85,-1.24 NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,3.97,4.49,73.13,2.5,22.14,47.1,0.88,-1.21 NbAiLab/nb-llama-3.2-3B (few-shot),3213,128,131072,True,False,1880,4.0,1.37,72.06,8.44,0.45,51.36,10.37,2.74 -AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4.06,23.05,12.47,0.08,20.43,59.8,0.72,0.11 +AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4.05,23.05,12.47,0.08,20.43,59.8,0.72,0.11 allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,4.07,29.39,38.95,-1.35,17.85,43.75,-0.22,0.75 PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,4.08,36.29,39.68,0.96,32.64,28.38,1.35,-0.19 HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,4.25,18.22,11.52,1.72,27.27,45.57,0.69,0.68 @@ -190,12 +191,12 @@ HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.33,19.15,-3 PleIAs/Pleias-Nano 
(few-shot),1195,66,2048,True,False,2519,4.39,14.09,23.71,1.74,32.0,28.3,0.94,-0.48 HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.42,13.64,9.34,2.2,26.06,37.49,-0.0,0.78 HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.5,17.09,7.41,0.47,11.73,38.3,0.3,0.06 -PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.53,16.28,17.38,-0.45,17.78,27.12,-1.38,0.94 +PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.52,16.28,17.38,-0.45,17.78,27.12,-1.38,0.94 PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.57,22.09,14.15,-0.04,21.6,27.08,-0.65,0.48 NbAiLab/nb-gpt-j-6B-v2 (few-shot),6051,50,1024,False,False,2556,4.61,0.31,27.42,0.07,17.82,27.09,-0.67,0.86 NbAiLab/nb-gpt-j-6B@sharded (few-shot),-1,50,1024,True,False,2630,4.76,0.01,33.5,-0.02,4.79,26.97,-0.11,0.56 peter-sk/gpt-neox-da (few-shot),1515,50,1024,True,False,6025,4.78,0.26,4.75,-0.6,0.06,41.84,-0.41,0.52 -ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.82,0.0,0.0,0.0,0.0,40.82,1.19,1.55 +ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.81,0.0,0.0,0.0,0.0,40.82,1.19,1.55 NorGLM/NorGPT-369M (few-shot),-1,64,1024,True,False,19896,4.83,1.47,5.5,-2.19,0.1,37.4,-0.53,0.25 Sigurdur/icebreaker (few-shot),110,32,1024,False,False,48619,4.88,0.0,-3.6,0.0,0.0,39.68,-0.2,-0.25 ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.91,0.0,0.0,0.49,6.24,31.89,-0.37,0.36 diff --git a/swedish-nlg.md b/swedish-nlg.md index 036d48e5..ca21f006 100644 --- a/swedish-nlg.md +++ b/swedish-nlg.md @@ -3,7 +3,7 @@ layout: leaderboard title: Swedish NLG 🇸🇪 --- -
Last updated: 10/01/2025 12:30:05 CET
+Last updated: 11/01/2025 11:03:14 CET
@@ -155,29 +155,6 @@ title: Swedish NLG 🇸🇪 14.0.3 14.0.3 - - meta-llama/Meta-Llama-3-70B (few-shot, val) - 70554 - 128 - 8192 - True - 312 ± 55 / 177 ± 51 - 1.42 - 74.61 ± 2.99 / 56.50 ± 6.30 - 78.61 ± 1.40 / 78.64 ± 1.53 - 63.20 ± 3.34 / 80.61 ± 2.52 - 61.98 ± 1.65 / 66.85 ± 1.42 - 67.60 ± 0.41 / 22.47 ± 0.82 - 61.55 ± 1.68 / 71.02 ± 1.21 - 66.21 ± 3.22 / 73.40 ± 2.77 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - meta-llama/Llama-3.3-70B-Instruct (few-shot) 70554 @@ -201,6 +178,29 @@ title: Swedish NLG 🇸🇪 14.0.3 14.0.3 + + meta-llama/Meta-Llama-3-70B (few-shot, val) + 70554 + 128 + 8192 + True + 312 ± 55 / 177 ± 51 + 1.43 + 74.61 ± 2.99 / 56.50 ± 6.30 + 78.61 ± 1.40 / 78.64 ± 1.53 + 63.20 ± 3.34 / 80.61 ± 2.52 + 61.98 ± 1.65 / 66.85 ± 1.42 + 67.60 ± 0.41 / 22.47 ± 0.82 + 61.55 ± 1.68 / 71.02 ± 1.21 + 66.21 ± 3.22 / 73.40 ± 2.77 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + gpt-4o-2024-05-13 (zero-shot, val) unknown @@ -415,7 +415,7 @@ title: Swedish NLG 🇸🇪 8192 True 1,673 ± 583 / 275 ± 85 - 1.84 + 1.83 77.06 ± 2.72 / 67.75 ± 5.69 53.56 ± 7.15 / 67.07 ± 3.93 47.50 ± 3.37 / 71.31 ± 2.69 @@ -622,7 +622,7 @@ title: Swedish NLG 🇸🇪 4096 True 1,904 ± 475 / 361 ± 121 - 2.10 + 2.09 48.92 ± 3.54 / 30.97 ± 2.58 62.08 ± 12.16 / 66.00 ± 9.89 68.93 ± 11.34 / 82.77 ± 8.76 @@ -851,7 +851,7 @@ title: Swedish NLG 🇸🇪 128 131072 True - 1,005 ± 330 / 196 ± 74 + 1,473 ± 377 / 283 ± 96 2.30 55.80 ± 2.68 / 34.65 ± 1.98 79.23 ± 0.48 / 76.86 ± 0.80 @@ -1052,29 +1052,6 @@ title: Swedish NLG 🇸🇪 13.1.0 13.1.0 - - CohereForAI/aya-expanse-8b (few-shot) - 8028 - 256 - 8192 - False - 2,686 ± 685 / 491 ± 164 - 2.36 - 57.38 ± 1.93 / 29.69 ± 4.23 - 78.43 ± 0.93 / 74.54 ± 2.40 - 14.52 ± 2.43 / 45.18 ± 4.21 - 53.14 ± 1.81 / 63.00 ± 0.50 - 65.69 ± 0.22 / 19.95 ± 0.16 - 37.32 ± 0.70 / 52.95 ± 0.50 - 38.28 ± 1.31 / 53.70 ± 0.97 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - bineric/NorskGPT-Llama3-8b (few-shot) 8030 @@ -1144,6 +1121,29 @@ title: Swedish NLG 🇸🇪 12.3.2 12.3.2 + + CohereForAI/aya-expanse-8b (few-shot) + 8028 + 256 + 8192 + False + 2,686 ± 685 / 491 ± 164 + 2.37 + 57.38 ± 1.93 / 29.69 ± 4.23 + 78.43 ± 0.93 / 74.54 ± 2.40 + 14.52 ± 2.43 / 45.18 ± 4.21 + 53.14 ± 1.81 / 63.00 ± 0.50 + 65.69 ± 0.22 / 19.95 ± 0.16 + 37.32 ± 0.70 / 52.95 ± 0.50 + 38.28 ± 1.31 / 53.70 ± 0.97 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + mlabonne/NeuralBeagle14-7B (few-shot, val) 7242 @@ -1288,7 +1288,7 @@ title: Swedish NLG 🇸🇪 128 8192 True - 1,007 ± 316 / 162 ± 45 + 1,483 ± 377 / 287 ± 97 2.41 69.67 ± 1.30 / 52.94 ± 4.01 59.93 ± 4.70 / 67.54 ± 3.04 @@ -1541,7 +1541,7 @@ title: Swedish NLG 🇸🇪 128 8192 True - 1,335 ± 338 / 260 ± 88 + 1,477 ± 376 / 285 ± 97 2.47 59.92 ± 2.46 / 40.98 ± 4.90 80.91 ± 0.41 / 78.09 ± 1.22 @@ -2041,29 +2041,6 @@ title: Swedish NLG 🇸🇪 11.0.0 11.0.0 - - timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot) - 7242 - 32 - 32768 - False - 5,172 ± 813 / 1,647 ± 518 - 2.66 - 46.74 ± 4.30 / 33.57 ± 4.51 - 77.06 ± 1.82 / 79.02 ± 1.37 - 14.00 ± 1.59 / 53.89 ± 3.10 - 56.74 ± 0.52 / 63.45 ± 0.49 - 62.56 ± 0.85 / 15.85 ± 0.34 - 30.87 ± 1.35 / 47.77 ± 1.01 - 15.79 ± 1.57 / 35.66 ± 0.84 - 12.5.2 - 12.3.2 - 12.3.2 - 12.4.0 - 12.4.0 - 12.3.2 - 12.3.2 - bineric/NorskGPT-Llama-13B-v0.1 (few-shot) 13016 @@ -2110,6 +2087,29 @@ title: Swedish NLG 🇸🇪 12.4.0 12.4.0 + + timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot) + 7242 + 32 + 32768 + False + 5,172 ± 813 / 1,647 ± 518 + 2.67 + 46.74 ± 4.30 / 33.57 ± 4.51 + 77.06 ± 1.82 / 79.02 ± 1.37 + 14.00 ± 
1.59 / 53.89 ± 3.10 + 56.74 ± 0.52 / 63.45 ± 0.49 + 62.56 ± 0.85 / 15.85 ± 0.34 + 30.87 ± 1.35 / 47.77 ± 1.01 + 15.79 ± 1.57 / 35.66 ± 0.84 + 12.5.2 + 12.3.2 + 12.3.2 + 12.4.0 + 12.4.0 + 12.3.2 + 12.3.2 + meta-llama/Llama-3.2-3B-Instruct (few-shot) 3213 @@ -2622,7 +2622,7 @@ title: Swedish NLG 🇸🇪 32 4096 True - 3,194 ± 687 / 650 ± 216 + 8,681 ± 1,650 / 2,177 ± 717 2.97 46.15 ± 2.77 / 24.28 ± 3.74 67.17 ± 1.93 / 70.99 ± 1.64 @@ -3122,29 +3122,6 @@ title: Swedish NLG 🇸🇪 9.3.1 9.3.1 - - meta-llama/Llama-3.2-1B-Instruct (few-shot) - 1236 - 128 - 131073 - False - 7,436 ± 1,846 / 1,508 ± 479 - 3.23 - 41.60 ± 2.74 / 37.22 ± 3.26 - 71.86 ± 2.01 / 71.15 ± 2.16 - 3.72 ± 1.40 / 48.04 ± 1.96 - 43.57 ± 1.35 / 52.90 ± 1.42 - 56.69 ± 0.79 / 13.27 ± 0.37 - 14.64 ± 0.91 / 35.56 ± 0.80 - 3.10 ± 0.87 / 26.60 ± 0.68 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - 13.0.0 - LumiOpen/Viking-13B (few-shot) 14030 @@ -3169,27 +3146,27 @@ title: Swedish NLG 🇸🇪 12.10.5 - stabilityai/stablelm-2-1_6b (few-shot) - 1645 - 100 - 4096 - True - 7,259 ± 2,120 / 1,240 ± 432 - 3.25 - 38.00 ± 4.39 / 29.74 ± 5.04 - 75.15 ± 0.55 / 61.46 ± 0.82 - 1.04 ± 2.08 / 34.49 ± 1.17 - 53.11 ± 0.53 / 58.91 ± 0.32 - 55.63 ± 1.02 / 13.45 ± 0.61 - 8.72 ± 0.93 / 30.92 ± 0.72 - 3.19 ± 0.62 / 26.88 ± 0.62 - 12.10.8 - 12.10.8 - 12.10.8 - 12.10.8 - 12.10.8 - 12.10.8 - 12.10.8 + meta-llama/Llama-3.2-1B-Instruct (few-shot) + 1236 + 128 + 131073 + False + 7,436 ± 1,846 / 1,508 ± 479 + 3.24 + 41.60 ± 2.74 / 37.22 ± 3.26 + 71.86 ± 2.01 / 71.15 ± 2.16 + 3.72 ± 1.40 / 48.04 ± 1.96 + 43.57 ± 1.35 / 52.90 ± 1.42 + 56.69 ± 0.79 / 13.27 ± 0.37 + 14.64 ± 0.91 / 35.56 ± 0.80 + 3.10 ± 0.87 / 26.60 ± 0.68 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 + 13.0.0 four-two-labs/lynx-micro (few-shot) @@ -3214,6 +3191,29 @@ title: Swedish NLG 🇸🇪 14.1.1 14.1.1 + + stabilityai/stablelm-2-1_6b (few-shot) + 1645 + 100 + 4096 + True + 7,259 ± 2,120 / 1,240 ± 432 + 3.26 + 38.00 ± 4.39 / 29.74 ± 5.04 + 75.15 ± 0.55 / 61.46 ± 0.82 + 1.04 ± 2.08 / 34.49 ± 1.17 + 53.11 ± 0.53 / 58.91 ± 0.32 + 55.63 ± 1.02 / 13.45 ± 0.61 + 8.72 ± 0.93 / 30.92 ± 0.72 + 3.19 ± 0.62 / 26.88 ± 0.62 + 12.10.8 + 12.10.8 + 12.10.8 + 12.10.8 + 12.10.8 + 12.10.8 + 12.10.8 + ibm-granite/granite-3.0-3b-a800m-base (few-shot) 3374 @@ -3451,7 +3451,7 @@ title: Swedish NLG 🇸🇪 2048 True 4,544 ± 1,000 / 1,106 ± 359 - 3.38 + 3.39 19.04 ± 2.67 / 19.98 ± 2.64 73.34 ± 1.34 / 68.41 ± 2.31 2.90 ± 1.74 / 44.43 ± 4.49 @@ -3635,7 +3635,7 @@ title: Swedish NLG 🇸🇪 32768 True 3,248 ± 739 / 761 ± 252 - 3.50 + 3.49 37.26 ± 4.28 / 29.89 ± 5.96 5.20 ± 7.35 / 30.65 ± 4.97 1.85 ± 1.54 / 33.71 ± 0.46 @@ -3911,7 +3911,7 @@ title: Swedish NLG 🇸🇪 4096 True 6,513 ± 1,241 / 1,282 ± 644 - 3.75 + 3.74 21.42 ± 6.21 / 20.27 ± 5.32 45.75 ± 7.94 / 50.33 ± 6.88 -0.25 ± 1.25 / 44.95 ± 2.93 @@ -4042,6 +4042,29 @@ title: Swedish NLG 🇸🇪 10.0.1 10.0.1 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 3.87 + 26.41 ± 2.74 / 25.18 ± 2.46 + 25.99 ± 3.51 / 35.36 ± 4.53 + 1.64 ± 1.90 / 37.52 ± 2.69 + 21.39 ± 1.66 / 26.70 ± 1.83 + 58.09 ± 0.53 / 13.29 ± 0.62 + 3.51 ± 1.35 / 27.14 ± 1.02 + 1.05 ± 1.05 / 25.62 ± 0.84 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + Qwen/Qwen1.5-0.5B-Chat (few-shot) 620 @@ -4164,7 +4187,7 @@ title: Swedish NLG 🇸🇪 2048 True 7,717 ± 1,553 / 2,013 ± 625 - 4.06 + 4.05 23.05 ± 2.31 / 24.35 ± 1.99 12.47 ± 7.10 / 23.03 ± 8.78 0.08 ± 0.16 / 33.34 ± 0.30 @@ -4440,7 +4463,7 @@ title: Swedish NLG 🇸🇪 2048 True 2,331 ± 787 / 301 ± 97 - 4.53 + 4.52 16.28 ± 
4.73 / 16.34 ± 4.84 17.38 ± 10.26 / 34.60 ± 5.85 -0.45 ± 0.67 / 36.19 ± 2.92 @@ -4555,7 +4578,7 @@ title: Swedish NLG 🇸🇪 8192 True 7,692 ± 1,423 / 1,960 ± 644 - 4.82 + 4.81 0.00 ± 0.00 / 0.00 ± 0.00 0.00 ± 0.00 / 19.16 ± 0.14 0.00 ± 0.00 / 33.30 ± 0.27 diff --git a/swedish-nlu.csv b/swedish-nlu.csv index 51b6ae8d..0609efb9 100644 --- a/swedish-nlu.csv +++ b/swedish-nlu.csv @@ -14,12 +14,12 @@ AI-Sweden-Models/bert-large-nordic-pile-1M-steps,369,64,512,True,False,6571,1.45 KB/bert-base-swedish-cased,125,50,512,True,False,16181,1.46,81.95,75.58,78.86,38.56 intfloat/multilingual-e5-large,560,250,512,True,False,6732,1.46,80.36,79.65,63.15,46.99 ltg/norbert3-large,354,50,508,True,False,5048,1.46,79.01,75.32,69.11,48.88 +KBLab/bert-base-swedish-cased,125,50,512,True,False,16164,1.47,81.23,75.73,78.6,38.56 google/rembert,576,250,256,True,False,11736,1.47,78.23,75.99,72.17,46.0 meta-llama/Llama-3.3-70B-Instruct (few-shot),70554,128,131072,True,False,1353,1.47,71.98,81.15,64.46,51.22 +microsoft/mdeberta-v3-base,279,251,512,True,False,20637,1.47,78.84,75.24,72.3,44.74 AI-Nordics/bert-large-swedish-cased,335,31,512,True,False,7199,1.48,78.61,77.47,72.87,43.11 -KBLab/bert-base-swedish-cased,125,50,512,True,False,16164,1.48,81.23,75.73,78.6,38.56 "gpt-4o-2024-05-13 (zero-shot, val)",-1,200,8191,True,False,637,1.48,75.06,74.85,65.23,53.02 -microsoft/mdeberta-v3-base,279,251,512,True,False,20637,1.48,78.84,75.24,72.3,44.74 "gpt-3.5-turbo-0613 (few-shot, val)",-1,100,4095,True,False,921,1.5,73.04,72.77,58.06,58.02 google/gemma-2-27b-it (few-shot),27227,256,8193,True,False,1516,1.51,62.59,80.73,61.37,58.76 Qwen/QwQ-32B-Preview (few-shot),32764,152,32768,True,False,2258,1.52,69.73,78.76,57.57,56.43 @@ -40,9 +40,9 @@ AI-Sweden-Models/Llama-3-8B-instruct (few-shot),8030,128,8192,False,False,1472,1 KennethEnevoldsen/dfm-sentence-encoder-large-1,355,50,512,True,False,6245,1.66,71.65,74.92,63.43,46.2 "152334H/miqu-1-70b-sf (few-shot, val)",68977,32,32764,True,False,2126,1.68,62.96,75.25,53.28,56.42 KennethEnevoldsen/dfm-sentence-encoder-large-2,355,50,512,True,False,6569,1.68,71.86,74.67,62.77,44.77 -setu4993/LaBSE,471,501,512,True,False,25418,1.7,77.78,73.58,60.36,41.71 +setu4993/LaBSE,471,501,512,True,False,25418,1.69,77.78,73.58,60.36,41.71 vesteinn/FoBERT,124,50,512,True,False,15623,1.7,78.58,73.41,71.14,31.62 -KennethEnevoldsen/dfm-sentence-encoder-medium-3,178,120,512,True,False,14050,1.72,81.35,71.16,63.89,37.18 +KennethEnevoldsen/dfm-sentence-encoder-medium-3,178,120,512,True,False,14050,1.71,81.35,71.16,63.89,37.18 google/gemma-2-9b-it (few-shot),9242,256,8193,True,False,2062,1.72,52.5,78.51,61.28,55.22 NbAiLab/nb-bert-base,178,120,512,True,False,14050,1.73,80.38,71.21,64.03,35.33 google/gemma-2-9b (few-shot),9242,256,8193,True,False,2038,1.73,50.43,80.55,50.86,59.35 @@ -88,10 +88,10 @@ nvidia/mistral-nemo-minitron-8b-base (few-shot),8414,131,8192,True,False,2470,2. 
"birgermoell/Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2887,2.03,55.29,78.29,18.45,58.42 meta-llama/Llama-3.1-8B (few-shot),8030,128,131072,True,False,2986,2.04,62.19,80.31,30.29,42.78 ZurichNLP/unsup-simcse-xlm-roberta-base,278,250,512,True,False,34520,2.05,75.49,71.12,36.69,33.55 -meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1335,2.05,59.92,80.91,26.39,47.69 +meta-llama/Meta-Llama-3-8B (few-shot),8030,128,8192,True,False,1477,2.05,59.92,80.91,26.39,47.69 Geotrend/bert-base-25lang-cased,151,85,512,True,False,13908,2.06,75.62,62.5,38.18,40.96 "RJuro/munin-neuralbeagle-SkoleGPTOpenOrca-7b (few-shot, val)",7242,32,32768,False,True,3008,2.06,59.36,72.04,22.38,57.96 -meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1005,2.06,55.8,79.23,32.67,46.88 +meta-llama/Llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1473,2.06,55.8,79.23,32.67,46.88 "birgermoell/Rapid-Cycling (few-shot, val)",7242,32,32768,False,True,2346,2.07,53.66,77.72,16.22,59.75 Mabeck/Heidrun-Mistral-7B-chat (few-shot),7242,32,32768,False,False,1419,2.08,55.06,77.5,17.47,58.67 "birgermoell/BeagleCatMunin-Flashback-Bellman (few-shot, val)",7242,32,32768,False,True,2890,2.08,52.96,76.99,14.27,59.92 @@ -99,14 +99,14 @@ google-bert/bert-base-multilingual-uncased,167,106,512,True,False,13993,2.08,70. danish-foundation-models/munin-7b-v0.1dev0 (few-shot),7242,32,8192,True,False,6113,2.09,47.1,73.05,30.29,57.39 birgermoell/roberta-swedish-scandi,125,50,512,True,False,15385,2.1,68.55,69.96,52.88,27.99 "merge-crew/da-sv-ties (few-shot, val)",7242,32,32768,True,True,2457,2.1,48.36,76.57,20.94,59.07 -Geotrend/bert-base-en-da-cased,111,33,512,True,False,14062,2.12,74.88,61.89,40.22,39.95 -Geotrend/bert-base-en-fr-de-no-da-cased,118,42,512,True,False,13973,2.12,76.55,61.6,37.44,39.32 -Geotrend/bert-base-en-no-cased,111,33,512,True,False,14081,2.12,75.33,61.8,36.62,39.95 +Geotrend/bert-base-en-da-cased,111,33,512,True,False,14062,2.11,74.88,61.89,40.22,39.95 +Geotrend/bert-base-en-fr-de-no-da-cased,118,42,512,True,False,13973,2.11,76.55,61.6,37.44,39.32 +Geotrend/bert-base-en-no-cased,111,33,512,True,False,14081,2.11,75.33,61.8,36.62,39.95 Mabeck/Heidrun-Mistral-7B-base (few-shot),7242,32,32768,True,False,3823,2.12,48.43,79.43,17.37,57.05 +Twitter/twhin-bert-base,279,250,512,True,False,11514,2.12,70.17,66.62,46.72,31.38 +"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.12,62.45,77.69,68.93,12.11 "meta-llama/Llama-2-70b-chat-hf (few-shot, val)",68977,32,4096,True,False,1979,2.12,55.91,64.52,23.85,58.88 CohereForAI/aya-expanse-8b (few-shot),8028,256,8192,False,False,2686,2.13,57.38,78.43,14.52,53.14 -Twitter/twhin-bert-base,279,250,512,True,False,11514,2.13,70.17,66.62,46.72,31.38 -"gpt-4o-mini-2024-07-18 (few-shot, val)",-1,200,8191,True,False,784,2.13,62.45,77.69,68.93,12.11 "merge-crew/da-sv-dare-ties-density-0.6 (few-shot, val)",7242,32,32768,True,True,2515,2.13,45.12,78.74,19.74,60.15 "timpal0l/BeagleCatMunin2 (few-shot, val)",7242,32,32768,False,True,2477,2.13,60.87,73.72,6.78,58.75 utter-project/EuroLLM-9B-Instruct (few-shot),9152,128,4096,False,False,1483,2.13,40.59,76.02,33.98,56.98 @@ -119,33 +119,33 @@ mgoin/Nemotron-4-340B-Instruct-hf-FP8 (few-shot),341029,256,4096,True,False,1904 mhenrichsen/danskgpt-chat-v2.1 (few-shot),-1,32,32768,True,False,5085,2.15,54.37,75.98,17.98,55.07 AI-Sweden-Models/Llama-3-8B (few-shot),8030,128,8192,True,False,4141,2.16,36.45,81.12,26.8,58.16 ibm-granite/granite-3.0-8b-base 
(few-shot),8171,49,4097,True,False,2515,2.16,44.8,75.92,24.84,56.71 -meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1007,2.16,69.67,59.93,27.63,49.84 +meta-llama/Meta-Llama-3-8B-Instruct (few-shot),8030,128,8192,True,False,1483,2.16,69.67,59.93,27.63,49.84 Geotrend/bert-base-da-cased,104,23,512,True,False,15432,2.17,74.13,62.18,36.93,37.59 NorwAI/NorwAI-Mixtral-8x7B (few-shot),46998,68,32768,True,False,2368,2.17,57.01,80.12,43.04,30.44 bineric/NorskGPT-Llama3-8b (few-shot),8030,128,8192,False,False,3382,2.17,63.19,76.06,5.34,56.7 Nexusflow/Starling-LM-7B-beta (few-shot),7242,32,4096,False,False,4136,2.18,56.28,77.51,23.25,47.09 +jonfd/electra-small-nordic,22,96,128,True,False,5989,2.18,71.07,66.42,69.19,11.85 meta-llama/Llama-2-13b-chat-hf (few-shot),13016,32,4096,True,False,2849,2.18,49.9,77.19,14.67,57.12 mhenrichsen/hestenettetLM (few-shot),7242,32,32768,True,False,1151,2.18,53.0,79.7,4.32,59.03 mistralai/Ministral-8B-Instruct-2410 (few-shot),8020,131,32768,True,False,1302,2.18,54.76,73.32,16.17,57.94 mistralai/Mistral-7B-v0.1 (few-shot),7242,32,32768,True,False,1446,2.18,53.34,80.0,4.61,58.99 mistralai/Mixtral-8x7B-Instruct-v0.1 (few-shot),46703,32,32768,True,False,5535,2.18,46.15,80.33,32.89,46.51 alpindale/Mistral-7B-v0.2-hf (few-shot),7242,32,32768,True,False,1841,2.19,48.96,78.9,10.82,58.91 -jonfd/electra-small-nordic,22,96,128,True,False,5989,2.19,71.07,66.42,69.19,11.85 mistralai/Mistral-7B-v0.3 (few-shot),7248,33,32768,True,False,1364,2.19,49.18,79.08,11.06,58.98 "KennethEnevoldsen/munin_mistral-7b (few-shot, val)",7242,32,32768,False,True,2543,2.21,52.34,77.66,6.0,60.16 ThatsGroes/munin-SkoleGPTOpenOrca-7b-16bit (few-shot),7242,32,32768,False,False,3006,2.21,44.64,77.98,16.57,57.31 ibm-granite/granite-3.0-8b-instruct (few-shot),8171,49,4096,True,False,1118,2.21,44.94,76.78,16.96,56.83 jhu-clsp/bernice,278,250,128,True,False,5567,2.21,71.34,70.91,53.52,16.41 ltg/norbert3-small,41,50,508,True,False,13515,2.22,74.22,63.8,37.77,31.45 -vesteinn/DanskBERT,124,50,512,True,False,15749,2.24,72.33,67.77,33.79,32.71 +vesteinn/DanskBERT,124,50,512,True,False,15749,2.23,72.33,67.77,33.79,32.71 bineric/NorskGPT-Mistral-7b (few-shot),7242,32,32768,False,False,1440,2.25,58.4,74.3,0.0,59.16 danish-foundation-models/munin-7b-alpha (few-shot),7242,32,32768,True,False,6116,2.25,42.23,78.8,15.47,56.75 meta-llama/Llama-3.2-3B (few-shot),3213,128,131073,True,False,3713,2.25,51.06,77.76,5.88,57.43 timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot),7242,32,32768,False,False,5172,2.25,46.74,77.06,14.0,56.74 RuterNorway/Llama-2-13b-chat-norwegian (few-shot),-1,32,4096,False,False,3254,2.27,50.85,74.17,7.51,57.32 microsoft/xlm-align-base,278,250,512,True,False,14744,2.27,78.6,73.67,15.41,32.41 +meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,2.28,44.11,79.05,7.34,57.49 DDSC/roberta-base-scandinavian,125,50,512,True,False,14491,2.29,58.84,72.28,37.61,30.59 -meta-llama/Llama-2-7b-hf (few-shot),6738,32,4096,True,False,930,2.29,44.11,79.05,7.34,57.49 occiglot/occiglot-7b-eu5 (few-shot),7242,32,32768,True,False,2219,2.29,49.02,76.56,2.18,58.98 senseable/WestLake-7B-v2 (few-shot),7242,32,32768,False,False,5993,2.32,58.9,67.74,16.52,49.41 microsoft/infoxlm-base,278,250,512,True,False,34735,2.33,79.43,71.48,7.26,33.72 @@ -154,11 +154,11 @@ CohereForAI/c4ai-command-r-v01 (few-shot),34981,256,8192,False,False,1919,2.34,4 bineric/NorskGPT-Llama-13B-v0.1 (few-shot),-1,32,4096,False,False,2856,2.34,49.26,79.05,0.22,56.78 
distilbert/distilbert-base-multilingual-cased,135,120,512,True,False,26355,2.35,70.08,59.66,33.71,31.48 ibm-granite/granite-8b-code-base-4k (few-shot),8055,49,4096,True,False,2313,2.35,59.77,74.45,3.97,50.18 +sentence-transformers/paraphrase-xlm-r-multilingual-v1,278,250,512,True,False,20154,2.35,70.22,71.33,39.6,18.65 +clips/mfaq,278,250,128,True,False,5591,2.36,76.31,73.32,32.29,16.12 meta-llama/Llama-3.2-3B-Instruct (few-shot),3213,128,131073,False,False,10424,2.36,43.74,76.98,16.01,48.38 occiglot/occiglot-7b-eu5-instruct (few-shot),7242,32,32768,False,False,2088,2.36,47.67,71.73,7.9,57.78 -sentence-transformers/paraphrase-xlm-r-multilingual-v1,278,250,512,True,False,20154,2.36,70.22,71.33,39.6,18.65 LumiOpen/Viking-33B@1000B (few-shot),33119,131,4099,True,False,2080,2.37,42.35,77.68,8.08,54.57 -clips/mfaq,278,250,128,True,False,5591,2.37,76.31,73.32,32.29,16.12 ibm-granite/granite-8b-code-instruct-4k (few-shot),8055,49,4096,True,False,5617,2.37,52.85,73.93,8.27,48.49 Geotrend/distilbert-base-25lang-cased,109,85,512,True,False,26099,2.38,70.56,60.69,30.83,31.41 Twitter/twhin-bert-large,561,250,512,True,False,9707,2.38,74.26,63.35,16.07,36.77 @@ -176,8 +176,8 @@ AI-Sweden-Models/gpt-sw3-40b (few-shot),39927,64,2048,True,False,409,2.43,32.0,8 Geotrend/distilbert-base-da-cased,61,23,512,True,False,28950,2.43,69.25,58.47,29.8,30.61 Geotrend/distilbert-base-en-da-cased,69,33,512,True,False,26196,2.43,69.62,59.42,29.01,31.82 MaLA-LM/emma-500-llama2-7b (few-shot),6738,32,4096,True,False,6275,2.43,41.49,75.64,0.66,57.48 +mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.43,47.92,62.9,19.95,52.51 dbmdz/bert-base-historic-multilingual-cased,111,32,512,True,False,20047,2.44,68.83,64.25,28.62,28.78 -mistralai/Mistral-7B-Instruct-v0.2 (few-shot),7242,32,32768,False,False,2370,2.44,47.92,62.9,19.95,52.51 01-ai/Yi-1.5-6B (few-shot),6061,64,4097,True,False,2867,2.45,45.55,70.71,4.83,55.25 Geotrend/distilbert-base-en-no-cased,69,33,512,True,False,26597,2.45,69.28,59.53,29.36,30.42 neph1/bellman-7b-mistral-instruct-v0.2 (few-shot),7242,32,32768,False,False,2518,2.45,54.38,55.84,16.05,53.22 @@ -193,7 +193,7 @@ google/gemma-2-2b (few-shot),2614,256,8193,True,False,5235,2.53,30.45,76.36,6.06 mideind/IceBERT-xlmr-ic3,278,250,512,True,False,11004,2.53,70.57,66.01,10.2,30.71 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2,118,250,512,True,False,29201,2.53,66.5,72.19,28.75,15.91 bineric/NorskGPT-Llama-7B-v0.1 (few-shot),6738,32,4096,False,False,5384,2.54,53.95,60.91,0.32,55.28 -microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,3194,2.54,46.15,67.17,5.3,51.12 +microsoft/Phi-3-mini-4k-instruct (few-shot),3821,32,4096,True,False,8681,2.54,46.15,67.17,5.3,51.12 stabilityai/stablelm-2-1_6b (few-shot),1645,100,4096,True,False,7259,2.55,38.0,75.15,1.04,53.11 meta-llama/Llama-2-7b-chat-hf (few-shot),6738,32,4096,False,False,2643,2.58,39.72,66.18,6.74,54.05 norallm/normistral-7b-warm-instruct (few-shot),-1,33,2048,True,False,6194,2.58,51.45,63.64,5.8,48.95 @@ -207,11 +207,11 @@ NbAiLab/nb-llama-3.1-70B (few-shot),70554,128,131072,True,False,1220,2.62,69.54, Qwen/Qwen1.5-4B-Chat (few-shot),3950,152,32768,False,False,4347,2.64,40.19,64.08,5.43,53.21 "claude-3-5-haiku-20241022 (zero-shot, val)",-1,-1,200000,True,False,277,2.64,57.06,59.89,9.3,39.97 ibm-granite/granite-3b-code-instruct-2k (few-shot),3483,49,2048,True,False,9059,2.65,50.1,65.67,4.55,42.83 -meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131073,False,False,7436,2.67,41.6,71.86,3.72,43.57 
+meta-llama/Llama-3.2-1B-Instruct (few-shot),1236,128,131073,False,False,7436,2.66,41.6,71.86,3.72,43.57 HPLT/gpt-13b-nordic-prerelease (few-shot),14030,131,4099,True,False,3520,2.69,32.19,72.26,2.39,48.92 ibm-granite/granite-7b-base (few-shot),6738,32,2048,True,False,4405,2.69,33.34,72.0,0.25,52.53 -sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,2.7,65.5,68.33,14.81,16.11 -sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,2.7,65.5,68.36,14.81,16.11 +sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,135,120,512,True,False,33753,2.69,65.5,68.33,14.81,16.11 +sentence-transformers/quora-distilbert-multilingual,135,120,512,True,False,26458,2.69,65.5,68.36,14.81,16.11 LumiOpen/Viking-7B (few-shot),7550,131,4096,True,False,1431,2.71,30.64,72.02,1.08,48.72 meta-llama/Llama-3.2-1B (few-shot),1236,128,131073,True,False,7577,2.71,29.89,74.33,1.06,46.89 allenai/OLMo-7B (few-shot),6888,50,2051,True,False,5403,2.72,37.36,72.08,-0.86,45.16 @@ -222,11 +222,11 @@ HuggingFaceTB/SmolLM2-1.7B (few-shot),1711,49,8192,True,False,16249,2.78,35.96,6 AI-Sweden-Models/gpt-sw3-20b-instruct (few-shot),20918,64,2048,True,False,1831,2.79,15.7,68.23,12.39,52.04 google/gemma-2b (few-shot),2506,256,8192,True,False,6087,2.81,14.67,75.45,3.82,51.73 ltg/norbert3-xs,15,50,508,True,False,14208,2.81,67.53,59.27,2.83,24.11 -microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,130947,True,False,7312,2.82,42.36,51.53,3.11,51.11 +microsoft/Phi-3-mini-128k-instruct (few-shot),3821,32,130947,True,False,7312,2.81,42.36,51.53,3.11,51.11 ibm-granite/granite-3.0-3b-a800m-base (few-shot),3374,49,4096,True,False,10504,2.85,36.01,57.18,1.52,51.04 +google/gemma-7b-it (few-shot),8538,256,8192,False,False,1792,2.86,59.26,28.63,11.43,46.67 openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot),7453,251,4096,False,False,1254,2.86,35.02,51.8,6.15,50.85 sarnikowski/convbert-medium-small-da-cased,24,29,512,True,False,13821,2.86,58.01,57.67,13.4,24.92 -google/gemma-7b-it (few-shot),8538,256,8192,False,False,1792,2.87,59.26,28.63,11.43,46.67 nvidia/mistral-nemo-minitron-8b-instruct (few-shot),8414,131,8192,True,False,3161,2.87,48.51,78.68,29.18,0.0 AI-Sweden-Models/gpt-sw3-1.3b-instruct (few-shot),1445,64,2048,True,False,4544,2.88,19.04,73.34,2.9,47.45 KBLab/albert-base-swedish-cased-alpha,14,50,512,True,False,15925,2.88,47.19,56.57,20.92,23.86 @@ -234,9 +234,9 @@ NbAiLab/nb-llama-3.1-8B (few-shot),8030,128,131072,True,False,1297,2.88,53.65,80 allenai/OLMo-7B-Twin-2T (few-shot),6888,50,2051,True,False,5484,2.88,20.49,70.04,2.28,45.85 ibm-granite/granite-7b-instruct (few-shot),6738,32,4096,True,False,3136,2.88,29.08,65.51,0.5,46.52 sarnikowski/electra-small-discriminator-da-256-cased,13,29,512,True,False,20340,2.92,52.79,57.93,14.72,20.54 +HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,2.93,27.07,61.96,2.65,46.16 four-two-labs/lynx-micro (few-shot),2506,256,8192,False,False,12062,2.93,69.19,78.58,3.67,0.0 -HPLT/gpt-7b-nordic-prerelease (few-shot),7550,131,4096,True,False,1382,2.94,27.07,61.96,2.65,46.16 -state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,2.96,23.25,71.7,-0.82,40.48 +state-spaces/mamba-2.8b-hf (few-shot),2768,50,32769,True,False,2722,2.95,23.25,71.7,-0.82,40.48 sarnikowski/convbert-small-da-cased,13,29,512,True,False,14273,3.0,55.06,53.7,12.38,22.53 AI-Sweden-Models/gpt-sw3-1.3b (few-shot),1445,64,2048,True,False,4608,3.02,6.08,71.38,1.17,45.55 AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct 
(few-shot),7111,64,2048,True,False,1473,3.02,14.58,56.6,10.92,50.18 @@ -250,36 +250,37 @@ jannikskytt/MeDa-Bert,111,32,511,True,False,16114,3.1,48.32,53.98,3.33,23.15 AI-Sweden-Models/gpt-sw3-6.7b (few-shot),7111,64,2048,True,False,2285,3.11,18.83,53.68,3.49,49.81 sentence-transformers/distiluse-base-multilingual-cased-v1,135,120,512,True,False,34042,3.14,49.86,60.06,3.18,16.08 NbAiLab/nb-llama-3.1-8B-Instruct (few-shot),8030,128,131072,True,False,1296,3.15,32.54,43.55,1.93,44.8 -sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.17,51.67,62.71,2.32,8.76 -sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.17,51.67,63.04,2.32,8.93 +sentence-transformers/distiluse-base-multilingual-cased-v2,135,120,512,True,False,33247,3.16,51.67,62.71,2.32,8.76 +sentence-transformers/distiluse-base-multilingual-cased,135,120,512,True,False,19206,3.16,51.67,63.04,2.32,8.93 +Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.2,20.94,52.54,0.34,43.55 dbmdz/bert-mini-historic-multilingual-cased,12,32,512,True,False,47122,3.2,50.07,56.1,5.05,14.49 Maltehb/aelaectra-danish-electra-small-cased,14,32,128,True,False,4593,3.21,57.82,55.68,19.26,0.0 -Qwen/Qwen1.5-1.8B-Chat (few-shot),1837,152,32768,False,False,8304,3.21,20.94,52.54,0.34,43.55 Qwen/Qwen1.5-1.8B (few-shot),1837,152,32768,True,False,5666,3.21,18.01,51.91,1.49,44.83 mhenrichsen/danskgpt-tiny-chat (few-shot),1100,32,2048,False,False,1745,3.21,27.31,45.94,-0.97,35.57 google/gemma-2b-it (few-shot),2506,256,8192,False,False,6471,3.22,33.51,43.97,0.53,39.39 NbAiLab/nb-gpt-j-6B-alpaca (few-shot),6055,50,1024,False,False,2607,3.25,13.28,60.17,1.52,37.23 ibm-granite/granite-3.0-1b-a400m-base (few-shot),1385,49,4096,True,False,7808,3.25,23.26,55.06,1.81,35.49 -PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.29,36.29,39.68,0.96,32.64 +PleIAs/Pleias-1.2b-Preview (few-shot),1195,66,2048,True,False,10756,3.28,36.29,39.68,0.96,32.64 jjzha/dajobbert-base-uncased,110,32,512,True,False,16243,3.31,42.99,55.49,4.69,14.22 AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot),471,64,2048,True,False,5855,3.32,14.84,59.0,0.06,34.37 -openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.33,37.17,20.2,6.13,46.66 -Maltehb/aelaectra-danish-electra-small-uncased,14,32,128,True,False,5995,3.38,39.17,57.71,17.1,0.11 +openGPT-X/Teuken-7B-instruct-commercial-v0.4 (few-shot),7453,251,4096,True,False,1438,3.32,37.17,20.2,6.13,46.66 +Maltehb/aelaectra-danish-electra-small-uncased,14,32,128,True,False,5995,3.37,39.17,57.71,17.1,0.11 NbAiLab/nb-llama-3.2-1B (few-shot),1236,128,131072,True,False,3424,3.4,4.49,73.13,2.5,22.14 -RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.45,22.38,31.11,0.09,44.36 +RuterNorway/Llama-2-7b-chat-norwegian (few-shot),-1,32,4096,False,False,10890,3.44,22.38,31.11,0.09,44.36 PleIAs/Pleias-3b-Preview (few-shot),3212,66,4096,True,False,6513,3.47,21.42,45.75,-0.25,32.71 +Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.52,28.96,26.58,-1.88,34.59 Qwen/Qwen1.5-4B (few-shot),3950,152,32768,True,False,3248,3.52,37.26,5.2,1.85,54.15 -Qwen/Qwen1.5-0.5B (few-shot),620,152,32768,True,False,11371,3.53,28.96,26.58,-1.88,34.59 AI-Sweden-Models/gpt-sw3-356m (few-shot),471,64,2048,True,False,5758,3.54,23.77,34.29,1.57,33.7 Qwen/Qwen1.5-0.5B-Chat (few-shot),620,152,32768,False,False,11740,3.54,18.57,40.23,0.21,29.49 
3ebdola/Dialectal-Arabic-XLM-R-Base,278,250,512,True,False,12783,3.55,42.78,44.95,1.43,8.71
mhenrichsen/danskgpt-tiny (few-shot),1100,32,2048,True,False,8597,3.58,23.92,31.93,0.46,30.81
allenai/OLMo-1B (few-shot),1177,50,2051,True,False,8536,3.59,29.39,38.95,-1.35,17.85
dbmdz/bert-tiny-historic-multilingual-cased,5,32,512,True,False,78027,3.62,26.87,57.41,-1.06,5.54
+tiiuae/Falcon3-1B-Instruct (few-shot),1669,131,8192,True,False,9270,3.68,26.41,25.99,1.64,21.39
NbAiLab/nb-llama-3.2-3B (few-shot),3213,128,131072,True,False,1880,3.72,1.37,72.06,8.44,0.45
RabotaRu/HRBert-mini,80,200,512,True,False,54951,3.76,24.61,52.31,1.32,2.86
alexanderfalk/danbert-small-cased,83,52,512,True,False,30013,3.77,22.47,53.88,1.55,1.12
-PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.84,14.09,23.71,1.74,32.0
+PleIAs/Pleias-Nano (few-shot),1195,66,2048,True,False,2519,3.83,14.09,23.71,1.74,32.0
fresh-xlm-roberta-base,278,250,512,True,False,2214,3.92,11.91,51.11,0.86,2.0
fresh-electra-small,14,31,512,True,False,7840,3.93,10.54,55.54,-0.15,0.02
HuggingFaceTB/SmolLM2-360M (few-shot),362,49,8192,True,False,22023,3.98,18.22,11.52,1.72,27.27
@@ -287,17 +288,17 @@ PleIAs/Pleias-Pico (few-shot),353,66,2048,True,False,2331,4.01,16.28,17.38,-0.45
HuggingFaceTB/SmolLM2-360M-Instruct (few-shot),362,49,8192,True,False,21777,4.04,13.64,9.34,2.2,26.06
AI-Sweden-Models/gpt-sw3-126m-instruct (few-shot),186,64,2048,True,False,7717,4.05,23.05,12.47,0.08,20.43
PleIAs/Pleias-350m-Preview (few-shot),353,66,2048,True,False,10242,4.05,22.09,14.15,-0.04,21.6
-NbAiLab/nb-gpt-j-6B-v2 (few-shot),6051,50,1024,False,False,2556,4.07,0.31,27.42,0.07,17.82
-NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,3969,True,False,3024,4.13,9.75,17.76,1.22,14.98
+NbAiLab/nb-gpt-j-6B-v2 (few-shot),6051,50,1024,False,False,2556,4.06,0.31,27.42,0.07,17.82
+NorwAI/NorwAI-Mistral-7B-pretrain (few-shot),7537,68,3969,True,False,3024,4.12,9.75,17.76,1.22,14.98
HuggingFaceTB/SmolLM2-135M-Instruct (few-shot),135,49,8192,True,False,25602,4.19,17.09,7.41,0.47,11.73
-NbAiLab/nb-gpt-j-6B@sharded (few-shot),-1,50,1024,True,False,2630,4.26,0.01,33.5,-0.02,4.79
-AI-Sweden-Models/gpt-sw3-126m (few-shot),186,64,2048,True,False,8958,4.33,5.66,8.15,-0.81,16.4
-RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.35,0.0,34.63,0.0,0.0
-RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.35,0.0,28.62,0.0,0.0
+NbAiLab/nb-gpt-j-6B@sharded (few-shot),-1,50,1024,True,False,2630,4.25,0.01,33.5,-0.02,4.79
+AI-Sweden-Models/gpt-sw3-126m (few-shot),186,64,2048,True,False,8958,4.32,5.66,8.15,-0.81,16.4
+RJuro/kanelsnegl-v0.1 (few-shot),7242,32,512,True,False,5847,4.34,0.0,34.63,0.0,0.0
+RJuro/kanelsnegl-v0.2 (few-shot),7242,32,512,True,False,1373,4.34,0.0,28.62,0.0,0.0
HuggingFaceTB/SmolLM2-135M (few-shot),135,49,8192,True,False,26346,4.4,19.15,-3.03,0.06,14.18
-ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.71,0.0,0.0,0.49,6.24
-peter-sk/gpt-neox-da (few-shot),1515,50,1024,True,False,6025,4.74,0.26,4.75,-0.6,0.06
-NorGLM/NorGPT-369M (few-shot),-1,64,1024,True,False,19896,4.76,1.47,5.5,-2.19,0.1
-Sigurdur/qa-icebreaker (few-shot),110,32,1024,False,False,44889,4.82,0.0,-0.1,0.0,0.0
-ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.82,0.0,0.0,0.0,0.0
-Sigurdur/icebreaker (few-shot),110,32,1024,False,False,48619,4.87,0.0,-3.6,0.0,0.0
+ai-forever/mGPT (few-shot),-1,100,1024,True,False,11734,4.7,0.0,0.0,0.49,6.24
+peter-sk/gpt-neox-da (few-shot),1515,50,1024,True,False,6025,4.73,0.26,4.75,-0.6,0.06
+NorGLM/NorGPT-369M (few-shot),-1,64,1024,True,False,19896,4.75,1.47,5.5,-2.19,0.1
+Sigurdur/qa-icebreaker (few-shot),110,32,1024,False,False,44889,4.81,0.0,-0.1,0.0,0.0
+ssmits/Falcon2-5.5B-multilingual (few-shot),5465,65,8192,True,False,7692,4.81,0.0,0.0,0.0,0.0
+Sigurdur/icebreaker (few-shot),110,32,1024,False,False,48619,4.86,0.0,-3.6,0.0,0.0
diff --git a/swedish-nlu.md b/swedish-nlu.md
index cac91a03..da70552f 100644
--- a/swedish-nlu.md
+++ b/swedish-nlu.md
@@ -3,7 +3,7 @@ layout: leaderboard
title: Swedish NLU 🇸🇪
---
-Last updated: 10/01/2025 12:30:01 CET
+Last updated: 11/01/2025 11:03:10 CET
@@ -289,6 +289,23 @@ title: Swedish NLU 🇸🇪 0.0.0 0.0.0 + + KBLab/bert-base-swedish-cased + 125 + 50 + 512 + True + 16,164 ± 2,392 / 4,574 ± 1,478 + 1.47 + 81.23 ± 1.58 / 75.95 ± 1.72 + 75.73 ± 0.72 / 73.61 ± 1.47 + 78.60 ± 0.98 / 88.95 ± 0.57 + 38.56 ± 1.53 / 43.79 ± 1.43 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + google/rembert 576 @@ -324,34 +341,34 @@ title: Swedish NLU 🇸🇪 14.0.3 - AI-Nordics/bert-large-swedish-cased - 335 - 31 + microsoft/mdeberta-v3-base + 279 + 251 512 True - 7,199 ± 1,139 / 2,051 ± 651 - 1.48 - 78.61 ± 1.45 / 72.84 ± 1.51 - 77.47 ± 0.80 / 75.77 ± 2.13 - 72.87 ± 2.36 / 85.57 ± 1.43 - 43.11 ± 0.99 / 49.29 ± 1.05 + 20,637 ± 3,925 / 4,497 ± 1,502 + 1.47 + 78.84 ± 2.19 / 72.86 ± 2.04 + 75.24 ± 0.99 / 72.06 ± 2.67 + 72.30 ± 1.04 / 85.77 ± 0.65 + 44.74 ± 1.04 / 50.62 ± 0.85 0.0.0 0.0.0 0.0.0 0.0.0 - KBLab/bert-base-swedish-cased - 125 - 50 + AI-Nordics/bert-large-swedish-cased + 335 + 31 512 True - 16,164 ± 2,392 / 4,574 ± 1,478 + 7,199 ± 1,139 / 2,051 ± 651 1.48 - 81.23 ± 1.58 / 75.95 ± 1.72 - 75.73 ± 0.72 / 73.61 ± 1.47 - 78.60 ± 0.98 / 88.95 ± 0.57 - 38.56 ± 1.53 / 43.79 ± 1.43 + 78.61 ± 1.45 / 72.84 ± 1.51 + 77.47 ± 0.80 / 75.77 ± 2.13 + 72.87 ± 2.36 / 85.57 ± 1.43 + 43.11 ± 0.99 / 49.29 ± 1.05 0.0.0 0.0.0 0.0.0 @@ -374,23 +391,6 @@ title: Swedish NLU 🇸🇪 14.0.3 14.0.3 - - microsoft/mdeberta-v3-base - 279 - 251 - 512 - True - 20,637 ± 3,925 / 4,497 ± 1,502 - 1.48 - 78.84 ± 2.19 / 72.86 ± 2.04 - 75.24 ± 0.99 / 72.06 ± 2.67 - 72.30 ± 1.04 / 85.77 ± 0.65 - 44.74 ± 1.04 / 50.62 ± 0.85 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - gpt-3.5-turbo-0613 (few-shot, val) unknown @@ -738,7 +738,7 @@ title: Swedish NLU 🇸🇪 512 True 25,418 ± 6,435 / 4,536 ± 1,452 - 1.70 + 1.69 77.78 ± 1.69 / 72.08 ± 1.81 73.58 ± 1.37 / 70.43 ± 2.49 60.36 ± 2.98 / 79.72 ± 1.52 @@ -772,7 +772,7 @@ title: Swedish NLU 🇸🇪 512 True 14,050 ± 3,278 / 2,749 ± 894 - 1.72 + 1.71 81.35 ± 1.26 / 79.18 ± 1.23 71.16 ± 1.21 / 69.78 ± 3.24 63.89 ± 1.18 / 81.45 ± 0.75 @@ -1553,7 +1553,7 @@ title: Swedish NLU 🇸🇪 128 8192 True - 1,335 ± 338 / 260 ± 88 + 1,477 ± 376 / 285 ± 97 2.05 59.92 ± 2.46 / 40.98 ± 4.90 80.91 ± 0.41 / 78.09 ± 1.22 @@ -1604,7 +1604,7 @@ title: Swedish NLU 🇸🇪 128 131072 True - 1,005 ± 330 / 196 ± 74 + 1,473 ± 377 / 283 ± 96 2.06 55.80 ± 2.68 / 34.65 ± 1.98 79.23 ± 0.48 / 76.86 ± 0.80 @@ -1741,7 +1741,7 @@ title: Swedish NLU 🇸🇪 512 True 14,062 ± 3,216 / 2,733 ± 885 - 2.12 + 2.11 74.88 ± 1.45 / 69.57 ± 1.83 61.89 ± 0.90 / 60.17 ± 3.06 40.22 ± 2.03 / 68.89 ± 2.06 @@ -1758,7 +1758,7 @@ title: Swedish NLU 🇸🇪 512 True 13,973 ± 3,205 / 2,725 ± 884 - 2.12 + 2.11 76.55 ± 1.28 / 70.38 ± 1.01 61.60 ± 1.38 / 62.28 ± 3.13 37.44 ± 6.65 / 66.67 ± 4.88 @@ -1775,7 +1775,7 @@ title: Swedish NLU 🇸🇪 512 True 14,081 ± 3,231 / 2,748 ± 891 - 2.12 + 2.11 75.33 ± 0.99 / 69.89 ± 0.52 61.80 ± 1.76 / 58.93 ± 3.28 36.62 ± 5.98 / 66.91 ± 3.69 @@ -1802,6 +1802,40 @@ title: Swedish NLU 🇸🇪 11.0.0 11.0.0 + + Twitter/twhin-bert-base + 279 + 250 + 512 + True + 11,514 ± 2,041 / 2,862 ± 918 + 2.12 + 70.17 ± 0.99 / 64.19 ± 1.42 + 66.62 ± 1.71 / 61.90 ± 2.61 + 46.72 ± 3.65 / 72.15 ± 2.62 + 31.38 ± 1.36 / 35.79 ± 1.33 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + + + gpt-4o-mini-2024-07-18 (few-shot, val) + unknown + 200 + 8191 + True + 784 ± 310 / 95 ± 28 + 2.12 + 62.45 ± 2.79 / 46.68 ± 4.08 + 77.69 ± 2.28 / 78.73 ± 2.17 + 68.93 ± 3.67 / 84.15 ± 2.01 + 12.11 ± 4.65 / 31.14 ± 3.37 + 14.0.0 + 14.0.0 + 14.0.0 + 14.0.0 + meta-llama/Llama-2-70b-chat-hf (few-shot, val) 68977 @@ -1836,40 +1870,6 @@ title: Swedish NLU 🇸🇪 13.0.0 13.0.0 - - Twitter/twhin-bert-base - 279 - 250 - 512 
- True - 11,514 ± 2,041 / 2,862 ± 918 - 2.13 - 70.17 ± 0.99 / 64.19 ± 1.42 - 66.62 ± 1.71 / 61.90 ± 2.61 - 46.72 ± 3.65 / 72.15 ± 2.62 - 31.38 ± 1.36 / 35.79 ± 1.33 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - - - gpt-4o-mini-2024-07-18 (few-shot, val) - unknown - 200 - 8191 - True - 784 ± 310 / 95 ± 28 - 2.13 - 62.45 ± 2.79 / 46.68 ± 4.08 - 77.69 ± 2.28 / 78.73 ± 2.17 - 68.93 ± 3.67 / 84.15 ± 2.01 - 12.11 ± 4.65 / 31.14 ± 3.37 - 14.0.0 - 14.0.0 - 14.0.0 - 14.0.0 - merge-crew/da-sv-dare-ties-density-0.6 (few-shot, val) 7242 @@ -2080,7 +2080,7 @@ title: Swedish NLU 🇸🇪 128 8192 True - 1,007 ± 316 / 162 ± 45 + 1,483 ± 377 / 287 ± 97 2.16 69.67 ± 1.30 / 52.94 ± 4.01 59.93 ± 4.70 / 67.54 ± 3.04 @@ -2159,6 +2159,23 @@ title: Swedish NLU 🇸🇪 14.0.4 14.0.4 + + jonfd/electra-small-nordic + 22 + 96 + 128 + True + 5,989 ± 120 / 3,809 ± 1,230 + 2.18 + 71.07 ± 1.59 / 65.46 ± 1.28 + 66.42 ± 0.72 / 57.57 ± 1.23 + 69.19 ± 0.66 / 84.26 ± 0.36 + 11.85 ± 4.94 / 13.02 ± 5.55 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + meta-llama/Llama-2-13b-chat-hf (few-shot) 13016 @@ -2261,23 +2278,6 @@ title: Swedish NLU 🇸🇪 12.5.1 12.5.1 - - jonfd/electra-small-nordic - 22 - 96 - 128 - True - 5,989 ± 120 / 3,809 ± 1,230 - 2.19 - 71.07 ± 1.59 / 65.46 ± 1.28 - 66.42 ± 0.72 / 57.57 ± 1.23 - 69.19 ± 0.66 / 84.26 ± 0.36 - 11.85 ± 4.94 / 13.02 ± 5.55 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - mistralai/Mistral-7B-v0.3 (few-shot) 7248 @@ -2387,7 +2387,7 @@ title: Swedish NLU 🇸🇪 512 True 15,749 ± 2,665 / 4,014 ± 1,281 - 2.24 + 2.23 72.33 ± 0.82 / 67.15 ± 0.85 67.77 ± 1.19 / 62.98 ± 2.57 33.79 ± 7.61 / 64.01 ± 6.84 @@ -2499,6 +2499,23 @@ title: Swedish NLU 🇸🇪 0.0.0 0.0.0 + + meta-llama/Llama-2-7b-hf (few-shot) + 6738 + 32 + 4096 + True + 930 ± 310 / 128 ± 43 + 2.28 + 44.11 ± 4.26 / 31.64 ± 4.48 + 79.05 ± 1.08 / 75.52 ± 2.66 + 7.34 ± 3.19 / 43.83 ± 5.31 + 57.49 ± 0.95 / 63.16 ± 0.77 + 9.2.0 + 9.2.0 + 9.2.0 + 12.5.1 + DDSC/roberta-base-scandinavian 125 @@ -2516,23 +2533,6 @@ title: Swedish NLU 🇸🇪 0.0.0 0.0.0 - - meta-llama/Llama-2-7b-hf (few-shot) - 6738 - 32 - 4096 - True - 930 ± 310 / 128 ± 43 - 2.29 - 44.11 ± 4.26 / 31.64 ± 4.48 - 79.05 ± 1.08 / 75.52 ± 2.66 - 7.34 ± 3.19 / 43.83 ± 5.31 - 57.49 ± 0.95 / 63.16 ± 0.77 - 9.2.0 - 9.2.0 - 9.2.0 - 12.5.1 - occiglot/occiglot-7b-eu5 (few-shot) 7242 @@ -2669,6 +2669,40 @@ title: Swedish NLU 🇸🇪 13.0.0 13.0.0 + + sentence-transformers/paraphrase-xlm-r-multilingual-v1 + 278 + 250 + 512 + True + 20,154 ± 4,438 / 3,890 ± 1,256 + 2.35 + 70.22 ± 1.49 / 63.97 ± 1.48 + 71.33 ± 1.20 / 65.44 ± 3.64 + 39.60 ± 5.87 / 66.60 ± 3.19 + 18.65 ± 1.15 / 24.75 ± 0.98 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + + + clips/mfaq + 278 + 250 + 128 + True + 5,591 ± 187 / 3,349 ± 1,105 + 2.36 + 76.31 ± 1.29 / 70.91 ± 1.27 + 73.32 ± 1.13 / 70.21 ± 3.74 + 32.29 ± 10.98 / 62.21 ± 5.02 + 16.12 ± 5.80 / 19.52 ± 6.73 + 0.0.0 + 0.0.0 + 0.0.0 + 0.0.0 + meta-llama/Llama-3.2-3B-Instruct (few-shot) 3213 @@ -2703,23 +2737,6 @@ title: Swedish NLU 🇸🇪 12.3.1 12.4.0 - - sentence-transformers/paraphrase-xlm-r-multilingual-v1 - 278 - 250 - 512 - True - 20,154 ± 4,438 / 3,890 ± 1,256 - 2.36 - 70.22 ± 1.49 / 63.97 ± 1.48 - 71.33 ± 1.20 / 65.44 ± 3.64 - 39.60 ± 5.87 / 66.60 ± 3.19 - 18.65 ± 1.15 / 24.75 ± 0.98 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - LumiOpen/Viking-33B@1000B (few-shot) 33119 @@ -2737,23 +2754,6 @@ title: Swedish NLU 🇸🇪 12.9.0 12.9.0 - - clips/mfaq - 278 - 250 - 128 - True - 5,591 ± 187 / 3,349 ± 1,105 - 2.37 - 76.31 ± 1.29 / 70.91 ± 1.27 - 73.32 ± 1.13 / 70.21 ± 3.74 - 32.29 ± 10.98 / 62.21 ± 5.02 - 16.12 ± 5.80 / 19.52 ± 6.73 - 0.0.0 - 0.0.0 - 0.0.0 - 0.0.0 - 
ibm-granite/granite-8b-code-instruct-4k (few-shot) 8055 @@ -3043,6 +3043,23 @@ title: Swedish NLU 🇸🇪 13.0.0 13.0.0 + + mistralai/Mistral-7B-Instruct-v0.2 (few-shot) + 7242 + 32 + 32768 + False + 2,370 ± 416 / 711 ± 242 + 2.43 + 47.92 ± 2.66 / 33.00 ± 3.24 + 62.90 ± 2.44 / 70.61 ± 1.19 + 19.95 ± 2.24 / 56.49 ± 2.10 + 52.51 ± 0.36 / 61.42 ± 0.52 + 9.2.0 + 9.2.0 + 9.3.1 + 12.4.0 + dbmdz/bert-base-historic-multilingual-cased 111 @@ -3060,23 +3077,6 @@ title: Swedish NLU 🇸🇪 0.0.0 0.0.0 - - mistralai/Mistral-7B-Instruct-v0.2 (few-shot) - 7242 - 32 - 32768 - False - 2,370 ± 416 / 711 ± 242 - 2.44 - 47.92 ± 2.66 / 33.00 ± 3.24 - 62.90 ± 2.44 / 70.61 ± 1.19 - 19.95 ± 2.24 / 56.49 ± 2.10 - 52.51 ± 0.36 / 61.42 ± 0.52 - 9.2.0 - 9.2.0 - 9.3.1 - 12.4.0 - 01-ai/Yi-1.5-6B (few-shot) 6061 @@ -3338,7 +3338,7 @@ title: Swedish NLU 🇸🇪 32 4096 True - 3,194 ± 687 / 650 ± 216 + 8,681 ± 1,650 / 2,177 ± 717 2.54 46.15 ± 2.77 / 24.28 ± 3.74 67.17 ± 1.93 / 70.99 ± 1.64 @@ -3577,7 +3577,7 @@ title: Swedish NLU 🇸🇪 131073 False 7,436 ± 1,846 / 1,508 ± 479 - 2.67 + 2.66 41.60 ± 2.74 / 37.22 ± 3.26 71.86 ± 2.01 / 71.15 ± 2.16 3.72 ± 1.40 / 48.04 ± 1.96 @@ -3628,7 +3628,7 @@ title: Swedish NLU 🇸🇪 512 True 33,753 ± 8,349 / 5,937 ± 1,946 - 2.70 + 2.69 65.50 ± 1.20 / 59.72 ± 1.22 68.33 ± 1.03 / 64.03 ± 2.47 14.81 ± 6.63 / 55.50 ± 4.28 @@ -3645,7 +3645,7 @@ title: Swedish NLU 🇸🇪 512 True 26,458 ± 5,992 / 5,274 ± 1,731 - 2.70 + 2.69 65.50 ± 1.20 / 59.72 ± 1.22 68.36 ± 1.18 / 63.94 ± 2.47 14.81 ± 6.63 / 55.50 ± 4.28 @@ -3832,7 +3832,7 @@ title: Swedish NLU 🇸🇪 130947 True 7,312 ± 1,668 / 1,609 ± 525 - 2.82 + 2.81 42.36 ± 1.67 / 21.33 ± 2.90 51.53 ± 6.32 / 62.14 ± 3.43 3.11 ± 1.60 / 47.93 ± 2.93 @@ -3859,6 +3859,23 @@ title: Swedish NLU 🇸🇪 13.0.0 13.0.0 + + google/gemma-7b-it (few-shot) + 8538 + 256 + 8192 + False + 1,792 ± 249 / 668 ± 203 + 2.86 + 59.26 ± 2.00 / 52.73 ± 2.71 + 28.63 ± 1.24 / 50.95 ± 0.75 + 11.43 ± 1.88 / 53.31 ± 1.74 + 46.67 ± 1.97 / 53.24 ± 1.72 + 12.7.0 + 12.7.0 + 12.7.0 + 12.7.0 + openGPT-X/Teuken-7B-instruct-research-v0.4 (few-shot) 7453 @@ -3893,23 +3910,6 @@ title: Swedish NLU 🇸🇪 0.0.0 0.0.0 - - google/gemma-7b-it (few-shot) - 8538 - 256 - 8192 - False - 1,792 ± 249 / 668 ± 203 - 2.87 - 59.26 ± 2.00 / 52.73 ± 2.71 - 28.63 ± 1.24 / 50.95 ± 0.75 - 11.43 ± 1.88 / 53.31 ± 1.74 - 46.67 ± 1.97 / 53.24 ± 1.72 - 12.7.0 - 12.7.0 - 12.7.0 - 12.7.0 - nvidia/mistral-nemo-minitron-8b-instruct (few-shot) 8414 @@ -4029,6 +4029,23 @@ title: Swedish NLU 🇸🇪 0.0.0 0.0.0 + + HPLT/gpt-7b-nordic-prerelease (few-shot) + 7550 + 131 + 4096 + True + 1,382 ± 337 / 257 ± 91 + 2.93 + 27.07 ± 6.33 / 25.24 ± 4.89 + 61.96 ± 2.69 / 67.81 ± 2.27 + 2.65 ± 1.46 / 40.25 ± 4.08 + 46.16 ± 0.91 / 52.35 ± 0.87 + 12.5.2 + 12.3.2 + 12.3.2 + 12.3.2 + four-two-labs/lynx-micro (few-shot) 2506 @@ -4046,23 +4063,6 @@ title: Swedish NLU 🇸🇪 14.1.2 14.1.1 - - HPLT/gpt-7b-nordic-prerelease (few-shot) - 7550 - 131 - 4096 - True - 1,382 ± 337 / 257 ± 91 - 2.94 - 27.07 ± 6.33 / 25.24 ± 4.89 - 61.96 ± 2.69 / 67.81 ± 2.27 - 2.65 ± 1.46 / 40.25 ± 4.08 - 46.16 ± 0.91 / 52.35 ± 0.87 - 12.5.2 - 12.3.2 - 12.3.2 - 12.3.2 - state-spaces/mamba-2.8b-hf (few-shot) 2768 @@ -4070,7 +4070,7 @@ title: Swedish NLU 🇸🇪 32769 True 2,722 ± 495 / 766 ± 250 - 2.96 + 2.95 23.25 ± 1.99 / 20.55 ± 2.20 71.70 ± 1.09 / 71.01 ± 2.36 -0.82 ± 2.23 / 40.80 ± 4.30 @@ -4308,7 +4308,7 @@ title: Swedish NLU 🇸🇪 512 True 33,247 ± 8,123 / 6,017 ± 1,977 - 3.17 + 3.16 51.67 ± 1.46 / 53.62 ± 1.02 62.71 ± 0.81 / 57.24 ± 1.73 2.32 ± 1.83 / 48.77 ± 1.62 @@ -4325,7 +4325,7 @@ title: Swedish NLU 🇸🇪 512 
True 19,206 ± 4,451 / 3,658 ± 1,187 - 3.17 + 3.16 51.67 ± 1.46 / 53.62 ± 1.02 63.04 ± 0.70 / 56.50 ± 1.19 2.32 ± 1.83 / 48.77 ± 1.62 @@ -4335,6 +4335,23 @@ title: Swedish NLU 🇸🇪 12.6.1 12.6.1 + + Qwen/Qwen1.5-1.8B-Chat (few-shot) + 1837 + 152 + 32768 + False + 8,304 ± 1,846 / 1,933 ± 617 + 3.20 + 20.94 ± 3.73 / 18.26 ± 2.84 + 52.54 ± 3.33 / 60.44 ± 3.13 + 0.34 ± 1.22 / 36.61 ± 1.57 + 43.55 ± 1.14 / 50.53 ± 1.40 + 12.5.2 + 11.0.0 + 12.1.0 + 12.5.0 + dbmdz/bert-mini-historic-multilingual-cased 12 @@ -4369,23 +4386,6 @@ title: Swedish NLU 🇸🇪 0.0.0 0.0.0 - - Qwen/Qwen1.5-1.8B-Chat (few-shot) - 1837 - 152 - 32768 - False - 8,304 ± 1,846 / 1,933 ± 617 - 3.21 - 20.94 ± 3.73 / 18.26 ± 2.84 - 52.54 ± 3.33 / 60.44 ± 3.13 - 0.34 ± 1.22 / 36.61 ± 1.57 - 43.55 ± 1.14 / 50.53 ± 1.40 - 12.5.2 - 11.0.0 - 12.1.0 - 12.5.0 - Qwen/Qwen1.5-1.8B (few-shot) 1837 @@ -4478,7 +4478,7 @@ title: Swedish NLU 🇸🇪 2048 True 10,756 ± 3,589 / 1,157 ± 670 - 3.29 + 3.28 36.29 ± 4.00 / 31.64 ± 3.95 39.68 ± 11.06 / 47.78 ± 9.30 0.96 ± 1.25 / 36.56 ± 2.57 @@ -4529,7 +4529,7 @@ title: Swedish NLU 🇸🇪 4096 True 1,438 ± 410 / 233 ± 79 - 3.33 + 3.32 37.17 ± 2.59 / 25.25 ± 1.61 20.20 ± 4.93 / 21.73 ± 5.47 6.13 ± 1.52 / 39.55 ± 1.91 @@ -4546,7 +4546,7 @@ title: Swedish NLU 🇸🇪 128 True 5,995 ± 135 / 3,839 ± 1,247 - 3.38 + 3.37 39.17 ± 4.06 / 36.74 ± 3.78 57.71 ± 1.40 / 53.54 ± 0.59 17.10 ± 2.57 / 57.41 ± 1.03 @@ -4580,7 +4580,7 @@ title: Swedish NLU 🇸🇪 4096 False 10,890 ± 2,686 / 2,186 ± 750 - 3.45 + 3.44 22.38 ± 3.00 / 22.09 ± 2.85 31.11 ± 12.17 / 36.84 ± 11.52 0.09 ± 0.67 / 33.42 ± 0.30 @@ -4608,36 +4608,36 @@ title: Swedish NLU 🇸🇪 14.0.4 - Qwen/Qwen1.5-4B (few-shot) - 3950 + Qwen/Qwen1.5-0.5B (few-shot) + 620 152 32768 True - 3,248 ± 739 / 761 ± 252 + 11,371 ± 2,924 / 2,122 ± 692 3.52 - 37.26 ± 4.28 / 29.89 ± 5.96 - 5.20 ± 7.35 / 30.65 ± 4.97 - 1.85 ± 1.54 / 33.71 ± 0.46 - 54.15 ± 0.58 / 60.15 ± 0.59 + 28.96 ± 2.39 / 26.49 ± 3.14 + 26.58 ± 5.12 / 28.64 ± 5.35 + -1.88 ± 1.46 / 35.45 ± 2.92 + 34.59 ± 1.06 / 40.95 ± 1.11 12.5.2 - 9.3.2 + 10.0.1 12.1.0 12.1.0 - Qwen/Qwen1.5-0.5B (few-shot) - 620 + Qwen/Qwen1.5-4B (few-shot) + 3950 152 32768 True - 11,371 ± 2,924 / 2,122 ± 692 - 3.53 - 28.96 ± 2.39 / 26.49 ± 3.14 - 26.58 ± 5.12 / 28.64 ± 5.35 - -1.88 ± 1.46 / 35.45 ± 2.92 - 34.59 ± 1.06 / 40.95 ± 1.11 + 3,248 ± 739 / 761 ± 252 + 3.52 + 37.26 ± 4.28 / 29.89 ± 5.96 + 5.20 ± 7.35 / 30.65 ± 4.97 + 1.85 ± 1.54 / 33.71 ± 0.46 + 54.15 ± 0.58 / 60.15 ± 0.59 12.5.2 - 10.0.1 + 9.3.2 12.1.0 12.1.0 @@ -4743,6 +4743,23 @@ title: Swedish NLU 🇸🇪 0.0.0 0.0.0 + + tiiuae/Falcon3-1B-Instruct (few-shot) + 1669 + 131 + 8192 + True + 9,270 ± 2,690 / 1,434 ± 437 + 3.68 + 26.41 ± 2.74 / 25.18 ± 2.46 + 25.99 ± 3.51 / 35.36 ± 4.53 + 1.64 ± 1.90 / 37.52 ± 2.69 + 21.39 ± 1.66 / 26.70 ± 1.83 + 14.1.2 + 14.1.2 + 14.1.2 + 14.1.2 + NbAiLab/nb-llama-3.2-3B (few-shot) 3213 @@ -4801,7 +4818,7 @@ title: Swedish NLU 🇸🇪 2048 True 2,519 ± 841 / 323 ± 104 - 3.84 + 3.83 14.09 ± 5.11 / 15.95 ± 3.72 23.71 ± 6.92 / 32.87 ± 7.00 1.74 ± 1.73 / 38.94 ± 3.25 @@ -4937,7 +4954,7 @@ title: Swedish NLU 🇸🇪 1024 False 2,556 ± 580 / 681 ± 214 - 4.07 + 4.06 0.31 ± 0.55 / 0.29 ± 0.50 27.42 ± 12.16 / 38.74 ± 10.05 0.07 ± 1.06 / 35.80 ± 1.73 @@ -4954,7 +4971,7 @@ title: Swedish NLU 🇸🇪 3969 True 3,024 ± 496 / 909 ± 301 - 4.13 + 4.12 9.75 ± 3.30 / 9.18 ± 3.19 17.76 ± 4.89 / 28.16 ± 7.50 1.22 ± 0.95 / 43.54 ± 3.79 @@ -4988,7 +5005,7 @@ title: Swedish NLU 🇸🇪 1024 True 2,630 ± 605 / 684 ± 217 - 4.26 + 4.25 0.01 ± 0.02 / 0.11 ± 0.12 33.50 ± 13.13 / 39.30 ± 11.93 -0.02 ± 0.60 / 34.92 ± 2.99 @@ -5005,7 
+5022,7 @@ title: Swedish NLU 🇸🇪
2048
True
8,958 ± 1,815 / 2,240 ± 696
- 4.33
+ 4.32
5.66 ± 4.11 / 8.37 ± 3.24
8.15 ± 8.87 / 24.31 ± 7.12
-0.81 ± 1.16 / 36.81 ± 2.47
@@ -5022,7 +5039,7 @@ title: Swedish NLU 🇸🇪
512
True
5,847 ± 1,029 / 1,640 ± 525
- 4.35
+ 4.34
0.00 ± 0.00 / 0.00 ± 0.00
34.63 ± 9.69 / 40.92 ± 6.88
0.00 ± 0.00 / 33.30 ± 0.27
@@ -5039,7 +5056,7 @@ title: Swedish NLU 🇸🇪
512
True
1,373 ± 120 / 709 ± 172
- 4.35
+ 4.34
0.00 ± 0.00 / 0.00 ± 0.00
28.62 ± 12.67 / 35.36 ± 8.35
0.00 ± 0.00 / 33.30 ± 0.27
@@ -5073,7 +5090,7 @@ title: Swedish NLU 🇸🇪
1024
True
11,734 ± 3,124 / 2,174 ± 720
- 4.71
+ 4.70
0.00 ± 0.00 / 0.00 ± 0.00
0.00 ± 0.00 / 19.32 ± 0.16
0.49 ± 1.29 / 39.12 ± 3.92
@@ -5090,7 +5107,7 @@ title: Swedish NLU 🇸🇪
1024
True
6,025 ± 1,442 / 1,342 ± 431
- 4.74
+ 4.73
0.26 ± 0.16 / 0.26 ± 0.14
4.75 ± 2.54 / 27.85 ± 1.59
-0.60 ± 1.56 / 40.53 ± 2.93
@@ -5107,7 +5124,7 @@ title: Swedish NLU 🇸🇪
1024
True
19,896 ± 5,099 / 3,848 ± 1,251
- 4.76
+ 4.75
1.47 ± 1.90 / 1.32 ± 1.69
5.50 ± 4.49 / 28.77 ± 3.76
-2.19 ± 1.29 / 40.52 ± 3.02
@@ -5124,7 +5141,7 @@ title: Swedish NLU 🇸🇪
1024
False
44,889 ± 6,944 / 13,506 ± 4,256
- 4.82
+ 4.81
0.00 ± 0.00 / 0.00 ± 0.00
-0.10 ± 1.04 / 22.69 ± 0.52
0.00 ± 0.00 / 33.30 ± 0.27
@@ -5141,7 +5158,7 @@ title: Swedish NLU 🇸🇪
8192
True
7,692 ± 1,423 / 1,960 ± 644
- 4.82
+ 4.81
0.00 ± 0.00 / 0.00 ± 0.00
0.00 ± 0.00 / 19.16 ± 0.14
0.00 ± 0.00 / 33.30 ± 0.27
@@ -5158,7 +5175,7 @@ title: Swedish NLU 🇸🇪
1024
False
48,619 ± 7,681 / 13,831 ± 4,404
- 4.87
+ 4.86
0.00 ± 0.00 / 0.00 ± 0.00
-3.60 ± 3.63 / 20.29 ± 1.99
0.00 ± 0.00 / 33.30 ± 0.27