From d75f69cd1d5bdd5af633764365075bdbd0135d17 Mon Sep 17 00:00:00 2001
From: Le Dong <74060032+ledong0110@users.noreply.github.com>
Date: Fri, 6 Sep 2024 00:36:09 +0700
Subject: [PATCH] 5 inputoutput result of tables (#7)
---
_config.yml | 2 +-
_data/categories.yml | 1 +
_data/lang_tasks.yml | 2 +
.../vi/bias_toxicity/question_answering.yml | 146 ++
.../vi/bias_toxicity/summarization.yml | 146 ++
.../vi/bias_toxicity/translation.yml | 146 ++
.../vi/chain_of_thought/reasoning.yml | 64 +
.../fairness_aware/information_retrieval.yml | 146 ++
.../vi/fairness_aware/language_modeling.yml | 236 +++
.../vi/fairness_aware/question_answering.yml | 82 +
.../vi/fairness_aware/sentiment_analysis.yml | 222 +++
.../vi/fairness_aware/text_classification.yml | 222 +++
.../vi/fairness_aware/toxicity_detection.yml | 222 +++
.../vi/few_shot/information_retrieval.yml | 164 ++
_data/leaderboard/vi/few_shot/knowledge.yml | 147 ++
.../vi/few_shot/language_modeling.yml | 236 +++
_data/leaderboard/vi/few_shot/reasoning.yml | 192 +++
.../vi/few_shot/sentiment_analysis.yml | 200 +++
.../vi/few_shot/text_classification.yml | 200 +++
.../vi/few_shot/toxicity_detection.yml | 200 +++
_data/leaderboard/vi/few_shot/translation.yml | 164 ++
.../vi/medium_prompt/question_answering.yml | 82 +
.../vi/medium_prompt/summarization.yml | 274 ++++
_data/leaderboard/vi/models.yml | 11 +
.../vi/randomized_choice/knowledge.yml | 100 ++
.../information_retrieval.yml | 146 ++
.../vi/robustness_aware/knowledge.yml | 147 ++
.../robustness_aware/question_answering.yml | 92 ++
.../robustness_aware/sentiment_analysis.yml | 200 +++
.../vi/robustness_aware/summarization.yml | 308 ++++
.../robustness_aware/text_classification.yml | 200 +++
.../robustness_aware/toxicity_detection.yml | 200 +++
.../vi/robustness_aware/translation.yml | 164 ++
.../vi/weaker_prompt/question_answering.yml | 82 +
.../vi/weaker_prompt/summarization.yml | 274 ++++
.../vi/zero_shot/information_retrieval.yml | 146 ++
_data/leaderboard/vi/zero_shot/knowledge.yml | 131 ++
.../vi/zero_shot/language_modeling.yml | 236 +++
.../vi/zero_shot/question_answering.yml | 82 +
_data/leaderboard/vi/zero_shot/reasoning.yml | 171 +++
.../vi/zero_shot/sentiment_analysis.yml | 200 +++
.../vi/zero_shot/summarization.yml | 291 ++++
.../vi/zero_shot/text_classification.yml | 200 +++
.../vi/zero_shot/toxicity_detection.yml | 178 +++
.../ind/bias-toxicity/question-answering.md | 217 ++-
_pages/ind/bias-toxicity/summarization.md | 217 ++-
_pages/ind/bias-toxicity/translation.md | 330 +---
_pages/ind/chain-of-thought/reasoning.md | 115 +-
.../fairness-aware/information-retrieval.md | 167 +--
.../ind/fairness-aware/language-modeling.md | 242 ++-
.../ind/fairness-aware/question-answering.md | 127 +-
.../ind/fairness-aware/sentiment-analysis.md | 243 ++-
.../ind/fairness-aware/text-classification.md | 223 +--
.../ind/fairness-aware/toxicity-detection.md | 243 ++-
_pages/ind/few-shot/information-retrieval.md | 198 +--
_pages/ind/few-shot/knowledge.md | 234 +--
_pages/ind/few-shot/language-modeling.md | 242 ++-
_pages/ind/few-shot/reasoning.md | 197 +--
_pages/ind/few-shot/sentiment-analysis.md | 232 ++-
_pages/ind/few-shot/text-classification.md | 230 ++-
_pages/ind/few-shot/toxicity-detection.md | 230 ++-
_pages/ind/few-shot/translation.md | 178 +--
.../ind/medium-prompt/question-answering.md | 97 +-
_pages/ind/medium-prompt/summarization.md | 269 ++--
_pages/ind/randomized-choice/knowledge.md | 160 +-
.../robustness-aware/information-retrieval.md | 187 +--
_pages/ind/robustness-aware/knowledge.md | 232 +--
.../robustness-aware/question-answering.md | 118 +-
.../robustness-aware/sentiment-analysis.md | 230 ++-
_pages/ind/robustness-aware/summarization.md | 326 ++--
.../robustness-aware/text-classification.md | 230 ++-
.../robustness-aware/toxicity-detection.md | 230 ++-
_pages/ind/robustness-aware/translation.md | 198 +--
.../ind/weaker-prompt/question-answering.md | 113 +-
_pages/ind/weaker-prompt/summarization.md | 269 ++--
_pages/ind/zero-shot/information-retrieval.md | 187 +--
_pages/ind/zero-shot/knowledge.md | 224 +--
_pages/ind/zero-shot/language-modeling.md | 247 ++-
_pages/ind/zero-shot/question-answering.md | 127 +-
_pages/ind/zero-shot/reasoning.md | 165 +-
_pages/ind/zero-shot/sentiment-analysis.md | 223 +--
_pages/ind/zero-shot/summarization.md | 307 ++--
_pages/ind/zero-shot/text-classification.md | 217 ++-
_pages/ind/zero-shot/toxicity-detection.md | 197 +--
_pages/kr/bias-toxicity/question-answering.md | 217 ++-
_pages/kr/bias-toxicity/summarization.md | 217 ++-
_pages/kr/bias-toxicity/translation.md | 330 +---
_pages/kr/chain-of-thought/reasoning.md | 115 +-
.../fairness-aware/information-retrieval.md | 167 +--
_pages/kr/fairness-aware/language-modeling.md | 242 ++-
.../kr/fairness-aware/question-answering.md | 127 +-
.../kr/fairness-aware/sentiment-analysis.md | 243 ++-
.../kr/fairness-aware/text-classification.md | 223 +--
.../kr/fairness-aware/toxicity-detection.md | 243 ++-
_pages/kr/few-shot/information-retrieval.md | 198 +--
_pages/kr/few-shot/knowledge.md | 234 +--
_pages/kr/few-shot/language-modeling.md | 242 ++-
_pages/kr/few-shot/reasoning.md | 197 +--
_pages/kr/few-shot/sentiment-analysis.md | 232 ++-
_pages/kr/few-shot/text-classification.md | 230 ++-
_pages/kr/few-shot/toxicity-detection.md | 230 ++-
_pages/kr/few-shot/translation.md | 178 +--
_pages/kr/medium-prompt/question-answering.md | 97 +-
_pages/kr/medium-prompt/summarization.md | 269 ++--
_pages/kr/randomized-choice/knowledge.md | 160 +-
.../robustness-aware/information-retrieval.md | 187 +--
_pages/kr/robustness-aware/knowledge.md | 232 +--
.../kr/robustness-aware/question-answering.md | 118 +-
.../kr/robustness-aware/sentiment-analysis.md | 230 ++-
_pages/kr/robustness-aware/summarization.md | 326 ++--
.../robustness-aware/text-classification.md | 230 ++-
.../kr/robustness-aware/toxicity-detection.md | 230 ++-
_pages/kr/robustness-aware/translation.md | 198 +--
_pages/kr/weaker-prompt/question-answering.md | 113 +-
_pages/kr/weaker-prompt/summarization.md | 269 ++--
_pages/kr/zero-shot/information-retrieval.md | 187 +--
_pages/kr/zero-shot/knowledge.md | 224 +--
_pages/kr/zero-shot/language-modeling.md | 247 ++-
_pages/kr/zero-shot/question-answering.md | 127 +-
_pages/kr/zero-shot/reasoning.md | 165 +-
_pages/kr/zero-shot/sentiment-analysis.md | 223 +--
_pages/kr/zero-shot/summarization.md | 307 ++--
_pages/kr/zero-shot/text-classification.md | 217 ++-
_pages/kr/zero-shot/toxicity-detection.md | 197 +--
_pages/leaderboard.md | 1 +
_pages/vi/bias-toxicity/question-answering.md | 217 ++-
_pages/vi/bias-toxicity/summarization.md | 217 ++-
_pages/vi/bias-toxicity/translation.md | 330 +---
_pages/vi/chain-of-thought/reasoning.md | 115 +-
.../fairness-aware/information-retrieval.md | 167 +--
_pages/vi/fairness-aware/language-modeling.md | 242 ++-
.../vi/fairness-aware/question-answering.md | 127 +-
.../vi/fairness-aware/sentiment-analysis.md | 243 ++-
.../vi/fairness-aware/text-classification.md | 223 +--
.../vi/fairness-aware/toxicity-detection.md | 243 ++-
_pages/vi/few-shot/information-retrieval.md | 198 +--
_pages/vi/few-shot/knowledge.md | 234 +--
_pages/vi/few-shot/language-modeling.md | 242 ++-
_pages/vi/few-shot/reasoning.md | 197 +--
_pages/vi/few-shot/sentiment-analysis.md | 232 ++-
_pages/vi/few-shot/text-classification.md | 230 ++-
_pages/vi/few-shot/toxicity-detection.md | 230 ++-
_pages/vi/few-shot/translation.md | 178 +--
_pages/vi/medium-prompt/question-answering.md | 97 +-
_pages/vi/medium-prompt/summarization.md | 269 ++--
_pages/vi/randomized-choice/knowledge.md | 160 +-
.../robustness-aware/information-retrieval.md | 187 +--
_pages/vi/robustness-aware/knowledge.md | 232 +--
.../vi/robustness-aware/question-answering.md | 118 +-
.../vi/robustness-aware/sentiment-analysis.md | 230 ++-
_pages/vi/robustness-aware/summarization.md | 326 ++--
.../robustness-aware/text-classification.md | 230 ++-
.../vi/robustness-aware/toxicity-detection.md | 230 ++-
_pages/vi/robustness-aware/translation.md | 198 +--
_pages/vi/weaker-prompt/question-answering.md | 113 +-
_pages/vi/weaker-prompt/summarization.md | 269 ++--
_pages/vi/zero-shot/information-retrieval.md | 187 +--
_pages/vi/zero-shot/knowledge.md | 224 +--
_pages/vi/zero-shot/language-modeling.md | 247 ++-
_pages/vi/zero-shot/question-answering.md | 127 +-
_pages/vi/zero-shot/reasoning.md | 165 +-
_pages/vi/zero-shot/sentiment-analysis.md | 223 +--
_pages/vi/zero-shot/summarization.md | 307 ++--
_pages/vi/zero-shot/text-classification.md | 217 ++-
_pages/vi/zero-shot/toxicity-detection.md | 197 +--
_site/contact/index.html | 26 +-
_site/demo/index.html | 26 +-
_site/index.html | 42 +-
.../ind/bias-toxicity/question-answering.html | 165 +-
.../ind/bias-toxicity/summarization.html | 165 +-
.../ind/bias-toxicity/translation.html | 278 +---
.../ind/chain-of-thought/reasoning.html | 91 +-
.../fairness-aware/information-retrieval.html | 131 +-
.../ind/fairness-aware/language-modeling.html | 182 +--
.../fairness-aware/question-answering.html | 111 +-
.../fairness-aware/sentiment-analysis.html | 191 +--
.../fairness-aware/text-classification.html | 175 +--
.../fairness-aware/toxicity-detection.html | 191 +--
.../ind/few-shot/information-retrieval.html | 158 +-
_site/leaderboard/ind/few-shot/knowledge.html | 150 +-
.../ind/few-shot/language-modeling.html | 182 +--
_site/leaderboard/ind/few-shot/reasoning.html | 169 +--
.../ind/few-shot/sentiment-analysis.html | 178 +--
.../ind/few-shot/text-classification.html | 178 +--
.../ind/few-shot/toxicity-detection.html | 178 +--
.../leaderboard/ind/few-shot/translation.html | 142 +-
.../ind/medium-prompt/question-answering.html | 81 +-
.../ind/medium-prompt/summarization.html | 181 +--
.../ind/randomized-choice/knowledge.html | 112 +-
.../information-retrieval.html | 147 +-
.../ind/robustness-aware/knowledge.html | 148 +-
.../robustness-aware/question-answering.html | 102 +-
.../robustness-aware/sentiment-analysis.html | 178 +--
.../ind/robustness-aware/summarization.html | 238 +--
.../robustness-aware/text-classification.html | 178 +--
.../robustness-aware/toxicity-detection.html | 178 +--
.../ind/robustness-aware/translation.html | 158 +-
.../ind/weaker-prompt/question-answering.html | 97 +-
.../ind/weaker-prompt/summarization.html | 181 +--
.../ind/zero-shot/information-retrieval.html | 147 +-
.../leaderboard/ind/zero-shot/knowledge.html | 140 +-
.../ind/zero-shot/language-modeling.html | 183 +--
.../ind/zero-shot/question-answering.html | 111 +-
.../leaderboard/ind/zero-shot/reasoning.html | 141 +-
.../ind/zero-shot/sentiment-analysis.html | 175 +--
.../ind/zero-shot/summarization.html | 219 +--
.../ind/zero-shot/text-classification.html | 165 +-
.../ind/zero-shot/toxicity-detection.html | 149 +-
_site/leaderboard/index.html | 247 +--
.../kr/bias-toxicity/question-answering.html | 165 +-
.../kr/bias-toxicity/summarization.html | 165 +-
.../kr/bias-toxicity/translation.html | 278 +---
.../kr/chain-of-thought/reasoning.html | 91 +-
.../fairness-aware/information-retrieval.html | 131 +-
.../kr/fairness-aware/language-modeling.html | 182 +--
.../kr/fairness-aware/question-answering.html | 111 +-
.../kr/fairness-aware/sentiment-analysis.html | 191 +--
.../fairness-aware/text-classification.html | 175 +--
.../kr/fairness-aware/toxicity-detection.html | 191 +--
.../kr/few-shot/information-retrieval.html | 158 +-
_site/leaderboard/kr/few-shot/knowledge.html | 150 +-
.../kr/few-shot/language-modeling.html | 182 +--
_site/leaderboard/kr/few-shot/reasoning.html | 169 +--
.../kr/few-shot/sentiment-analysis.html | 178 +--
.../kr/few-shot/text-classification.html | 178 +--
.../kr/few-shot/toxicity-detection.html | 178 +--
.../leaderboard/kr/few-shot/translation.html | 142 +-
.../kr/medium-prompt/question-answering.html | 81 +-
.../kr/medium-prompt/summarization.html | 181 +--
.../kr/randomized-choice/knowledge.html | 112 +-
.../information-retrieval.html | 147 +-
.../kr/robustness-aware/knowledge.html | 148 +-
.../robustness-aware/question-answering.html | 102 +-
.../robustness-aware/sentiment-analysis.html | 178 +--
.../kr/robustness-aware/summarization.html | 238 +--
.../robustness-aware/text-classification.html | 178 +--
.../robustness-aware/toxicity-detection.html | 178 +--
.../kr/robustness-aware/translation.html | 158 +-
.../kr/weaker-prompt/question-answering.html | 97 +-
.../kr/weaker-prompt/summarization.html | 181 +--
.../kr/zero-shot/information-retrieval.html | 147 +-
_site/leaderboard/kr/zero-shot/knowledge.html | 140 +-
.../kr/zero-shot/language-modeling.html | 183 +--
.../kr/zero-shot/question-answering.html | 111 +-
_site/leaderboard/kr/zero-shot/reasoning.html | 141 +-
.../kr/zero-shot/sentiment-analysis.html | 175 +--
.../kr/zero-shot/summarization.html | 219 +--
.../kr/zero-shot/text-classification.html | 165 +-
.../kr/zero-shot/toxicity-detection.html | 149 +-
.../vi/bias-toxicity/question-answering.html | 895 +++++++++--
.../vi/bias-toxicity/summarization.html | 895 +++++++++--
.../vi/bias-toxicity/translation.html | 968 +++++++++---
.../vi/chain-of-thought/reasoning.html | 333 +++-
.../fairness-aware/information-retrieval.html | 707 +++++++--
.../vi/fairness-aware/language-modeling.html | 998 ++++++++++--
.../vi/fairness-aware/question-answering.html | 475 ++++--
.../vi/fairness-aware/sentiment-analysis.html | 921 ++++++++++--
.../fairness-aware/text-classification.html | 865 +++++++++--
.../vi/fairness-aware/toxicity-detection.html | 921 ++++++++++--
.../vi/few-shot/information-retrieval.html | 766 ++++++++--
_site/leaderboard/vi/few-shot/knowledge.html | 795 ++++++++--
.../vi/few-shot/language-modeling.html | 998 ++++++++++--
_site/leaderboard/vi/few-shot/reasoning.html | 863 +++++++++--
.../vi/few-shot/sentiment-analysis.html | 908 +++++++++--
.../vi/few-shot/text-classification.html | 908 +++++++++--
.../vi/few-shot/toxicity-detection.html | 908 +++++++++--
.../leaderboard/vi/few-shot/translation.html | 714 +++++++--
.../vi/medium-prompt/question-answering.html | 421 +++++-
.../vi/medium-prompt/summarization.html | 1277 ++++++++++++++--
.../vi/randomized-choice/knowledge.html | 476 +++++-
.../information-retrieval.html | 755 ++++++++--
.../vi/robustness-aware/knowledge.html | 793 ++++++++--
.../robustness-aware/question-answering.html | 430 +++++-
.../robustness-aware/sentiment-analysis.html | 908 +++++++++--
.../vi/robustness-aware/summarization.html | 1334 ++++++++++++++---
.../robustness-aware/text-classification.html | 908 +++++++++--
.../robustness-aware/toxicity-detection.html | 908 +++++++++--
.../vi/robustness-aware/translation.html | 766 ++++++++--
.../vi/weaker-prompt/question-answering.html | 461 +++++-
.../vi/weaker-prompt/summarization.html | 1277 ++++++++++++++--
.../vi/zero-shot/information-retrieval.html | 755 ++++++++--
_site/leaderboard/vi/zero-shot/knowledge.html | 785 ++++++++--
.../vi/zero-shot/language-modeling.html | 1035 +++++++++++--
.../vi/zero-shot/question-answering.html | 475 ++++--
_site/leaderboard/vi/zero-shot/reasoning.html | 803 ++++++++--
.../vi/zero-shot/sentiment-analysis.html | 865 +++++++++--
.../vi/zero-shot/summarization.html | 1315 +++++++++++++---
.../vi/zero-shot/text-classification.html | 895 +++++++++--
.../vi/zero-shot/toxicity-detection.html | 847 +++++++++--
_site/prompt/index.html | 26 +-
290 files changed, 47731 insertions(+), 31121 deletions(-)
create mode 100644 _data/leaderboard/vi/bias_toxicity/question_answering.yml
create mode 100644 _data/leaderboard/vi/bias_toxicity/summarization.yml
create mode 100644 _data/leaderboard/vi/bias_toxicity/translation.yml
create mode 100644 _data/leaderboard/vi/chain_of_thought/reasoning.yml
create mode 100644 _data/leaderboard/vi/fairness_aware/information_retrieval.yml
create mode 100644 _data/leaderboard/vi/fairness_aware/language_modeling.yml
create mode 100644 _data/leaderboard/vi/fairness_aware/question_answering.yml
create mode 100644 _data/leaderboard/vi/fairness_aware/sentiment_analysis.yml
create mode 100644 _data/leaderboard/vi/fairness_aware/text_classification.yml
create mode 100644 _data/leaderboard/vi/fairness_aware/toxicity_detection.yml
create mode 100644 _data/leaderboard/vi/few_shot/information_retrieval.yml
create mode 100644 _data/leaderboard/vi/few_shot/knowledge.yml
create mode 100644 _data/leaderboard/vi/few_shot/language_modeling.yml
create mode 100644 _data/leaderboard/vi/few_shot/reasoning.yml
create mode 100644 _data/leaderboard/vi/few_shot/sentiment_analysis.yml
create mode 100644 _data/leaderboard/vi/few_shot/text_classification.yml
create mode 100644 _data/leaderboard/vi/few_shot/toxicity_detection.yml
create mode 100644 _data/leaderboard/vi/few_shot/translation.yml
create mode 100644 _data/leaderboard/vi/medium_prompt/question_answering.yml
create mode 100644 _data/leaderboard/vi/medium_prompt/summarization.yml
create mode 100644 _data/leaderboard/vi/models.yml
create mode 100644 _data/leaderboard/vi/randomized_choice/knowledge.yml
create mode 100644 _data/leaderboard/vi/robustness_aware/information_retrieval.yml
create mode 100644 _data/leaderboard/vi/robustness_aware/knowledge.yml
create mode 100644 _data/leaderboard/vi/robustness_aware/question_answering.yml
create mode 100644 _data/leaderboard/vi/robustness_aware/sentiment_analysis.yml
create mode 100644 _data/leaderboard/vi/robustness_aware/summarization.yml
create mode 100644 _data/leaderboard/vi/robustness_aware/text_classification.yml
create mode 100644 _data/leaderboard/vi/robustness_aware/toxicity_detection.yml
create mode 100644 _data/leaderboard/vi/robustness_aware/translation.yml
create mode 100644 _data/leaderboard/vi/weaker_prompt/question_answering.yml
create mode 100644 _data/leaderboard/vi/weaker_prompt/summarization.yml
create mode 100644 _data/leaderboard/vi/zero_shot/information_retrieval.yml
create mode 100644 _data/leaderboard/vi/zero_shot/knowledge.yml
create mode 100644 _data/leaderboard/vi/zero_shot/language_modeling.yml
create mode 100644 _data/leaderboard/vi/zero_shot/question_answering.yml
create mode 100644 _data/leaderboard/vi/zero_shot/reasoning.yml
create mode 100644 _data/leaderboard/vi/zero_shot/sentiment_analysis.yml
create mode 100644 _data/leaderboard/vi/zero_shot/summarization.yml
create mode 100644 _data/leaderboard/vi/zero_shot/text_classification.yml
create mode 100644 _data/leaderboard/vi/zero_shot/toxicity_detection.yml
diff --git a/_config.yml b/_config.yml
index 0bd5c0c..e97265b 100644
--- a/_config.yml
+++ b/_config.yml
@@ -2,7 +2,7 @@ title: MELT
description: "Multilingual Evaluation Toolkits"
# disabled because we are using a custom domain
-baseurl: https://ai.stanford.edu/~sttruong/melt
+# baseurl: https://ai.stanford.edu/~sttruong/melt
color-primary: "#B1040E"
color-light: "#E50808"
diff --git a/_data/categories.yml b/_data/categories.yml
index c28e596..771e26e 100644
--- a/_data/categories.yml
+++ b/_data/categories.yml
@@ -1,6 +1,7 @@
- zero-shot
- few-shot
- weaker-prompt
+- medium-prompt
- fairness-aware
- robustness-aware
- chain-of-thought
diff --git a/_data/lang_tasks.yml b/_data/lang_tasks.yml
index 889ad7b..bea5e77 100644
--- a/_data/lang_tasks.yml
+++ b/_data/lang_tasks.yml
@@ -3,6 +3,7 @@ vi:
zero-shot: true
few-shot: false
weaker-prompt: true
+ medium-prompt: true
fairness-aware: true
robustness-aware: true
chain-of-thought: false
@@ -12,6 +13,7 @@ vi:
zero-shot: true
few-shot: false
weaker-prompt: true
+ medium-prompt: true
fairness-aware: false
robustness-aware: true
chain-of-thought: false
diff --git a/_data/leaderboard/vi/bias_toxicity/question_answering.yml b/_data/leaderboard/vi/bias_toxicity/question_answering.yml
new file mode 100644
index 0000000..103ac14
--- /dev/null
+++ b/_data/leaderboard/vi/bias_toxicity/question_answering.yml
@@ -0,0 +1,146 @@
+XQuAD:
+ URA-LLaMa 70B:
+ DRR: null
+ DRG: 0.39
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.41
+ SAG_std: 0.00
+ Tox: 0.02
+ Tox_std: 0.00
+ URA-LLaMa 13B:
+ DRR: null
+ DRG: 0.39
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.45
+ SAG_std: 0.01
+ Tox: 0.02
+ Tox_std: 0.00
+ URA-LLaMa 7B:
+ DRR: null
+ DRG: 0.43
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.48
+ SAG_std: 0.00
+ Tox: 0.03
+ Tox_std: 0.00
+ LLaMa-2 13B:
+ DRR: null
+ DRG: 0.35
+ DRG_std: 0.03
+ SAR: null
+ SAG: 0.46
+ SAG_std: 0.00
+ Tox: 0.01
+ Tox_std: 0.00
+ LLaMa-2 7B:
+ DRR: null
+ DRG: 0.46
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.42
+ SAG_std: 0.00
+ Tox: 0.01
+ Tox_std: 0.00
+ Vietcuna 7B:
+ DRR: null
+ DRG: 0.50
+ DRG_std: 0.00
+ SAR: null
+ SAG: null
+ SAG_std: null
+ Tox: 0.04
+ Tox_std: 0.00
+ GPT-3.5:
+ DRR: null
+ DRG: 0.43
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.48
+ SAG_std: 0.00
+ Tox: 0.02
+ Tox_std: 0.00
+ GPT-4:
+ DRR: null
+ DRG: 0.40
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.45
+ SAG_std: 0.00
+ Tox: 0.02
+ Tox_std: 0.00
+MLQA:
+ URA-LLaMa 70B:
+ DRR: null
+ DRG: 0.14
+ DRG_std: 0.02
+ SAR: null
+ SAG: 0.42
+ SAG_std: 0.03
+ Tox: 0.02
+ Tox_std: 0.00
+ URA-LLaMa 13B:
+ DRR: null
+ DRG: 0.17
+ DRG_std: 0.1
+ SAR: null
+ SAG: 0.38
+ SAG_std: 0.00
+ Tox: 0.02
+ Tox_std: 0.00
+ URA-LLaMa 7B:
+ DRR: null
+ DRG: 0.18
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.37
+ SAG_std: 0.01
+ Tox: 0.02
+ Tox_std: 0.00
+ LLaMa-2 13B:
+ DRR: null
+ DRG: 0.27
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.43
+ SAG_std: 0.00
+ Tox: 0.01
+ Tox_std: 0.00
+ LLaMa-2 7B:
+ DRR: null
+ DRG: 0.21
+ DRG_std: 0.06
+ SAR: null
+ SAG: 0.45
+ SAG_std: 0.00
+ Tox: 0.01
+ Tox_std: 0.00
+ Vietcuna 7B:
+ DRR: null
+ DRG: 0.23
+ DRG_std: 0.09
+ SAR: null
+ SAG: 0.49
+ SAG_std: 0.01
+ Tox: 0.04
+ Tox_std: 0.00
+ GPT-3.5:
+ DRR: null
+ DRG: 0.18
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.40
+ SAG_std: 0.00
+ Tox: 0.02
+ Tox_std: 0.00
+ GPT-4:
+ DRR: null
+ DRG: 0.16
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.41
+ SAG_std: 0.01
+ Tox: 0.02
+ Tox_std: 0.00
\ No newline at end of file
diff --git a/_data/leaderboard/vi/bias_toxicity/summarization.yml b/_data/leaderboard/vi/bias_toxicity/summarization.yml
new file mode 100644
index 0000000..70e2f28
--- /dev/null
+++ b/_data/leaderboard/vi/bias_toxicity/summarization.yml
@@ -0,0 +1,146 @@
+VietNews:
+ URA-LLaMa 70B:
+ DRR: null
+ DRG: 0.21
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.31
+ SAG_std: 0.01
+ Tox: 0.05
+ Tox_std: 0.00
+ URA-LLaMa 13B:
+ DRR: null
+ DRG: 0.20
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.29
+ SAG_std: 0.01
+ Tox: 0.04
+ Tox_std: 0.00
+ URA-LLaMa 7B:
+ DRR: null
+ DRG: 0.24
+ DRG_std: 0.02
+ SAR: null
+ SAG: 0.33
+ SAG_std: 0.01
+ Tox: 0.04
+ Tox_std: 0.00
+ LLaMa-2 13B:
+ DRR: null
+ DRG: 0.26
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.38
+ SAG_std: 0.01
+ Tox: 0.01
+ Tox_std: 0.00
+ LLaMa-2 7B:
+ DRR: null
+ DRG: 0.28
+ DRG_std: 0.02
+ SAR: null
+ SAG: 0.39
+ SAG_std: 0.01
+ Tox: 0.01
+ Tox_std: 0.00
+ Vietcuna 7B:
+ DRR: null
+ DRG: 0.21
+ DRG_std: 0.02
+ SAR: null
+ SAG: 0.32
+ SAG_std: 0.02
+ Tox: 0.04
+ Tox_std: 0.00
+ GPT-3.5:
+ DRR: null
+ DRG: 0.22
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.29
+ SAG_std: 0.01
+ Tox: 0.04
+ Tox_std: 0.00
+ GPT-4:
+ DRR: null
+ DRG: 0.19
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.28
+ SAG_std: 0.01
+ Tox: 0.06
+ Tox_std: 0.00
+WikiLingua:
+ URA-LLaMa 70B:
+ DRR: null
+ DRG: 0.03
+ DRG_std: 0.02
+ SAR: null
+ SAG: 0.25
+ SAG_std: 0.02
+ Tox: 0.03
+ Tox_std: 0.00
+ URA-LLaMa 13B:
+ DRR: null
+ DRG: 0.07
+ DRG_std: 0.04
+ SAR: null
+ SAG: 0.31
+ SAG_std: 0.03
+ Tox: 0.02
+ Tox_std: 0.00
+ URA-LLaMa 7B:
+ DRR: null
+ DRG: 0.07
+ DRG_std: 0.02
+ SAR: null
+ SAG: 0.38
+ SAG_std: 0.02
+ Tox: 0.03
+ Tox_std: 0.00
+ LLaMa-2 13B:
+ DRR: null
+ DRG: 0.17
+ DRG_std: 0.08
+ SAR: null
+ SAG: 0.50
+ SAG_std: 0.02
+ Tox: 0.01
+ Tox_std: 0.00
+ LLaMa-2 7B:
+ DRR: null
+ DRG: 0.39
+ DRG_std: 0.05
+ SAR: null
+ SAG: 0.50
+ SAG_std: 0.02
+ Tox: 0.01
+ Tox_std: 0.00
+ Vietcuna 7B:
+ DRR: null
+ DRG: 0.17
+ DRG_std: 0.04
+ SAR: null
+ SAG: 0.39
+ SAG_std: 0.03
+ Tox: 0.03
+ Tox_std: 0.00
+ GPT-3.5:
+ DRR: null
+ DRG: 0.03
+ DRG_std: 0.02
+ SAR: null
+ SAG: 0.28
+ SAG_std: 0.01
+ Tox: 0.02
+ Tox_std: 0.00
+ GPT-4:
+ DRR: null
+ DRG: 0.09
+ DRG_std: 0.02
+ SAR: null
+ SAG: 0.28
+ SAG_std: 0.01
+ Tox: 0.02
+ Tox_std: 0.00
\ No newline at end of file
diff --git a/_data/leaderboard/vi/bias_toxicity/translation.yml b/_data/leaderboard/vi/bias_toxicity/translation.yml
new file mode 100644
index 0000000..62f6b38
--- /dev/null
+++ b/_data/leaderboard/vi/bias_toxicity/translation.yml
@@ -0,0 +1,146 @@
+PhoMT (En - Vi):
+ URA-LLaMa 70B:
+ DRR: null
+ DRG: 0.03
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.30
+ SAG_std: 0.01
+ Tox: 0.05
+ Tox_std: 0.00
+ URA-LLaMa 13B:
+ DRR: null
+ DRG: 0.09
+ DRG_std: 0.00
+ SAR: null
+ SAG: 0.33
+ SAG_std: 0.01
+ Tox: 0.05
+ Tox_std: 0.00
+ URA-LLaMa 7B:
+ DRR: null
+ DRG: 0.13
+ DRG_std: 0.00
+ SAR: null
+ SAG: 0.33
+ SAG_std: 0.01
+ Tox: 0.05
+ Tox_std: 0.00
+ LLaMa-2 13B:
+ DRR: null
+ DRG: 0.08
+ DRG_std: 0.00
+ SAR: null
+ SAG: 0.33
+ SAG_std: 0.02
+ Tox: 0.05
+ Tox_std: 0.00
+ LLaMa-2 7B:
+ DRR: null
+ DRG: 0.17
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.29
+ SAG_std: 0.01
+ Tox: 0.04
+ Tox_std: 0.00
+ Vietcuna 7B:
+ DRR: null
+ DRG: 0.18
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.36
+ SAG_std: 0.01
+ Tox: 0.04
+ Tox_std: 0.00
+ GPT-3.5:
+ DRR: null
+ DRG: 0.11
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.34
+ SAG_std: 0.01
+ Tox: 0.05
+ Tox_std: 0.00
+ GPT-4:
+ DRR: null
+ DRG: 0.09
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.34
+ SAG_std: 0.01
+ Tox: 0.05
+ Tox_std: 0.00
+OPUS100 (En - Vi):
+ URA-LLaMa 70B:
+ DRR: null
+ DRG: 0.27
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.47
+ SAG_std: 0.01
+ Tox: 0.06
+ Tox_std: 0.00
+ URA-LLaMa 13B:
+ DRR: null
+ DRG: 0.27
+ DRG_std: 0.01
+ SAR: null
+ SAG: 0.43
+ SAG_std: 0.02
+ Tox: 0.07
+ Tox_std: 0.00
+ URA-LLaMa 7B:
+ DRR: null
+ DRG: 0.18
+ DRG_std: 0.03
+ SAR: null
+ SAG: 0.47
+ SAG_std: 0.01
+ Tox: 0.07
+ Tox_std: 0.00
+ LLaMa-2 13B:
+ DRR: null
+ DRG: 0.31
+ DRG_std: 0.02
+ SAR: null
+ SAG: 0.47
+ SAG_std: 0.01
+ Tox: 0.06
+ Tox_std: 0.00
+ LLaMa-2 7B:
+ DRR: null
+ DRG: 0.21
+ DRG_std: 0.02
+ SAR: null
+ SAG: 0.45
+ SAG_std: 0.02
+ Tox: 0.05
+ Tox_std: 0.00
+ Vietcuna 7B:
+ DRR: null
+ DRG: 0.16
+ DRG_std: 0.03
+ SAR: null
+ SAG: 0.43
+ SAG_std: 0.02
+ Tox: 0.07
+ Tox_std: 0.00
+ GPT-3.5:
+ DRR: null
+ DRG: 0.16
+ DRG_std: 0.03
+ SAR: null
+ SAG: 0.43
+ SAG_std: 0.03
+ Tox: 0.07
+ Tox_std: 0.00
+ GPT-4:
+ DRR: null
+ DRG: 0.14
+ DRG_std: 0.03
+ SAR: null
+ SAG: 0.41
+ SAG_std: 0.01
+ Tox: 0.07
+ Tox_std: 0.00
\ No newline at end of file
diff --git a/_data/leaderboard/vi/chain_of_thought/reasoning.yml b/_data/leaderboard/vi/chain_of_thought/reasoning.yml
new file mode 100644
index 0000000..f236873
--- /dev/null
+++ b/_data/leaderboard/vi/chain_of_thought/reasoning.yml
@@ -0,0 +1,64 @@
+MATH:
+ URA-LLaMa 70B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.12
+ F1_std: 0.01
+ Equ: 0.18
+ Equ_std: 0.02
+ URA-LLaMa 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.23
+ F1_std: 0.01
+ Equ: 0.17
+ Equ_std: 0.01
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.23
+ F1_std: 0.01
+ Equ: 0.09
+ Equ_std: 0.01
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.12
+ F1_std: 0.01
+ Equ: 0.18
+ Equ_std: 0.02
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.10
+ F1_std: 0.00
+ Equ: 0.12
+ Equ_std: 0.02
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.13
+ F1_std: 0.01
+ Equ: 0.10
+ Equ_std: 0.01
+ MixSUra:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.17
+ F1_std: 0.01
+ Equ: 0.33
+ Equ_std: 0.00
+ GPT-3.5:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.32
+ F1_std: 0.01
+ Equ: 0.78
+ Equ_std: 0.02
+ GPT-4:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.32
+ F1_std: 0.01
+ Equ: 0.79
+ Equ_std: 0.02
\ No newline at end of file
diff --git a/_data/leaderboard/vi/fairness_aware/information_retrieval.yml b/_data/leaderboard/vi/fairness_aware/information_retrieval.yml
new file mode 100644
index 0000000..e7043fe
--- /dev/null
+++ b/_data/leaderboard/vi/fairness_aware/information_retrieval.yml
@@ -0,0 +1,146 @@
+mMARCO:
+ URA-LLaMa 70B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ URA-LLaMa 13B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ URA-LLaMa 7B:
+ M@10: 0.10
+ M@10_std: 0.00
+ M@10B: 0.10
+ M@10B_std: 0.00
+ N@10: 0.14
+ N@10_std: 0.00
+ N@10B: 0.14
+ N@10B_std: 0.00
+ LLaMa-2 13B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ LLaMa-2 7B:
+ M@10: 0.05
+ M@10_std: 0.00
+ M@10B: 0.10
+ M@10B_std: 0.00
+ N@10: 0.07
+ N@10_std: 0.00
+ N@10B: 0.16
+ N@10B_std: 0.00
+ Vietcuna 7B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ GPT-3.5:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ GPT-4:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+mRobust04:
+ URA-LLaMa 70B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ URA-LLaMa 13B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ URA-LLaMa 7B:
+ M@10: 0.01
+ M@10_std: 0.00
+ M@10B: 0.01
+ M@10B_std: 0.00
+ N@10: 0.00
+ N@10_std: 0.00
+ N@10B: 0.00
+ N@10B_std: 0.00
+ LLaMa-2 13B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ LLaMa-2 7B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ Vietcuna 7B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ GPT-3.5:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ GPT-4:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/fairness_aware/language_modeling.yml b/_data/leaderboard/vi/fairness_aware/language_modeling.yml
new file mode 100644
index 0000000..69ccccb
--- /dev/null
+++ b/_data/leaderboard/vi/fairness_aware/language_modeling.yml
@@ -0,0 +1,236 @@
+MLQA-MLM:
+ URA-LLaMa 70B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 0.58
+ CER_std: 0.01
+ WER: 0.70
+ WER_std: 0.01
+ CED: 653.57
+ CED_std: 12.05
+ WED: 150.64
+ WED_std: 2.73
+ PLX: 1.25
+ PLX_std: 0.06
+ URA-LLaMa 13B:
+ EM: 0.02
+ EM_std: 0.00
+ CER: 0.40
+ CER_std: 0.01
+ WER: 0.56
+ WER_std: 0.01
+ CED: 518.38
+ CED_std: 11.19
+ WED: 125.24
+ WED_std: 2.66
+ PLX: 1.48
+ PLX_std: 0.11
+ URA-LLaMa 7B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 0.40
+ CER_std: 0.01
+ WER: 0.55
+ WER_std: 0.01
+ CED: 492.93
+ CED_std: 11.32
+ WED: 117.82
+ WED_std: 2.72
+ PLX: 1.22
+ PLX_std: 0.01
+ LLaMa-2 13B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 0.76
+ CER_std: 0.00
+ WER: 0.89
+ WER_std: 0.00
+ CED: 782.03
+ CED_std: 11.71
+ WED: 192.66
+ WED_std: 2.83
+ PLX: 1.27
+ PLX_std: 0.04
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 0.79
+ CER_std: 0.00
+ WER: 0.96
+ WER_std: 0.00
+ CED: 761.38
+ CED_std: 10.65
+ WED: 197.18
+ WED_std: 2.66
+ PLX: 1.75
+ PLX_std: 0.20
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 1.04
+ CER_std: 0.00
+ WER: 1.06
+ WER_std: 0.00
+ CED: 940.71
+ CED_std: 12.48
+ WED: 208.05
+ WED_std: 2.81
+ PLX: 1.40
+ PLX_std: 0.00
+ MixSUra:
+ EM: 0.00
+ EM_std: null
+ CER: 0.56
+ CER_std: null
+ WER: 0.63
+ WER_std: null
+ CED: 535.76
+ CED_std: null
+ WED: 133.64
+ WED_std: null
+ PLX: 1.00
+ PLX_std: null
+ GPT-3.5:
+ EM: 0.03
+ EM_std: 0.00
+ CER: 0.29
+ CER_std: 0.01
+ WER: 0.46
+ WER_std: 0.01
+ CED: 398.19
+ CED_std: 11.01
+ WED: 96.42
+ WED_std: 2.54
+ PLX: null
+ PLX_std: null
+ GPT-4:
+ EM: 0.06
+ EM_std: 0.00
+ CER: 0.36
+ CER_std: 0.01
+ WER: 0.41
+ WER_std: 0.01
+ CED: 347.82
+ CED_std: 10.23
+ WED: 86.96
+ WED_std: 2.41
+ PLX: null
+ PLX_std: null
+VSEC:
+ URA-LLaMa 70B:
+ EM: 0.30
+ EM_std: 0.00
+ CER: 0.11
+ CER_std: 0.00
+ WER: 0.14
+ WER_std: 0.00
+ CED: 15.19
+ CED_std: 0.42
+ WED: 4.12
+ WED_std: 0.11
+ PLX: 1.13
+ PLX_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.32
+ EM_std: 0.00
+ CER: 0.07
+ CER_std: 0.00
+ WER: 0.21
+ WER_std: 0.00
+ CED: 2.98
+ CED_std: 0.11
+ WED: 1.24
+ WED_std: 0.03
+ PLX: 1.15
+ PLX_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.20
+ EM_std: 0.00
+ CER: 0.54
+ CER_std: 0.01
+ WER: 0.67
+ WER_std: 0.01
+ CED: 41.77
+ CED_std: 1.57
+ WED: 10.12
+ WED_std: 0.35
+ PLX: 1.07
+ PLX_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.15
+ EM_std: 0.00
+ CER: 0.07
+ CER_std: 0.00
+ WER: 0.22
+ WER_std: 0.00
+ CED: 3.39
+ CED_std: 0.16
+ WED: 1.52
+ WED_std: 0.04
+ PLX: 1.01
+ PLX_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.12
+ EM_std: 0.00
+ CER: 0.35
+ CER_std: 0.01
+ WER: 0.48
+ WER_std: 0.01
+ CED: 47.54
+ CED_std: 0.85
+ WED: 11.82
+ WED_std: 0.19
+ PLX: 1.06
+ PLX_std: 0.00
+ Vietcuna 7B:
+ EM: 0.06
+ EM_std: 0.00
+ CER: 4.78
+ CER_std: 0.06
+ WER: 4.80
+ WER_std: 0.06
+ CED: 634.48
+ CED_std: 8.58
+ WED: 145.12
+ WED_std: 1.94
+ PLX: 1.46
+ PLX_std: 0.01
+ MixSUra:
+ EM: 0.07
+ EM_std: null
+ CER: 0.20
+ CER_std: null
+ WER: 0.29
+ WER_std: null
+ CED: 25.96
+ CED_std: null
+ WED: 8.79
+ WED_std: null
+ PLX: 1.00
+ PLX_std: null
+ GPT-3.5:
+ EM: 0.59
+ EM_std: 0.00
+ CER: 0.06
+ CER_std: 0.00
+ WER: 0.19
+ WER_std: 0.00
+ CED: 1.99
+ CED_std: 0.08
+ WED: 0.74
+ WED_std: 0.02
+ PLX: null
+ PLX_std: null
+ GPT-4:
+ EM: 0.67
+ EM_std: 0.00
+ CER: 0.01
+ CER_std: 0.00
+ WER: 0.02
+ WER_std: 0.00
+ CED: 1.30
+ CED_std: 0.04
+ WED: 0.54
+ WED_std: 0.01
+ PLX: null
+ PLX_std: null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/fairness_aware/question_answering.yml b/_data/leaderboard/vi/fairness_aware/question_answering.yml
new file mode 100644
index 0000000..32056c4
--- /dev/null
+++ b/_data/leaderboard/vi/fairness_aware/question_answering.yml
@@ -0,0 +1,82 @@
+XQuAD:
+ URA-LLaMa 70B:
+ EM: 0.04
+ EM_std: 0.00
+ F1: 0.27
+ F1_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.13
+ F1_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.13
+ F1_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.03
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.04
+ F1_std: 0.00
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ GPT-3.5:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.24
+ F1_std: 0.00
+ GPT-4:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.26
+ F1_std: 0.00
+MLQA:
+ URA-LLaMa 70B:
+ EM: 0.03
+ EM_std: 0.00
+ F1: 0.25
+ F1_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.14
+ F1_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.15
+ F1_std: 0.01
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.04
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.05
+ F1_std: 0.00
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ GPT-3.5:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.23
+ F1_std: 0.00
+ GPT-4:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.24
+ F1_std: 0.00
\ No newline at end of file
diff --git a/_data/leaderboard/vi/fairness_aware/sentiment_analysis.yml b/_data/leaderboard/vi/fairness_aware/sentiment_analysis.yml
new file mode 100644
index 0000000..354e1d2
--- /dev/null
+++ b/_data/leaderboard/vi/fairness_aware/sentiment_analysis.yml
@@ -0,0 +1,222 @@
+VLSP 2016:
+ URA-LLaMa 70B:
+ AC: 0.65
+ AC_std: 0.01
+ F1: 0.49
+ F1_std: 0.01
+ AR: 0.58
+ AR_std: 0.01
+ ECE: 0.13
+ ECE_std: 0.01
+ A@10: 0.77
+ A@10_std: 0.04
+ URA-LLaMa 13B:
+ AC: 0.59
+ AC_std: 0.01
+ F1: 0.57
+ F1_std: 0.01
+ AR: 0.62
+ AR_std: 0.01
+ ECE: 0.07
+ ECE_std: 0.01
+ A@10: 0.83
+ A@10_std: 0.04
+ URA-LLaMa 7B:
+ AC: 0.74
+ AC_std: 0.02
+ F1: 0.39
+ F1_std: 0.06
+ AR: 0.83
+ AR_std: 0.01
+ ECE: 0.21
+ ECE_std: 0.02
+ A@10: 0.98
+ A@10_std: 0.02
+ LLaMa-2 13B:
+ AC: 0.51
+ AC_std: 0.01
+ F1: 0.1
+ F1_std: 0.06
+ AR: 0.56
+ AR_std: 0.01
+ ECE: 0.32
+ ECE_std: 0.02
+ A@10: 0.79
+ A@10_std: 0.04
+ LLaMa-2 7B:
+ AC: 0.45
+ AC_std: 0.02
+ F1: 0.34
+ F1_std: 0.01
+ AR: 0.53
+ AR_std: 0.01
+ ECE: 0.26
+ ECE_std: 0.02
+ A@10: 0.50
+ A@10_std: 0.0
+ Vietcuna 7B:
+ AC: 0.04
+ AC_std: 0.01
+ F1: 0.04
+ F1_std: 0.01
+ AR: 0.49
+ AR_std: 0.01
+ ECE: 0.71
+ ECE_std: 0.01
+ A@10: 0.05
+ A@10_std: 0.02
+ MixSUra 8x7B:
+ AC: 0.62
+ AC_std: null
+ F1: 0.62
+ F1_std: null
+ AR: 0.59
+ AR_std: null
+ ECE: 0.30
+ ECE_std: null
+ A@10: 0.59
+ A@10_std: null
+ Gemini Pro:
+ AC: 0.67
+ AC_std: null
+ F1: 0.50
+ F1_std: null
+ AR: null
+ AR_std: null
+ ECE: 0.34
+ ECE_std: null
+ A@10: 0.59
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.66
+ AC_std: 0.01
+ F1: 0.60
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.33
+ ECE_std: 0.01
+ A@10: 0.52
+ A@10_std: 0.05
+ GPT-4:
+ AC: 0.75
+ AC_std: 0.01
+ F1: 0.74
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.41
+ ECE_std: 0.00
+ A@10: 0.73
+ A@10_std: 0.04
+UiT-VSFC:
+ URA-LLaMa 70B:
+ AC: 0.76
+ AC_std: 0.01
+ F1: 0.48
+ F1_std: 0.01
+ AR: 0.61
+ AR_std: 0.01
+ ECE: 0.17
+ ECE_std: 0.01
+ A@10: 0.66
+ A@10_std: 0.03
+ URA-LLaMa 13B:
+ AC: 0.75
+ AC_std: 0.01
+ F1: 0.46
+ F1_std: 0.08
+ AR: 0.83
+ AR_std: 0.01
+ ECE: 0.11
+ ECE_std: 0.01
+ A@10: 0.88
+ A@10_std: 0.02
+ URA-LLaMa 7B:
+ AC: 0.73
+ AC_std: 0.01
+ F1: 0.73
+ F1_std: 0.01
+ AR: 0.78
+ AR_std: 0.01
+ ECE: 0.13
+ ECE_std: 0.01
+ A@10: 0.94
+ A@10_std: 0.01
+ LLaMa-2 13B:
+ AC: 0.63
+ AC_std: 0.01
+ F1: 0.41
+ F1_std: 0.02
+ AR: 0.70
+ AR_std: 0.01
+ ECE: 0.13
+ ECE_std: 0.01
+ A@10: 0.89
+ A@10_std: 0.02
+ LLaMa-2 7B:
+ AC: 0.51
+ AC_std: 0.01
+ F1: 0.55
+ F1_std: 0.01
+ AR: 0.68
+ AR_std: 0.01
+ ECE: 0.22
+ ECE_std: 0.01
+ A@10: 0.64
+ A@10_std: 0.03
+ Vietcuna 7B:
+ AC: 0.03
+ AC_std: 0.00
+ F1: 0.03
+ F1_std: 0.00
+ AR: 0.55
+ AR_std: 0.01
+ ECE: 0.50
+ ECE_std: 0.00
+ A@10: 0.01
+ A@10_std: 0.01
+ MixSUra 8x7B:
+ AC: 0.74
+ AC_std: null
+ F1: 0.46
+ F1_std: null
+ AR: 0.61
+ AR_std: null
+ ECE: 0.24
+ ECE_std: null
+ A@10: 0.66
+ A@10_std: null
+ Gemini Pro:
+ AC: 0.79
+ AC_std: null
+ F1: 0.50
+ F1_std: null
+ AR: null
+ AR_std: null
+ ECE: 0.46
+ ECE_std: null
+ A@10: 0.82
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.86
+ AC_std: 0.01
+ F1: 0.71
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.52
+ ECE_std: 0.01
+ A@10: 0.86
+ A@10_std: 0.02
+ GPT-4:
+ AC: 0.85
+ AC_std: 0.01
+ F1: 0.71
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.52
+ ECE_std: 0.01
+ A@10: 0.87
+ A@10_std: 0.02
\ No newline at end of file
diff --git a/_data/leaderboard/vi/fairness_aware/text_classification.yml b/_data/leaderboard/vi/fairness_aware/text_classification.yml
new file mode 100644
index 0000000..4eb1fcb
--- /dev/null
+++ b/_data/leaderboard/vi/fairness_aware/text_classification.yml
@@ -0,0 +1,222 @@
+UiT-VSMEC:
+ URA-LLaMa 70B:
+ AC: 0.24
+ AC_std: 0.02
+ F1: 0.14
+ F1_std: 0.01
+ AR: 0.58
+ AR_std: 0.01
+ ECE: 0.26
+ ECE_std: 0.02
+ A@10: 0.37
+ A@10_std: 0.06
+ URA-LLaMa 13B:
+ AC: 0.31
+ AC_std: 0.02
+ F1: 0.11
+ F1_std: 0.01
+ AR: 0.58
+ AR_std: 0.01
+ ECE: 0.23
+ ECE_std: 0.02
+ A@10: 0.57
+ A@10_std: 0.06
+ URA-LLaMa 7B:
+ AC: 0.29
+ AC_std: 0.02
+ F1: 0.11
+ F1_std: 0.01
+ AR: 0.60
+ AR_std: 0.01
+ ECE: 0.12
+ ECE_std: 0.02
+ A@10: 0.41
+ A@10_std: 0.06
+ LLaMa-2 13B:
+ AC: 0.18
+ AC_std: 0.02
+ F1: 0.08
+ F1_std: 0.01
+ AR: 0.55
+ AR_std: 0.01
+ ECE: 0.45
+ ECE_std: 0.01
+ A@10: 0.44
+ A@10_std: 0.06
+ LLaMa-2 7B:
+ AC: 0.25
+ AC_std: 0.02
+ F1: 0.11
+ F1_std: 0.01
+ AR: 0.57
+ AR_std: 0.01
+ ECE: 0.22
+ ECE_std: 0.02
+ A@10: 0.53
+ A@10_std: 0.06
+ Vietcuna 7B:
+ AC: 0.15
+ AC_std: 0.01
+ F1: 0.05
+ F1_std: 0.01
+ AR: 0.46
+ AR_std: 0.01
+ ECE: 0.85
+ ECE_std: 0.01
+ A@10: 0.16
+ A@10_std: 0.04
+ MixSUra:
+ AC: 0.40
+ AC_std: null
+ F1: 0.36
+ F1_std: null
+ AR: 0.72
+ AR_std: null
+ ECE: 0.53
+ ECE_std: null
+ A@10: 0.79
+ A@10_std: null
+ Gemini Pro:
+ AC: 0.48
+ AC_std: null
+ F1: 0.38
+ F1_std: null
+ AR: null
+ AR_std: null
+ ECE: 0.34
+ ECE_std: null
+ A@10: 0.43
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.44
+ AC_std: 0.02
+ F1: 0.42
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.30
+ ECE_std: 0.02
+ A@10: 0.36
+ A@10_std: 0.06
+ GPT-4:
+ AC: 0.49
+ AC_std: 0.02
+ F1: 0.47
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.35
+ ECE_std: 0.02
+ A@10: 0.36
+ A@10_std: 0.06
+PhoATIS:
+ URA-LLaMa 70B:
+ AC: 0.15
+ AC_std: 0.01
+ F1: 0.22
+ F1_std: 0.03
+ AR: 0.31
+ AR_std: 0.00
+ ECE: 0.81
+ ECE_std: 0.01
+ A@10: 0.13
+ A@10_std: 0.04
+ URA-LLaMa 13B:
+ AC: 0.01
+ AC_std: 0.01
+ F1: 0.05
+ F1_std: 0.02
+ AR: 0.58
+ AR_std: 0.00
+ ECE: 0.84
+ ECE_std: 0.01
+ A@10: 0.00
+ A@10_std: 0.01
+ URA-LLaMa 7B:
+ AC: 0.00
+ AC_std: 0.01
+ F1: 0.00
+ F1_std: 0.00
+ AR: 0.55
+ AR_std: 0.00
+ ECE: 0.30
+ ECE_std: 0.01
+ A@10: 0.01
+ A@10_std: 0.03
+ LLaMa-2 13B:
+ AC: 0.02
+ AC_std: 0.01
+ F1: 0.01
+ F1_std: 0.02
+ AR: 0.57
+ AR_std: 0.01
+ ECE: 0.90
+ ECE_std: 0.01
+ A@10: 0.01
+ A@10_std: 0.01
+ LLaMa-2 7B:
+ AC: 0.02
+ AC_std: 0.00
+ F1: 0.06
+ F1_std: 0.01
+ AR: 0.57
+ AR_std: 0.01
+ ECE: 0.68
+ ECE_std: 0.01
+ A@10: 0.01
+ A@10_std: 0.01
+ Vietcuna 7B:
+ AC: 0.04
+ AC_std: 0.01
+ F1: 0.01
+ F1_std: 0.00
+ AR: 0.77
+ AR_std: 0.01
+ ECE: 0.21
+ ECE_std: 0.01
+ A@10: 0.07
+ A@10_std: 0.03
+ MixSUra:
+ AC: 0.81
+ AC_std: null
+ F1: 0.58
+ F1_std: null
+ AR: 0.96
+ AR_std: null
+ ECE: 0.14
+ ECE_std: null
+ A@10: 0.91
+ A@10_std: null
+ Gemini Pro:
+ AC: 0.79
+ AC_std: null
+ F1: 0.67
+ F1_std: null
+ AR: null
+ AR_std: null
+ ECE: 0.73
+ ECE_std: null
+ A@10: 0.68
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.68
+ AC_std: 0.02
+ F1: 0.66
+ F1_std: 0.03
+ AR: null
+ AR_std: null
+ ECE: 0.62
+ ECE_std: 0.02
+ A@10: 0.67
+ A@10_std: 0.05
+ GPT-4:
+ AC: 0.83
+ AC_std: 0.01
+ F1: 0.76
+ F1_std: 0.03
+ AR: null
+ AR_std: null
+ ECE: 0.77
+ ECE_std: 0.01
+ A@10: 0.87
+ A@10_std: 0.04
\ No newline at end of file
diff --git a/_data/leaderboard/vi/fairness_aware/toxicity_detection.yml b/_data/leaderboard/vi/fairness_aware/toxicity_detection.yml
new file mode 100644
index 0000000..5e80dd7
--- /dev/null
+++ b/_data/leaderboard/vi/fairness_aware/toxicity_detection.yml
@@ -0,0 +1,222 @@
+UiT-ViCTSD:
+ URA-LLaMa 70B:
+ AC: 0.41
+ AC_std: 0.02
+ F1: 0.26
+ F1_std: 0.01
+ AR: 0.75
+ AR_std: 0.01
+ ECE: 0.53
+ ECE_std: 0.01
+ A@10: 0.33
+ A@10_std: 0.05
+ URA-LLaMa 13B:
+ AC: 0.43
+ AC_std: 0.02
+ F1: 0.29
+ F1_std: 0.07
+ AR: 0.66
+ AR_std: 0.01
+ ECE: 0.36
+ ECE_std: 0.02
+ A@10: 0.42
+ A@10_std: 0.05
+ URA-LLaMa 7B:
+ AC: 0.42
+ AC_std: 0.02
+ F1: 0.39
+ F1_std: 0.01
+ AR: 0.60
+ AR_std: 0.01
+ ECE: 0.30
+ ECE_std: 0.01
+ A@10: 0.66
+ A@10_std: 0.05
+ LLaMa-2 13B:
+ AC: 0.27
+ AC_std: 0.01
+ F1: 0.18
+ F1_std: 0.01
+ AR: 0.67
+ AR_std: 0.01
+ ECE: 0.53
+ ECE_std: 0.01
+ A@10: 0.57
+ A@10_std: 0.05
+ LLaMa-2 7B:
+ AC: 0.15
+ AC_std: 0.01
+ F1: 0.11
+ F1_std: 0.01
+ AR: 0.62
+ AR_std: 0.01
+ ECE: 0.67
+ ECE_std: 0.01
+ A@10: 0.07
+ A@10_std: 0.03
+ Vietcuna 7B:
+ AC: 0.08
+ AC_std: 0.01
+ F1: 0.09
+ F1_std: 0.01
+ AR: 0.50
+ AR_std: 0.01
+ ECE: 0.42
+ ECE_std: 0.01
+ A@10: 0.06
+ A@10_std: 0.03
+ MixSUra:
+ AC: 0.69
+ AC_std: null
+ F1: 0.38
+ F1_std: null
+ AR: null
+ AR_std: null
+ ECE: 0.29
+ ECE_std: null
+ A@10: 0.78
+ A@10_std: null
+ Gemini Pro:
+ AC: 0.81
+ AC_std: null
+ F1: 0.43
+ F1_std: null
+ AR: null
+ AR_std: null
+ ECE: 0.31
+ ECE_std: null
+ A@10: 0.82
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.60
+ AC_std: 0.02
+ F1: 0.52
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.11
+ ECE_std: 0.02
+ A@10: 0.63
+ A@10_std: 0.05
+ GPT-4:
+ AC: 0.87
+ AC_std: 0.01
+ F1: 0.69
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.37
+ ECE_std: 0.01
+ A@10: 0.86
+ A@10_std: 0.03
+UiT-ViHSD:
+ URA-LLaMa 70B:
+ AC: 0.15
+ AC_std: 0.00
+ F1: 0.40
+ F1_std: 0.00
+ AR: 0.64
+ AR_std: 0.01
+ ECE: 0.58
+ ECE_std: 0.00
+ A@10: 0.24
+ A@10_std: 0.02
+ URA-LLaMa 13B:
+ AC: 0.24
+ AC_std: 0.01
+ F1: 0.15
+ F1_std: 0.00
+ AR: 0.61
+ AR_std: 0.01
+ ECE: 0.43
+ ECE_std: 0.01
+ A@10: 0.21
+ A@10_std: 0.02
+ URA-LLaMa 7B:
+ AC: 0.16
+ AC_std: 0.00
+ F1: 0.10
+ F1_std: 0.00
+ AR: 0.67
+ AR_std: 0.01
+ ECE: 0.33
+ ECE_std: 0.00
+ A@10: 0.28
+ A@10_std: 0.02
+ LLaMa-2 13B:
+ AC: 0.16
+ AC_std: 0.00
+ F1: 0.10
+ F1_std: 0.00
+ AR: 0.62
+ AR_std: 0.01
+ ECE: 0.59
+ ECE_std: 0.00
+ A@10: 0.42
+ A@10_std: 0.02
+ LLaMa-2 7B:
+ AC: 0.01
+ AC_std: 0.00
+ F1: 0.01
+ F1_std: 0.00
+ AR: 0.56
+ AR_std: 0.01
+ ECE: 0.71
+ ECE_std: 0.00
+ A@10: 0.01
+ A@10_std: 0.00
+ Vietcuna 7B:
+ AC: 0.62
+ AC_std: 0.01
+ F1: 0.21
+ F1_std: 0.00
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.29
+ ECE_std: 0.01
+ A@10: 0.62
+ A@10_std: 0.02
+ MixSUra:
+ AC: 0.56
+ AC_std: null
+ F1: 0.31
+ F1_std: null
+ AR: 0.68
+ AR_std: null
+ ECE: 0.32
+ ECE_std: null
+ A@10: 0.92
+ A@10_std: null
+ Gemini Pro:
+ AC: 0.70
+ AC_std: null
+ F1: 0.37
+ F1_std: null
+ AR: null
+ AR_std: null
+ ECE: 0.36
+ ECE_std: null
+ A@10: 0.69
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.61
+ AC_std: 0.01
+ F1: 0.46
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.29
+ ECE_std: 0.01
+ A@10: 0.62
+ A@10_std: 0.02
+ GPT-4:
+ AC: 0.76
+ AC_std: 0.01
+ F1: 0.56
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.43
+ ECE_std: 0.01
+ A@10: 0.76
+ A@10_std: 0.02
\ No newline at end of file
diff --git a/_data/leaderboard/vi/few_shot/information_retrieval.yml b/_data/leaderboard/vi/few_shot/information_retrieval.yml
new file mode 100644
index 0000000..4bccc5c
--- /dev/null
+++ b/_data/leaderboard/vi/few_shot/information_retrieval.yml
@@ -0,0 +1,164 @@
+mMARCO:
+ URA-LLaMa 70B:
+ M@10: 0.05
+ M@10_std: 0.00
+ M@10B: 0.11
+ M@10B_std: 0.00
+ N@10: 0.06
+ N@10_std: 0.00
+ N@10B: 0.14
+ N@10B_std: 0.00
+ URA-LLaMa 13B:
+ M@10: 0.04
+ M@10_std: 0.00
+ M@10B: 0.10
+ M@10B_std: 0.00
+ N@10: 0.06
+ N@10_std: 0.00
+ N@10B: 0.14
+ N@10B_std: 0.00
+ URA-LLaMa 7B:
+ M@10: 0.04
+ M@10_std: 0.00
+ M@10B: 0.11
+ M@10B_std: 0.00
+ N@10: 0.06
+ N@10_std: 0.00
+ N@10B: 0.16
+ N@10B_std: 0.00
+ LLaMa-2 13B:
+ M@10: 0.07
+ M@10_std: 0.00
+ M@10B: 0.15
+ M@10B_std: 0.00
+ N@10: 0.09
+ N@10_std: 0.00
+ N@10B: 0.21
+ N@10B_std: 0.00
+ LLaMa-2 7B:
+ M@10: 0.05
+ M@10_std: 0.00
+ M@10B: 0.11
+ M@10B_std: 0.00
+ N@10: 0.07
+ N@10_std: 0.00
+ N@10B: 0.16
+ N@10B_std: 0.00
+ Vietcuna 7B:
+ M@10: 0.00
+ M@10_std: 0.00
+ M@10B: 0.00
+ M@10B_std: 0.00
+ N@10: 0.00
+ N@10_std: 0.00
+ N@10B: 0.00
+ N@10B_std: 0.00
+ MixSUra:
+ M@10: 0.01
+ M@10_std: null
+ M@10B: 0.07
+ M@10B_std: null
+ N@10: 0.04
+ N@10_std: null
+ N@10B: 0.11
+ N@10B_std: null
+ GPT-3.5:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ GPT-4:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+mRobust04:
+ URA-LLaMa 70B:
+ M@10: 0.04
+ M@10_std: 0.00
+ M@10B: 0.04
+ M@10B_std: 0.00
+ N@10: 0.03
+ N@10_std: 0.00
+ N@10B: 0.04
+ N@10B_std: 0.00
+ URA-LLaMa 13B:
+ M@10: 0.03
+ M@10_std: 0.00
+ M@10B: 0.05
+ M@10B_std: 0.00
+ N@10: 0.04
+ N@10_std: 0.00
+ N@10B: 0.04
+ N@10B_std: 0.00
+ URA-LLaMa 7B:
+ M@10: 0.03
+ M@10_std: 0.00
+ M@10B: 0.03
+ M@10B_std: 0.00
+ N@10: 0.02
+ N@10_std: 0.00
+ N@10B: 0.02
+ N@10B_std: 0.00
+ LLaMa-2 13B:
+ M@10: 0.05
+ M@10_std: 0.00
+ M@10B: 0.04
+ M@10B_std: 0.00
+ N@10: 0.04
+ N@10_std: 0.00
+ N@10B: 0.04
+ N@10B_std: 0.00
+ LLaMa-2 7B:
+ M@10: 0.02
+ M@10_std: 0.00
+ M@10B: 0.03
+ M@10B_std: 0.00
+ N@10: 0.03
+ N@10_std: 0.00
+ N@10B: 0.02
+ N@10B_std: 0.00
+ Vietcuna 7B:
+ M@10: 0.00
+ M@10_std: 0.00
+ M@10B: 0.00
+ M@10B_std: 0.00
+ N@10: 0.00
+ N@10_std: 0.00
+ N@10B: 0.00
+ N@10B_std: 0.00
+ MixSUra:
+ M@10: 0.04
+ M@10_std: null
+ M@10B: 0.04
+ M@10B_std: null
+ N@10: 0.02
+ N@10_std: null
+ N@10B: 0.02
+ N@10B_std: null
+ GPT-3.5:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ GPT-4:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/few_shot/knowledge.yml b/_data/leaderboard/vi/few_shot/knowledge.yml
new file mode 100644
index 0000000..8acbdc3
--- /dev/null
+++ b/_data/leaderboard/vi/few_shot/knowledge.yml
@@ -0,0 +1,147 @@
+ZaloE2E:
+ num_fields: 2
+ URA-LLaMa 70B:
+ EM: 0.34
+ EM_std: 0.02
+ F1: 0.50
+ F1_std: 0.02
+ URA-LLaMa 13B:
+ EM: 0.26
+ EM_std: 0.02
+ F1: 0.40
+ F1_std: 0.02
+ URA-LLaMa 7B:
+ EM: 0.14
+ EM_std: 0.02
+ F1: 0.25
+ F1_std: 0.02
+ LLaMa-2 13B:
+ EM: 0.22
+ EM_std: 0.02
+ F1: 0.36
+ F1_std: 0.02
+ LLaMa-2 7B:
+ EM: 0.07
+ EM_std: 0.01
+ F1: 0.15
+ F1_std: 0.01
+ Vietcuna 7B:
+ EM: 0.07
+ EM_std: 0.01
+ F1: 0.19
+ F1_std: 0.01
+ MixSUra:
+ EM: 0.19
+ EM_std: null
+ F1: 0.34
+ F1_std: null
+ GPT-3.5:
+ EM: 0.49
+ EM_std: 0.02
+ F1: 0.64
+ F1_std: 0.02
+ GPT-4:
+ EM: 0.49
+ EM_std: 0.02
+ F1: 0.64
+ F1_std: 0.02
+ViMMRC:
+ URA-LLaMa 70B:
+ AC: 0.78
+ AC_std: 0.02
+ F1: 0.63
+ F1_std: 0.03
+ AR: 0.90
+ AR_std: 0.01
+ ECE: 0.13
+ ECE_std: 0.02
+ A@10: 0.96
+ A@10_std: 0.03
+ URA-LLaMa 13B:
+ AC: 0.62
+ AC_std: 0.02
+ F1: 0.50
+ F1_std: 0.02
+ AR: 0.69
+ AR_std: 0.02
+ ECE: 0.18
+ ECE_std: 0.02
+ A@10: 0.65
+ A@10_std: 0.07
+ URA-LLaMa 7B:
+ AC: 0.42
+ AC_std: 0.02
+ F1: 0.33
+ F1_std: 0.02
+ AR: 0.61
+ AR_std: 0.02
+ ECE: 0.13
+ ECE_std: 0.02
+ A@10: 0.39
+ A@10_std: 0.07
+ LLaMa-2 13B:
+ AC: 0.58
+ AC_std: 0.02
+ F1: 0.46
+ F1_std: 0.02
+ AR: 0.62
+ AR_std: 0.02
+ ECE: 0.28
+ ECE_std: 0.02
+ A@10: 0.77
+ A@10_std: 0.06
+ LLaMa-2 7B:
+ AC: 0.30
+ AC_std: 0.02
+ F1: 0.23
+ F1_std: 0.02
+ AR: 0.56
+ AR_std: 0.02
+ ECE: 0.43
+ ECE_std: 0.02
+ A@10: 0.16
+ A@10_std: 0.05
+ Vietcuna 7B:
+ AC: 0.31
+ AC_std: 0.02
+ F1: 0.18
+ F1_std: 0.01
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.06
+ ECE_std: 0.02
+ A@10: 0.31
+ A@10_std: 0.06
+ MixSUra:
+ AC: 0.65
+ AC_std: null
+ F1: 0.64
+ F1_std: null
+ AR: 0.54
+ AR_std: null
+ ECE: 0.29
+ ECE_std: null
+ A@10: 0.65
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.90
+ AC_std: 0.01
+ F1: 0.73
+ F1_std: 0.03
+ AR: null
+ AR_std: null
+ ECE: 0.66
+ ECE_std: 0.01
+ A@10: 0.91
+ A@10_std: 0.04
+ GPT-4:
+ AC: 0.91
+ AC_std: 0.01
+ F1: 0.73
+ F1_std: 0.04
+ AR: null
+ AR_std: null
+ ECE: 0.66
+ ECE_std: 0.01
+ A@10: 0.91
+ A@10_std: 0.04
\ No newline at end of file
diff --git a/_data/leaderboard/vi/few_shot/language_modeling.yml b/_data/leaderboard/vi/few_shot/language_modeling.yml
new file mode 100644
index 0000000..c741c20
--- /dev/null
+++ b/_data/leaderboard/vi/few_shot/language_modeling.yml
@@ -0,0 +1,236 @@
+MLQA-MLM:
+ URA-LLaMa 70B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 0.54
+ CER_std: 0.00
+ WER: 0.66
+ WER_std: 0.00
+ CED: 669.74
+ CED_std: 10.38
+ WED: 153.04
+ WED_std: 2.33
+ PLX: 1.32
+ PLX_std: 0.05
+ URA-LLaMa 13B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 0.45
+ CER_std: 0.01
+ WER: 0.61
+ WER_std: 0.01
+ CED: 559.64
+ CED_std: 11.23
+ WED: 136.97
+ WED_std: 2.68
+ PLX: 1.49
+ PLX_std: 0.10
+ URA-LLaMa 7B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 0.40
+ CER_std: 0.01
+ WER: 0.55
+ WER_std: 0.01
+ CED: 498.36
+ CED_std: 11.01
+ WED: 118.11
+ WED_std: 2.58
+ PLX: 1.24
+ PLX_std: 0.01
+ LLaMa-2 13B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 0.74
+ CER_std: 0.00
+ WER: 0.87
+ WER_std: 0.00
+ CED: 760.98
+ CED_std: 11.91
+ WED: 186.90
+ WED_std: 2.85
+ PLX: 1.24
+ PLX_std: 0.03
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 0.81
+ CER_std: 0.00
+ WER: 0.98
+ WER_std: 0.00
+ CED: 769.36
+ CED_std: 10.51
+ WED: 198.53
+ WED_std: 2.57
+ PLX: 1.74
+ PLX_std: 0.19
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 1.04
+ CER_std: 0.00
+ WER: 1.06
+ WER_std: 0.00
+ CED: 935.65
+ CED_std: 12.47
+ WED: 204.98
+ WED_std: 2.79
+ PLX: 1.40
+ PLX_std: 0.00
+ MixSUra:
+ EM: 0.00
+ EM_std: null
+ CER: 0.55
+ CER_std: null
+ WER: 0.63
+ WER_std: null
+ CED: 526.79
+ CED_std: null
+ WED: 131.02
+ WED_std: null
+ PLX: 1.00
+ PLX_std: null
+ GPT-3.5:
+ EM: 0.04
+ EM_std: 0.00
+ CER: 0.28
+ CER_std: 0.01
+ WER: 0.44
+ WER_std: 0.01
+ CED: 387.37
+ CED_std: 10.86
+ WED: 92.78
+ WED_std: 2.46
+ PLX: null
+ PLX_std: null
+ GPT-4:
+ EM: 0.08
+ EM_std: 0.00
+ CER: 0.23
+ CER_std: 0.01
+ WER: 0.40
+ WER_std: 0.01
+ CED: 336.53
+ CED_std: 10.23
+ WED: 83.55
+ WED_std: 2.34
+ PLX: null
+ PLX_std: null
+VSEC:
+ URA-LLaMa 70B:
+ EM: 0.33
+ EM_std: 0.00
+ CER: 0.11
+ CER_std: 0.00
+ WER: 0.13
+ WER_std: 0.00
+ CED: 15.09
+ CED_std: 0.42
+ WED: 4.05
+ WED_std: 0.11
+ PLX: 1.13
+ PLX_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.35
+ EM_std: 0.00
+ CER: 0.02
+ CER_std: 0.00
+ WER: 0.04
+ WER_std: 0.00
+ CED: 2.81
+ CED_std: 0.12
+ WED: 1.18
+ WED_std: 0.03
+ PLX: 1.15
+ PLX_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.22
+ EM_std: 0.00
+ CER: 0.32
+ CER_std: 0.01
+ WER: 0.33
+ WER_std: 0.01
+ CED: 41.89
+ CED_std: 1.54
+ WED: 10.10
+ WED_std: 0.34
+ PLX: 1.07
+ PLX_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.16
+ EM_std: 0.00
+ CER: 0.03
+ CER_std: 0.00
+ WER: 0.05
+ WER_std: 0.00
+ CED: 3.38
+ CED_std: 0.16
+ WED: 1.51
+ WED_std: 0.04
+ PLX: 1.01
+ PLX_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.12
+ EM_std: 0.00
+ CER: 0.36
+ CER_std: 0.01
+ WER: 0.39
+ WER_std: 0.01
+ CED: 47.50
+ CED_std: 0.86
+ WED: 11.80
+ WED_std: 0.19
+ PLX: 1.06
+ PLX_std: 0.00
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 8.00
+ CER_std: 0.07
+ WER: 8.01
+ WER_std: 0.07
+ CED: 1063.93
+ CED_std: 7.64
+ WED: 241.74
+ WED_std: 1.74
+ PLX: 1.46
+ PLX_std: 0.00
+ MixSUra:
+ EM: 0.08
+ EM_std: null
+ CER: 0.19
+ CER_std: null
+ WER: 0.28
+ WER_std: null
+ CED: 25.13
+ CED_std: null
+ WED: 8.58
+ WED_std: null
+ PLX: 1.00
+ PLX_std: null
+ GPT-3.5:
+ EM: 0.66
+ EM_std: 0.00
+ CER: 0.01
+ CER_std: 0.00
+ WER: 0.02
+ WER_std: 0.00
+ CED: 1.63
+ CED_std: 0.08
+ WED: 0.61
+ WED_std: 0.02
+ PLX: null
+ PLX_std: null
+ GPT-4:
+ EM: 0.75
+ EM_std: 0.00
+ CER: 0.01
+ CER_std: 0.00
+ WER: 0.01
+ WER_std: 0.00
+ CED: 0.89
+ CED_std: 0.04
+ WED: 0.37
+ WED_std: 0.01
+ PLX: null
+ PLX_std: null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/few_shot/reasoning.yml b/_data/leaderboard/vi/few_shot/reasoning.yml
new file mode 100644
index 0000000..101dd47
--- /dev/null
+++ b/_data/leaderboard/vi/few_shot/reasoning.yml
@@ -0,0 +1,192 @@
+"SR - Natural":
+ URA-LLaMa 70B:
+ EM: 0.14
+ EM_std: 0.00
+ F1: 0.48
+ F1_std: 0.00
+ Equ: 0.15
+ Equ_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.08
+ EM_std: 0.00
+ F1: 0.42
+ F1_std: 0.00
+ Equ: 0.08
+ Equ_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.04
+ EM_std: 0.00
+ F1: 0.38
+ F1_std: 0.00
+ Equ: 0.04
+ Equ_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.03
+ EM_std: 0.00
+ F1: 0.24
+ F1_std: 0.00
+ Equ: 0.04
+ Equ_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.01
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ MixSUra:
+ EM: 0.07
+ EM_std: 0.00
+ F1: 0.41
+ F1_std: 0.00
+ Equ: 0.07
+ Equ_std: 0.00
+ GPT-3.5:
+ EM: 0.15
+ EM_std: 0.00
+ F1: 0.50
+ F1_std: 0.00
+ Equ: 0.16
+ Equ_std: 0.00
+ GPT-4:
+ EM: 0.37
+ EM_std: 0.00
+ F1: 0.74
+ F1_std: 0.00
+ Equ: 0.42
+ Equ_std: 0.00
+"SR - Abstract symbol":
+ URA-LLaMa 70B:
+ EM: 0.27
+ EM_std: 0.00
+ F1: 0.85
+ F1_std: 0.00
+ Equ: 0.30
+ Equ_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.20
+ EM_std: 0.00
+ F1: 0.70
+ F1_std: 0.00
+ Equ: 0.17
+ Equ_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.11
+ EM_std: 0.00
+ F1: 0.61
+ F1_std: 0.00
+ Equ: 0.10
+ Equ_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.19
+ EM_std: 0.00
+ F1: 0.69
+ F1_std: 0.00
+ Equ: 0.18
+ Equ_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.06
+ EM_std: 0.00
+ F1: 0.44
+ F1_std: 0.00
+ Equ: 0.06
+ Equ_std: 0.00
+ Vietcuna 7B:
+ EM: 0.14
+ EM_std: 0.00
+ F1: 0.71
+ F1_std: 0.00
+ Equ: 0.10
+ Equ_std: 0.00
+ MixSUra:
+ EM: 0.22
+ EM_std: 0.00
+ F1: 0.78
+ F1_std: 0.00
+ Equ: 0.23
+ Equ_std: 0.00
+ GPT-3.5:
+ EM: 0.26
+ EM_std: 0.00
+ F1: 0.83
+ F1_std: 0.00
+ Equ: 0.29
+ Equ_std: 0.00
+ GPT-4:
+ EM: 0.37
+ EM_std: 0.00
+ F1: 0.87
+ F1_std: 0.00
+ Equ: 0.44
+ Equ_std: 0.00
+MATH:
+ URA-LLaMa 70B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.12
+ Equ_std: 0.02
+ URA-LLaMa 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.01
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.07
+ Equ_std: 0.01
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.16
+ Equ_std: 0.02
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.11
+ Equ_std: 0.01
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.01
+ Equ_std: 0.00
+ MixSUra:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ GPT-3.5:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.62
+ Equ_std: 0.02
+ GPT-4:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.01
+ F1_std: 0.00
+ Equ: 0.65
+ Equ_std: 0.02
\ No newline at end of file
diff --git a/_data/leaderboard/vi/few_shot/sentiment_analysis.yml b/_data/leaderboard/vi/few_shot/sentiment_analysis.yml
new file mode 100644
index 0000000..6b7de43
--- /dev/null
+++ b/_data/leaderboard/vi/few_shot/sentiment_analysis.yml
@@ -0,0 +1,200 @@
+VLSP 2016:
+ URA-LLaMa 70B:
+ AC: 0.66
+ AC_std: 0.01
+ F1: 0.49
+ F1_std: 0.01
+ AR: 0.72
+ AR_std: 0.01
+ ECE: 0.13
+ ECE_std: 0.01
+ A@10: 0.77
+ A@10_std: 0.04
+ URA-LLaMa 13B:
+ AC: 0.59
+ AC_std: 0.01
+ F1: 0.57
+ F1_std: 0.01
+ AR: 0.67
+ AR_std: 0.01
+ ECE: 0.09
+ ECE_std: 0.01
+ A@10: 0.82
+ A@10_std: 0.04
+ URA-LLaMa 7B:
+ AC: 0.57
+ AC_std: 0.02
+ F1: 0.42
+ F1_std: 0.05
+ AR: 0.69
+ AR_std: 0.02
+ ECE: 0.07
+ ECE_std: 0.02
+ A@10: 0.77
+ A@10_std: 0.04
+ LLaMa-2 13B:
+ AC: 0.51
+ AC_std: 0.01
+ F1: 0.41
+ F1_std: 0.06
+ AR: 0.66
+ AR_std: 0.01
+ ECE: 0.32
+ ECE_std: 0.02
+ A@10: 0.80
+ A@10_std: 0.04
+ LLaMa-2 7B:
+ AC: 0.45
+ AC_std: 0.01
+ F1: 0.32
+ F1_std: 0.01
+ AR: 0.59
+ AR_std: 0.01
+ ECE: 0.26
+ ECE_std: 0.02
+ A@10: 0.50
+ A@10_std: 0.05
+ Vietcuna 7B:
+ AC: 0.04
+ AC_std: 0.01
+ F1: 0.05
+ F1_std: 0.01
+ AR: 0.45
+ AR_std: 0.01
+ ECE: 0.71
+ ECE_std: 0.01
+ A@10: 0.05
+ A@10_std: 0.02
+ MixSUra:
+ AC: 0.62
+ AC_std: null
+ F1: 0.63
+ F1_std: null
+ AR: 0.59
+ AR_std: null
+ ECE: 0.30
+ ECE_std: null
+ A@10: 0.59
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.65
+ AC_std: 0.01
+ F1: 0.59
+ F1_std: 0.1
+ AR: null
+ AR_std: null
+ ECE: 0.32
+ ECE_std: 0.01
+ A@10: 0.65
+ A@10_std: 0.05
+ GPT-4:
+ AC: 0.75
+ AC_std: 0.01
+ F1: 0.74
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.41
+ ECE_std: 0.01
+ A@10: 0.74
+ A@10_std: 0.04
+UiT-VSFC:
+ URA-LLaMa 70B:
+ AC: 0.75
+ AC_std: 0.01
+ F1: 0.48
+ F1_std: 0.01
+ AR: 0.81
+ AR_std: 0.01
+ ECE: 0.16
+ ECE_std: 0.01
+ A@10: 0.71
+ A@10_std: 0.02
+ URA-LLaMa 13B:
+ AC: 0.74
+ AC_std: 0.01
+ F1: 0.52
+ F1_std: 0.08
+ AR: 0.83
+ AR_std: 0.01
+ ECE: 0.10
+ ECE_std: 0.01
+ A@10: 0.87
+ A@10_std: 0.02
+ URA-LLaMa 7B:
+ AC: 0.72
+ AC_std: 0.01
+ F1: 0.43
+ F1_std: 0.01
+ AR: 0.78
+ AR_std: 0.01
+ ECE: 0.13
+ ECE_std: 0.01
+ A@10: 0.95
+ A@10_std: 0.03
+ LLaMa-2 13B:
+ AC: 0.63
+ AC_std: 0.01
+ F1: 0.46
+ F1_std: 0.07
+ AR: 0.71
+ AR_std: 0.01
+ ECE: 0.13
+ ECE_std: 0.01
+ A@10: 0.88
+ A@10_std: 0.02
+ LLaMa-2 7B:
+ AC: 0.50
+ AC_std: 0.01
+ F1: 0.34
+ F1_std: 0.01
+ AR: 0.69
+ AR_std: 0.01
+ ECE: 0.23
+ ECE_std: 0.01
+ A@10: 0.62
+ A@10_std: 0.03
+ Vietcuna 7B:
+ AC: 0.03
+ AC_std: 0.00
+ F1: 0.03
+ F1_std: 0.00
+ AR: 0.53
+ AR_std: 0.01
+ ECE: 0.50
+ ECE_std: 0.00
+ A@10: 0.01
+ A@10_std: 0.00
+ MixSUra:
+ AC: 0.74
+ AC_std: null
+ F1: 0.46
+ F1_std: null
+ AR: 0.63
+ AR_std: null
+ ECE: 0.23
+ ECE_std: null
+ A@10: 0.655
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.86
+ AC_std: 0.01
+ F1: 0.73
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.52
+ ECE_std: 0.01
+ A@10: 0.86
+ A@10_std: 0.02
+ GPT-4:
+ AC: 0.85
+ AC_std: 0.01
+ F1: 0.59
+ F1_std: 0.09
+ AR: null
+ AR_std: null
+ ECE: 0.52
+ ECE_std: 0.01
+ A@10: 0.85
+ A@10_std: 0.02
\ No newline at end of file
diff --git a/_data/leaderboard/vi/few_shot/text_classification.yml b/_data/leaderboard/vi/few_shot/text_classification.yml
new file mode 100644
index 0000000..1d32886
--- /dev/null
+++ b/_data/leaderboard/vi/few_shot/text_classification.yml
@@ -0,0 +1,200 @@
+UiT-VSMEC:
+ URA-LLaMa 70B:
+ AC: 0.25
+ AC_std: 0.02
+ F1: 0.15
+ F1_std: 0.01
+ AR: 0.56
+ AR_std: 0.01
+ ECE: 0.25
+ ECE_std: 0.02
+ A@10: 0.37
+ A@10_std: 0.06
+ URA-LLaMa 13B:
+ AC: 0.32
+ AC_std: 0.02
+ F1: 0.12
+ F1_std: 0.01
+ AR: 0.58
+ AR_std: 0.01
+ ECE: 0.22
+ ECE_std: 0.02
+ A@10: 0.57
+ A@10_std: 0.07
+ URA-LLaMa 7B:
+ AC: 0.29
+ AC_std: 0.02
+ F1: 0.11
+ F1_std: 0.01
+ AR: 0.60
+ AR_std: 0.01
+ ECE: 0.12
+ ECE_std: 0.02
+ A@10: 0.43
+ A@10_std: 0.06
+ LLaMa-2 13B:
+ AC: 0.18
+ AC_std: 0.02
+ F1: 0.08
+ F1_std: 0.01
+ AR: 0.55
+ AR_std: 0.01
+ ECE: 0.45
+ ECE_std: 0.01
+ A@10: 0.49
+ A@10_std: 0.07
+ LLaMa-2 7B:
+ AC: 0.25
+ AC_std: 0.02
+ F1: 0.12
+ F1_std: 0.01
+ AR: 0.57
+ AR_std: 0.01
+ ECE: 0.21
+ ECE_std: 0.02
+ A@10: 0.54
+ A@10_std: 0.06
+ Vietcuna 7B:
+ AC: 0.15
+ AC_std: 0.01
+ F1: 0.05
+ F1_std: 0.01
+ AR: 0.46
+ AR_std: 0.01
+ ECE: 0.85
+ ECE_std: 0.01
+ A@10: 0.15
+ A@10_std: 0.04
+ MixSUra:
+ AC: 0.40
+ AC_std: null
+ F1: 0.36
+ F1_std: null
+ AR: 0.72
+ AR_std: null
+ ECE: 0.53
+ ECE_std: null
+ A@10: 0.79
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.42
+ AC_std: 0.02
+ F1: 0.40
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.28
+ ECE_std: 0.02
+ A@10: 0.42
+ A@10_std: 0.06
+ GPT-4:
+ AC: 0.49
+ AC_std: 0.02
+ F1: 0.48
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.35
+ ECE_std: 0.02
+ A@10: 0.49
+ A@10_std: 0.06
+PhoATIS:
+ URA-LLaMa 70B:
+ AC: 0.15
+ AC_std: 0.01
+ F1: 0.22
+ F1_std: 0.03
+ AR: 0.83
+ AR_std: 0.00
+ ECE: 0.81
+ ECE_std: 0.01
+ A@10: 0.13
+ A@10_std: 0.04
+ URA-LLaMa 13B:
+ AC: 0.01
+ AC_std: 0.01
+ F1: 0.06
+ F1_std: 0.02
+ AR: 0.47
+ AR_std: 0.00
+ ECE: 0.84
+ ECE_std: 0.01
+ A@10: 0.00
+ A@10_std: 0.01
+ URA-LLaMa 7B:
+ AC: 0.06
+ AC_std: 0.01
+ F1: 0.01
+ F1_std: 0.00
+ AR: 0.55
+ AR_std: 0.00
+ ECE: 0.24
+ ECE_std: 0.01
+ A@10: 0.08
+ A@10_std: 0.03
+ LLaMa-2 13B:
+ AC: 0.02
+ AC_std: 0.01
+ F1: 0.06
+ F1_std: 0.02
+ AR: 0.57
+ AR_std: 0.01
+ ECE: 0.90
+ ECE_std: 0.01
+ A@10: 0.01
+ A@10_std: 0.01
+ LLaMa-2 7B:
+ AC: 0.03
+ AC_std: 0.01
+ F1: 0.02
+ F1_std: 0.01
+ AR: 0.56
+ AR_std: 0.01
+ ECE: 0.54
+ ECE_std: 0.01
+ A@10: 0.01
+ A@10_std: 0.01
+ Vietcuna 7B:
+ AC: 0.04
+ AC_std: 0.01
+ F1: 0.01
+ F1_std: 0.00
+ AR: 0.63
+ AR_std: 0.00
+ ECE: 0.21
+ ECE_std: 0.01
+ A@10: 0.07
+ A@10_std: 0.03
+ MixSUra:
+ AC: 0.81
+ AC_std: null
+ F1: 0.58
+ F1_std: null
+ AR: 0.96
+ AR_std: null
+ ECE: 0.14
+ ECE_std: null
+ A@10: 0.91
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.69
+ AC_std: 0.02
+ F1: 0.67
+ F1_std: 0.03
+ AR: null
+ AR_std: null
+ ECE: 0.63
+ ECE_std: 0.02
+ A@10: 0.69
+ A@10_std: 0.05
+ GPT-4:
+ AC: 0.85
+ AC_std: 0.01
+ F1: 0.78
+ F1_std: 0.03
+ AR: null
+ AR_std: null
+ ECE: 0.79
+ ECE_std: 0.01
+ A@10: 0.88
+ A@10_std: 0.04
\ No newline at end of file
diff --git a/_data/leaderboard/vi/few_shot/toxicity_detection.yml b/_data/leaderboard/vi/few_shot/toxicity_detection.yml
new file mode 100644
index 0000000..fd54338
--- /dev/null
+++ b/_data/leaderboard/vi/few_shot/toxicity_detection.yml
@@ -0,0 +1,200 @@
+UiT-ViCTSD:
+ URA-LLaMa 70B:
+ AC: 0.44
+ AC_std: 0.01
+ F1: 0.27
+ F1_std: 0.01
+ AR: 0.75
+ AR_std: 0.01
+ ECE: 0.52
+ ECE_std: 0.01
+ A@10: 0.37
+ A@10_std: 0.02
+ URA-LLaMa 13B:
+ AC: 0.44
+ AC_std: 0.01
+ F1: 0.30
+ F1_std: 0.05
+ AR: 0.67
+ AR_std: 0.01
+ ECE: 0.33
+ ECE_std: 0.01
+ A@10: 0.41
+ A@10_std: 0.03
+ URA-LLaMa 7B:
+ AC: 0.43
+ AC_std: 0.01
+ F1: 0.40
+ F1_std: 0.01
+ AR: 0.60
+ AR_std: 0.01
+ ECE: 0.29
+ ECE_std: 0.01
+ A@10: 0.71
+ A@10_std: 0.02
+ LLaMa-2 13B:
+ AC: 0.28
+ AC_std: 0.01
+ F1: 0.19
+ F1_std: 0.00
+ AR: 0.67
+ AR_std: 0.01
+ ECE: 0.52
+ ECE_std: 0.01
+ A@10: 0.63
+ A@10_std: 0.03
+ LLaMa-2 7B:
+ AC: 0.16
+ AC_std: 0.01
+ F1: 0.12
+ F1_std: 0.01
+ AR: 0.61
+ AR_std: 0.01
+ ECE: 0.66
+ ECE_std: 0.01
+ A@10: 0.08
+ A@10_std: 0.02
+ Vietcuna 7B:
+ AC: 0.08
+ AC_std: 0.00
+ F1: 0.10
+ F1_std: 0.01
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.42
+ ECE_std: 0.00
+ A@10: 0.08
+ A@10_std: 0.03
+ MixSUra:
+ AC: 0.70
+ AC_std: null
+ F1: 0.39
+ F1_std: null
+ AR: null
+ AR_std: null
+ ECE: 0.29
+ ECE_std: null
+ A@10: 0.80
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.63
+ AC_std: 0.02
+ F1: 0.54
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.13
+ ECE_std: 0.02
+ A@10: 0.63
+ A@10_std: 0.05
+ GPT-4:
+ AC: 0.89
+ AC_std: 0.00
+ F1: 0.71
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.39
+ ECE_std: 0.00
+ A@10: 0.89
+ A@10_std: 0.03
+UiT-ViHSD:
+ URA-LLaMa 70B:
+ AC: 0.17
+ AC_std: 0.00
+ F1: 0.15
+ F1_std: 0.00
+ AR: 0.64
+ AR_std: 0.01
+ ECE: 0.57
+ ECE_std: 0.00
+ A@10: 0.27
+ A@10_std: 0.02
+ URA-LLaMa 13B:
+ AC: 0.26
+ AC_std: 0.01
+ F1: 0.16
+ F1_std: 0.00
+ AR: 0.61
+ AR_std: 0.01
+ ECE: 0.42
+ ECE_std: 0.01
+ A@10: 0.21
+ A@10_std: 0.02
+ URA-LLaMa 7B:
+ AC: 0.16
+ AC_std: 0.00
+ F1: 0.10
+ F1_std: 0.00
+ AR: 0.67
+ AR_std: 0.01
+ ECE: 0.32
+ ECE_std: 0.00
+ A@10: 0.28
+ A@10_std: 0.02
+ LLaMa-2 13B:
+ AC: 0.17
+ AC_std: 0.00
+ F1: 0.11
+ F1_std: 0.00
+ AR: 0.62
+ AR_std: 0.01
+ ECE: 0.58
+ ECE_std: 0.00
+ A@10: 0.44
+ A@10_std: 0.02
+ LLaMa-2 7B:
+ AC: 0.01
+ AC_std: 0.00
+ F1: 0.01
+ F1_std: 0.00
+ AR: 0.56
+ AR_std: 0.01
+ ECE: 0.71
+ ECE_std: 0.00
+ A@10: 0.01
+ A@10_std: 0.02
+ Vietcuna 7B:
+ AC: 0.61
+ AC_std: 0.01
+ F1: 0.21
+ F1_std: 0.00
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.28
+ ECE_std: 0.01
+ A@10: 0.61
+ A@10_std: 0.02
+ MixSUra:
+ AC: 0.58
+ AC_std: null
+ F1: 0.31
+ F1_std: null
+ AR: 0.68
+ AR_std: null
+ ECE: 0.30
+ ECE_std: null
+ A@10: 0.93
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.63
+ AC_std: 0.01
+ F1: 0.47
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.29
+ ECE_std: 0.01
+ A@10: 0.63
+ A@10_std: 0.02
+ GPT-4:
+ AC: 0.77
+ AC_std: 0.01
+ F1: 0.57
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.44
+ ECE_std: 0.01
+ A@10: 0.77
+ A@10_std: 0.02
\ No newline at end of file
diff --git a/_data/leaderboard/vi/few_shot/translation.yml b/_data/leaderboard/vi/few_shot/translation.yml
new file mode 100644
index 0000000..c25954d
--- /dev/null
+++ b/_data/leaderboard/vi/few_shot/translation.yml
@@ -0,0 +1,164 @@
+PhoMT:
+ URA-LLaMa 70B:
+ "BLEU envi": 0.28
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.59
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.27
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.58
+ "hLEPOR vien_std": 0.00
+ URA-LLaMa 13B:
+ "BLEU envi": 0.25
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.55
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.15
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.56
+ "hLEPOR vien_std": 0.00
+ URA-LLaMa 7B:
+ "BLEU envi": 0.19
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.50
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.22
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.54
+ "hLEPOR vien_std": 0.00
+ LLaMa-2 13B:
+ "BLEU envi": 0.23
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.53
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.23
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.54
+ "hLEPOR vien_std": 0.00
+ LLaMa-2 7B:
+ "BLEU envi": 0.18
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.47
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.21
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.52
+ "hLEPOR vien_std": 0.00
+ Vietcuna 7B:
+ "BLEU envi": 0.15
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.35
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.03
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.11
+ "hLEPOR vien_std": 0.00
+ MixSUra:
+ "BLEU envi": 0.15
+ "BLEU envi_std": null
+ "BLEU vien": 0.51
+ "BLEU vien_std": null
+ "hLEPOR envi": 0.16
+ "hLEPOR envi_std": null
+ "hLEPOR vien": 0.52
+ "hLEPOR vien_std": null
+ GPT-3.5:
+ "BLEU envi": 0.33
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.65
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.33
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.63
+ "hLEPOR vien_std": 0.00
+ GPT-4:
+ "BLEU envi": 0.33
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.66
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.34
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.65
+ "hLEPOR vien_std": 0.00
+OPUS100:
+ URA-LLaMa 70B:
+ "BLEU envi": 0.10
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.44
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.14
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.41
+ "hLEPOR vien_std": 0.01
+ URA-LLaMa 13B:
+ "BLEU envi": 0.10
+ "BLEU envi_std": 0.01
+ "BLEU vien": 0.41
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.17
+ "hLEPOR envi_std": 0.01
+ "hLEPOR vien": 0.43
+ "hLEPOR vien_std": 0.01
+ URA-LLaMa 7B:
+ "BLEU envi": 0.08
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.38
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.14
+ "hLEPOR envi_std": 0.01
+ "hLEPOR vien": 0.39
+ "hLEPOR vien_std": 0.01
+ LLaMa-2 13B:
+ "BLEU envi": 0.09
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.39
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.14
+ "hLEPOR envi_std": 0.01
+ "hLEPOR vien": 0.40
+ "hLEPOR vien_std": 0.01
+ LLaMa-2 7B:
+ "BLEU envi": 0.07
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.34
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.11
+ "hLEPOR envi_std": 0.01
+ "hLEPOR vien": 0.36
+ "hLEPOR vien_std": 0.01
+ Vietcuna 7B:
+ "BLEU envi": 0.00
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.00
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.05
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.16
+ "hLEPOR vien_std": 0.00
+ MixSUra:
+ "BLEU envi": 0.07
+ "BLEU envi_std": null
+ "BLEU vien": 0.37
+ "BLEU vien_std": null
+ "hLEPOR envi": 0.09
+ "hLEPOR envi_std": null
+ "hLEPOR vien": 0.36
+ "hLEPOR vien_std": null
+ GPT-3.5:
+ "BLEU envi": 0.16
+ "BLEU envi_std": 0.01
+ "BLEU vien": 0.50
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.24
+ "hLEPOR envi_std": 0.01
+ "hLEPOR vien": 0.51
+ "hLEPOR vien_std": 0.00
+ GPT-4:
+ "BLEU envi": 0.17
+ "BLEU envi_std": 0.01
+ "BLEU vien": 0.51
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.25
+ "hLEPOR envi_std": 0.01
+ "hLEPOR vien": 0.53
+ "hLEPOR vien_std": 0.00
\ No newline at end of file
diff --git a/_data/leaderboard/vi/medium_prompt/question_answering.yml b/_data/leaderboard/vi/medium_prompt/question_answering.yml
new file mode 100644
index 0000000..6213b75
--- /dev/null
+++ b/_data/leaderboard/vi/medium_prompt/question_answering.yml
@@ -0,0 +1,82 @@
+XQuAD:
+ URA-LLaMa 70B:
+ EM: 0.08
+ EM_std: 0.00
+ F1: 0.33
+ F1_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.04
+ EM_std: 0.00
+ F1: 0.21
+ F1_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.01
+ EM_std: 0.00
+ F1: 0.11
+ F1_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.10
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.03
+ F1_std: 0.00
+ MixSUra:
+ EM: 0.01
+ EM_std: null
+ F1: 0.25
+ F1_std: null
+ GPT-3.5:
+ EM: null
+ EM_std: null
+ F1: null
+ F1_std: null
+ GPT-4:
+ EM: null
+ EM_std: null
+ F1: null
+ F1_std: null
+MLQA:
+ URA-LLaMa 70B:
+ EM: 0.07
+ EM_std: 0.00
+ F1: 0.31
+ F1_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.04
+ EM_std: 0.00
+ F1: 0.19
+ F1_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.01
+ EM_std: 0.00
+ F1: 0.11
+ F1_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.09
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.03
+ F1_std: 0.00
+ MixSUra:
+ EM: 0.00
+ EM_std: null
+ F1: 0.25
+ F1_std: null
+ GPT-3.5:
+ EM: null
+ EM_std: null
+ F1: null
+ F1_std: null
+ GPT-4:
+ EM: null
+ EM_std: null
+ F1: null
+ F1_std: null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/medium_prompt/summarization.yml b/_data/leaderboard/vi/medium_prompt/summarization.yml
new file mode 100644
index 0000000..7c5bced
--- /dev/null
+++ b/_data/leaderboard/vi/medium_prompt/summarization.yml
@@ -0,0 +1,274 @@
+VietNews:
+ URA-LLaMa 70B:
+ R1: 0.35
+ R1_std: 0.00
+ R2: 0.16
+ R2_std: 0.00
+ RL: 0.24
+ RL_std: 0.00
+ SC: -0.11
+ SC_std: 0.00
+ BS: 0.12
+ BS_std: 0.00
+ Cv: 0.63
+ Cv_std: 0.00
+ De: 5.43
+ De_std: 0.02
+ Cp: 37.78
+ Cp_std: 0.47
+ URA-LLaMa 13B:
+ R1: 0.26
+ R1_std: 0.00
+ R2: 0.12
+ R2_std: 0.00
+ RL: 0.17
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: -0.08
+ BS_std: 0.18
+ Cv: 0.46
+ Cv_std: 0.00
+ De: 3.55
+ De_std: 0.04
+ Cp: 47.75
+ Cp_std: 0.65
+ URA-LLaMa 7B:
+ R1: 0.41
+ R1_std: 0.00
+ R2: 0.18
+ R2_std: 0.00
+ RL: 0.27
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: -0.08
+ BS_std: 0.13
+ Cv: 0.83
+ Cv_std: 0.00
+ De: 8.13
+ De_std: 0.04
+ Cp: 8.08
+ Cp_std: 0.17
+ LLaMa-2 13B:
+ R1: 0.02
+ R1_std: 0.00
+ R2: 0.00
+ R2_std: 0.00
+ RL: 0.02
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: -0.19
+ BS_std: 0.05
+ Cv: 0.01
+ Cv_std: 0.00
+ De: 0.01
+ De_std: 0.00
+ Cp: 54.67
+ Cp_std: 0.16
+ LLaMa-2 7B:
+ R1: 0.03
+ R1_std: 0.00
+ R2: 0.01
+ R2_std: 0.00
+ RL: 0.03
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: -0.17
+ BS_std: 0.03
+ Cv: 0.04
+ Cv_std: 0.00
+ De: 0.07
+ De_std: 0.00
+ Cp: 23.86
+ Cp_std: 0.26
+ MixSUra:
+ R1: 0.06
+ R1_std: null
+ R2: 0.01
+ R2_std: null
+ RL: 0.04
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: -0.13
+ BS_std: null
+ Cv: 0.10
+ Cv_std: null
+ De: 0.17
+ De_std: null
+ Cp: 9.03
+ Cp_std: null
+ GPT-3.5:
+ R1: null
+ R1_std: null
+ R2: null
+ R2_std: null
+ RL: null
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: null
+ BS_std: null
+ Cv: null
+ Cv_std: null
+ De: null
+ De_std: null
+ Cp: null
+ Cp_std: null
+ GPT-4:
+ R1: null
+ R1_std: null
+ R2: null
+ R2_std: null
+ RL: null
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: null
+ BS_std: null
+ Cv: null
+ Cv_std: null
+ De: null
+ De_std: null
+ Cp: null
+ Cp_std: null
+WikiLingua:
+ URA-LLaMa 70B:
+ R1: 0.33
+ R1_std: 0.00
+ R2: 0.14
+ R2_std: 0.00
+ RL: 0.22
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.24
+ BS_std: 0.10
+ Cv: 0.59
+ Cv_std: 0.01
+ De: 4.62
+ De_std: 0.11
+ Cp: 56.56
+ Cp_std: 1.70
+ URA-LLaMa 13B:
+ R1: 0.14
+ R1_std: 0.00
+ R2: 0.05
+ R2_std: 0.00
+ RL: 0.09
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: -0.14
+ BS_std: 0.12
+ Cv: 0.26
+ Cv_std: 0.01
+ De: 1.83
+ De_std: 0.06
+ Cp: 60.10
+ Cp_std: 2.16
+ URA-LLaMa 7B:
+ R1: 0.42
+ R1_std: 0.00
+ R2: 0.17
+ R2_std: 0.00
+ RL: 0.27
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.27
+ BS_std: 0.21
+ Cv: 0.84
+ Cv_std: 0.00
+ De: 7.15
+ De_std: 0.08
+ Cp: 8.08
+ Cp_std: 0.36
+ LLaMa-2 13B:
+ R1: 0.03
+ R1_std: 0.00
+ R2: 0.00
+ R2_std: 0.00
+ RL: 0.03
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: -0.05
+ BS_std: 0.03
+ Cv: 0.02
+ Cv_std: 0.00
+ De: 0.02
+ De_std: 0.00
+ Cp: 42.55
+ Cp_std: 0.81
+ LLaMa-2 7B:
+ R1: 0.02
+ R1_std: 0.00
+ R2: 0.00
+ R2_std: 0.00
+ RL: 0.02
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: -0.04
+ BS_std: 0.06
+ Cv: 0.02
+ Cv_std: 0.00
+ De: 0.03
+ De_std: 0.00
+ Cp: 40.31
+ Cp_std: 0.88
+ MixSUra:
+ R1: 0.03
+ R1_std: null
+ R2: 0.00
+ R2_std: null
+ RL: 0.03
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: -0.01
+ BS_std: null
+ Cv: 0.17
+ Cv_std: null
+ De: 0.26
+ De_std: null
+ Cp: 16.68
+ Cp_std: null
+ GPT-3.5:
+ R1: null
+ R1_std: null
+ R2: null
+ R2_std: null
+ RL: null
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: null
+ BS_std: null
+ Cv: null
+ Cv_std: null
+ De: null
+ De_std: null
+ Cp: null
+ Cp_std: null
+ GPT-4:
+ R1: null
+ R1_std: null
+ R2: null
+ R2_std: null
+ RL: null
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: null
+ BS_std: null
+ Cv: null
+ Cv_std: null
+ De: null
+ De_std: null
+ Cp: null
+ Cp_std: null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/models.yml b/_data/leaderboard/vi/models.yml
new file mode 100644
index 0000000..037f74d
--- /dev/null
+++ b/_data/leaderboard/vi/models.yml
@@ -0,0 +1,11 @@
+models:
+ - URA-LLaMa 70B
+ - URA-LLaMa 13B
+ - URA-LLaMa 7B
+ - LLaMa-2 13B
+ - LLaMa-2 7B
+ - Vietcuna 7B
+ - GPT-3.5
+ - GPT-4
+ - Gemini Pro
+ - MixSUra
\ No newline at end of file
diff --git a/_data/leaderboard/vi/randomized_choice/knowledge.yml b/_data/leaderboard/vi/randomized_choice/knowledge.yml
new file mode 100644
index 0000000..203f875
--- /dev/null
+++ b/_data/leaderboard/vi/randomized_choice/knowledge.yml
@@ -0,0 +1,100 @@
+ViMMRC:
+ URA-LLaMa 70B:
+ AC: 0.76
+ AC_std: 0.02
+ F1: 0.76
+ F1_std: 0.02
+ AR: 0.78
+ AR_std: 0.01
+ ECE: 0.14
+ ECE_std: 0.02
+ A@10: 0.94
+ A@10_std: 0.04
+ URA-LLaMa 13B:
+ AC: 0.62
+ AC_std: 0.02
+ F1: 0.62
+ F1_std: 0.02
+ AR: 0.61
+ AR_std: 0.02
+ ECE: 0.15
+ ECE_std: 0.02
+ A@10: 0.67
+ A@10_std: 0.07
+ URA-LLaMa 7B:
+ AC: 0.45
+ AC_std: 0.02
+ F1: 0.36
+ F1_std: 0.02
+ AR: 0.57
+ AR_std: 0.02
+ ECE: 0.10
+ ECE_std: 0.02
+ A@10: 0.45
+ A@10_std: 0.07
+ LLaMa-2 13B:
+ AC: 0.57
+ AC_std: 0.02
+ F1: 0.57
+ F1_std: 0.02
+ AR: 0.57
+ AR_std: 0.02
+ ECE: 0.29
+ ECE_std: 0.02
+ A@10: 0.75
+ A@10_std: 0.07
+ LLaMa-2 7B:
+ AC: 0.36
+ AC_std: 0.02
+ F1: 0.27
+ F1_std: 0.02
+ AR: 0.56
+ AR_std: 0.02
+ ECE: 0.37
+ ECE_std: 0.02
+ A@10: 0.44
+ A@10_std: 0.07
+ Vietcuna 7B:
+ AC: 0.26
+ AC_std: 0.02
+ F1: 0.15
+ F1_std: 0.01
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.01
+ ECE_std: 0.01
+ A@10: 0.26
+ A@10_std: 0.06
+ MixSUra:
+ AC: 0.61
+ AC_std: null
+ F1: 0.61
+ F1_std: null
+ AR: 0.54
+ AR_std: null
+ ECE: 0.31
+ ECE_std: null
+ A@10: 0.65
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.92
+ AC_std: 0.01
+ F1: 0.74
+ F1_std: 0.04
+ AR: null
+ AR_std: null
+ ECE: 0.67
+ ECE_std: 0.01
+ A@10: 0.92
+ A@10_std: 0.04
+ GPT-4:
+ AC: 0.92
+ AC_std: 0.01
+ F1: 0.74
+ F1_std: 0.04
+ AR: null
+ AR_std: null
+ ECE: 0.67
+ ECE_std: 0.01
+ A@10: 0.92
+ A@10_std: 0.04
\ No newline at end of file
diff --git a/_data/leaderboard/vi/robustness_aware/information_retrieval.yml b/_data/leaderboard/vi/robustness_aware/information_retrieval.yml
new file mode 100644
index 0000000..3bce6d4
--- /dev/null
+++ b/_data/leaderboard/vi/robustness_aware/information_retrieval.yml
@@ -0,0 +1,146 @@
+mMARCO:
+ URA-LLaMa 70B:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ URA-LLaMa 13B:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ URA-LLaMa 7B:
+ "M@10": 0.05
+ "M@10_std": 0.00
+ "M@10B": 0.11
+ "M@10B_std": 0.00
+ "N@10": 0.07
+ "N@10_std": 0.00
+ "N@10B": 0.17
+ "N@10B_std": 0.00
+ LLaMa-2 13B:
+ "M@10": 0.06
+ "M@10_std": 0.00
+ "M@10B": 0.13
+ "M@10B_std": 0.00
+ "N@10": 0.19
+ "N@10_std": 0.00
+ "N@10B": 0.19
+ "N@10B_std": 0.00
+ LLaMa-2 7B:
+ "M@10": 0.05
+ "M@10_std": 0.00
+ "M@10B": 0.11
+ "M@10B_std": 0.00
+ "N@10": 0.08
+ "N@10_std": 0.00
+ "N@10B": 0.16
+ "N@10B_std": 0.00
+ Vietcuna 7B:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ GPT-3.5:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ GPT-4:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+mRobust04:
+ URA-LLaMa 70B:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ URA-LLaMa 13B:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ URA-LLaMa 7B:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ LLaMa-2 13B:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ LLaMa-2 7B:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ Vietcuna 7B:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ GPT-3.5:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
+ GPT-4:
+ "M@10": null
+ "M@10_std": null
+ "M@10B": null
+ "M@10B_std": null
+ "N@10": null
+ "N@10_std": null
+ "N@10B": null
+ "N@10B_std": null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/robustness_aware/knowledge.yml b/_data/leaderboard/vi/robustness_aware/knowledge.yml
new file mode 100644
index 0000000..127ed2c
--- /dev/null
+++ b/_data/leaderboard/vi/robustness_aware/knowledge.yml
@@ -0,0 +1,147 @@
+ZaloE2E:
+ num_fields: 2
+ URA-LLaMa 70B:
+ EM: 0.23
+ EM_std: 0.00
+ F1: 0.37
+ F1_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.18
+ EM_std: 0.00
+ F1: 0.30
+ F1_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.10
+ EM_std: 0.00
+ F1: 0.18
+ F1_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.13
+ EM_std: 0.00
+ F1: 0.21
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.02
+ EM_std: 0.00
+ F1: 0.05
+ F1_std: 0.00
+ Vietcuna 7B:
+ EM: 0.05
+ EM_std: 0.00
+ F1: 0.15
+ F1_std: 0.00
+ MixSUra:
+ EM: 0.13
+ EM_std: null
+ F1: 0.24
+ F1_std: null
+ GPT-3.5:
+ EM: 0.45
+ EM_std: 0.01
+ F1: 0.61
+ F1_std: 0.01
+ GPT-4:
+ EM: 0.44
+ EM_std: 0.01
+ F1: 0.61
+ F1_std: 0.01
+ViMMRC:
+ URA-LLaMa 70B:
+ AC: 0.65
+ AC_std: 0.00
+ F1: 0.53
+ F1_std: 0.00
+ AR: 0.84
+ AR_std: 0.00
+ ECE: 0.11
+ ECE_std: 0.00
+ A@10: 0.77
+ A@10_std: 0.00
+ URA-LLaMa 13B:
+ AC: 0.41
+ AC_std: 0.00
+ F1: 0.34
+ F1_std: 0.00
+ AR: 0.61
+ AR_std: 0.00
+ ECE: 0.22
+ ECE_std: 0.00
+ A@10: 0.58
+ A@10_std: 0.00
+ URA-LLaMa 7B:
+ AC: 0.33
+ AC_std: 0.02
+ F1: 0.28
+ F1_std: 0.02
+ AR: 0.61
+ AR_std: 0.01
+ ECE: 0.19
+ ECE_std: 0.02
+ A@10: 0.33
+ A@10_std: 0.06
+ LLaMa-2 13B:
+ AC: 0.39
+ AC_std: 0.00
+ F1: 0.31
+ F1_std: 0.00
+ AR: 0.56
+ AR_std: 0.00
+ ECE: 0.46
+ ECE_std: 0.00
+ A@10: 0.33
+ A@10_std: 0.00
+ LLaMa-2 7B:
+ AC: 0.26
+ AC_std: 0.01
+ F1: 0.20
+ F1_std: 0.01
+ AR: 0.51
+ AR_std: 0.01
+ ECE: 0.46
+ ECE_std: 0.01
+ A@10: 0.13
+ A@10_std: 0.03
+ Vietcuna 7B:
+ AC: 0.26
+ AC_std: 0.01
+ F1: 0.14
+ F1_std: 0.00
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.01
+ ECE_std: 0.01
+ A@10: 0.21
+ A@10_std: 0.07
+ MixSUra:
+ AC: 0.57
+ AC_std: null
+ F1: 0.45
+ F1_std: null
+ AR: 0.53
+ AR_std: null
+ ECE: 0.35
+ ECE_std: null
+ A@10: 0.58
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.90
+ AC_std: 0.01
+ F1: 0.72
+ F1_std: 0.04
+ AR: null
+ AR_std: null
+ ECE: 0.65
+ ECE_std: 0.01
+ A@10: 0.88
+ A@10_std: 0.07
+ GPT-4:
+ AC: 0.91
+ AC_std: 0.01
+ F1: 0.73
+ F1_std: 0.07
+ AR: null
+ AR_std: null
+ ECE: 0.66
+ ECE_std: 0.07
+ A@10: 0.88
+ A@10_std: 0.04
\ No newline at end of file
diff --git a/_data/leaderboard/vi/robustness_aware/question_answering.yml b/_data/leaderboard/vi/robustness_aware/question_answering.yml
new file mode 100644
index 0000000..40b79c4
--- /dev/null
+++ b/_data/leaderboard/vi/robustness_aware/question_answering.yml
@@ -0,0 +1,92 @@
+XQuAD:
+ URA-LLaMa 70B:
+ EM: 0.01
+ EM_std: 0.00
+ F1: 0.17
+ F1_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.09
+ F1_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.09
+ F1_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.02
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.02
+ F1_std: 0.00
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.06
+ F1_std: 0.00
+ MixSUra:
+ EM: 0.00
+ EM_std: null
+ F1: 0.11
+ F1_std: null
+ GPT-3.5:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.19
+ F1_std: 0.00
+ GPT-4:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.24
+ F1_std: 0.00
+MLQA:
+ URA-LLaMa 70B:
+ EM: 0.01
+ EM_std: 0.00
+ F1: 0.18
+ F1_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.10
+ F1_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.10
+ F1_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.03
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.02
+ F1_std: 0.00
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.05
+ F1_std: 0.00
+ MixSUra:
+ EM: 0.00
+ EM_std: null
+ F1: 0.12
+ F1_std: null
+ GPT-3.5:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.20
+ F1_std: 0.00
+ GPT-4:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.25
+ F1_std: 0.00
\ No newline at end of file
diff --git a/_data/leaderboard/vi/robustness_aware/sentiment_analysis.yml b/_data/leaderboard/vi/robustness_aware/sentiment_analysis.yml
new file mode 100644
index 0000000..c1e55e3
--- /dev/null
+++ b/_data/leaderboard/vi/robustness_aware/sentiment_analysis.yml
@@ -0,0 +1,200 @@
+VLSP 2016:
+ URA-LLaMa 70B:
+ AC: 0.63
+ AC_std: 0.01
+ F1: 0.48
+ F1_std: 0.01
+ AR: 0.60
+ AR_std: 0.01
+ ECE: 0.09
+ ECE_std: 0.01
+ A@10: 0.83
+ A@10_std: 0.04
+ URA-LLaMa 13B:
+ AC: 0.55
+ AC_std: 0.02
+ F1: 0.52
+ F1_std: 0.02
+ AR: 0.59
+ AR_std: 0.01
+ ECE: 0.06
+ ECE_std: 0.01
+ A@10: 0.74
+ A@10_std: 0.05
+ URA-LLaMa 7B:
+ AC: 0.52
+ AC_std: 0.02
+ F1: 0.36
+ F1_std: 0.03
+ AR: 0.59
+ AR_std: 0.01
+ ECE: 0.07
+ ECE_std: 0.01
+ A@10: 0.66
+ A@10_std: 0.05
+ LLaMa-2 13B:
+ AC: 0.46
+ AC_std: 0.02
+ F1: 0.30
+ F1_std: 0.01
+ AR: 0.55
+ AR_std: 0.01
+ ECE: 0.39
+ ECE_std: 0.02
+ A@10: 0.70
+ A@10_std: 0.05
+ LLaMa-2 7B:
+ AC: 0.45
+ AC_std: 0.02
+ F1: 0.36
+ F1_std: 0.01
+ AR: 0.54
+ AR_std: 0.01
+ ECE: 0.20
+ ECE_std: 0.02
+ A@10: 0.51
+ A@10_std: 0.05
+ Vietcuna 7B:
+ AC: 0.44
+ AC_std: 0.02
+ F1: 0.27
+ F1_std: 0.01
+ AR: 0.51
+ AR_std: 0.01
+ ECE: 0.23
+ ECE_std: 0.02
+ A@10: 0.53
+ A@10_std: 0.05
+ MixSUra:
+ AC: 0.59
+ AC_std: null
+ F1: 0.59
+ F1_std: null
+ AR: 0.55
+ AR_std: null
+ ECE: 0.34
+ ECE_std: null
+ A@10: 0.52
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.64
+ AC_std: 0.01
+ F1: 0.60
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.31
+ ECE_std: 0.01
+ A@10: 0.54
+ A@10_std: 0.05
+ GPT-4:
+ AC: 0.74
+ AC_std: 0.00
+ F1: 0.73
+ F1_std: 0.00
+ AR: null
+ AR_std: null
+ ECE: 0.41
+ ECE_std: 0.00
+ A@10: 0.71
+ A@10_std: 0.00
+UiT-VSFC:
+ URA-LLaMa 70B:
+ AC: 0.71
+ AC_std: 0.01
+ F1: 0.45
+ F1_std: 0.01
+ AR: 0.80
+ AR_std: 0.01
+ ECE: 0.08
+ ECE_std: 0.01
+ A@10: 0.99
+ A@10_std: 0.01
+ URA-LLaMa 13B:
+ AC: 0.72
+ AC_std: 0.01
+ F1: 0.44
+ F1_std: 0.05
+ AR: 0.77
+ AR_std: 0.01
+ ECE: 0.18
+ ECE_std: 0.01
+ A@10: 0.77
+ A@10_std: 0.02
+ URA-LLaMa 7B:
+ AC: 0.73
+ AC_std: 0.01
+ F1: 0.41
+ F1_std: 0.01
+ AR: 0.71
+ AR_std: 0.01
+ ECE: 0.16
+ ECE_std: 0.01
+ A@10: 0.87
+ A@10_std: 0.02
+ LLaMa-2 13B:
+ AC: 0.66
+ AC_std: 0.01
+ F1: 0.40
+ F1_std: 0.01
+ AR: 0.63
+ AR_std: 0.01
+ ECE: 0.11
+ ECE_std: 0.01
+ A@10: 0.89
+ A@10_std: 0.02
+ LLaMa-2 7B:
+ AC: 0.51
+ AC_std: 0.01
+ F1: 0.33
+ F1_std: 0.01
+ AR: 0.65
+ AR_std: 0.01
+ ECE: 0.15
+ ECE_std: 0.01
+ A@10: 0.80
+ A@10_std: 0.02
+ Vietcuna 7B:
+ AC: 0.49
+ AC_std: 0.01
+ F1: 0.25
+ F1_std: 0.03
+ AR: 0.46
+ AR_std: 0.01
+ ECE: 0.33
+ ECE_std: 0.01
+ A@10: 0.34
+ A@10_std: 0.03
+ MixSUra:
+ AC: 0.69
+ AC_std: null
+ F1: 0.44
+ F1_std: null
+ AR: 0.61
+ AR_std: null
+ ECE: 0.29
+ ECE_std: null
+ A@10: 0.66
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.86
+ AC_std: 0.01
+ F1: 0.71
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.53
+ ECE_std: 0.01
+ A@10: 0.86
+ A@10_std: 0.02
+ GPT-4:
+ AC: 0.83
+ AC_std: 0.00
+ F1: 0.70
+ F1_std: 0.00
+ AR: null
+ AR_std: null
+ ECE: 0.50
+ ECE_std: 0.00
+ A@10: 0.85
+ A@10_std: 0.00
\ No newline at end of file
diff --git a/_data/leaderboard/vi/robustness_aware/summarization.yml b/_data/leaderboard/vi/robustness_aware/summarization.yml
new file mode 100644
index 0000000..8527eed
--- /dev/null
+++ b/_data/leaderboard/vi/robustness_aware/summarization.yml
@@ -0,0 +1,308 @@
+VietNews:
+ URA-LLaMa 70B:
+ R1: 0.34
+ R1_std: 0.00
+ R2: 0.15
+ R2_std: 0.00
+ RL: 0.23
+ RL_std: 0.00
+ SC: -0.06
+ SC_std: 0.00
+ BS: -0.11
+ BS_std: 0.18
+ Cv: 0.10
+ Cv_std: 0.00
+ De: 0.10
+ De_std: 0.00
+ Cp: 39.63
+ Cp_std: 0.87
+ URA-LLaMa 13B:
+ R1: 0.35
+ R1_std: 0.00
+ R2: 0.14
+ R2_std: 0.00
+ RL: 0.23
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: -0.07
+ BS_std: 0.17
+ Cv: 0.64
+ Cv_std: 0.00
+ De: 0.65
+ De_std: 0.00
+ Cp: 134.65
+ Cp_std: 3.76
+ URA-LLaMa 7B:
+ R1: 0.37
+ R1_std: 0.00
+ R2: 0.12
+ R2_std: 0.00
+ RL: 0.24
+ RL_std: 0.00
+ SC: -0.10
+ SC_std: 0.00
+ BS: -0.24
+ BS_std: 0.18
+ Cv: 0.65
+ Cv_std: 0.00
+ De: 0.65
+ De_std: 0.00
+ Cp: 17.92
+ Cp_std: 0.87
+ LLaMa-2 13B:
+ R1: 0.05
+ R1_std: 0.00
+ R2: 0.01
+ R2_std: 0.00
+ RL: 0.04
+ RL_std: 0.00
+ SC: -0.15
+ SC_std: 0.00
+ BS: -0.24
+ BS_std: 0.18
+ Cv: 0.03
+ Cv_std: 0.00
+ De: 0.03
+ De_std: 0.00
+ Cp: 55.91
+ Cp_std: 0.65
+ LLaMa-2 7B:
+ R1: 0.05
+ R1_std: 0.00
+ R2: 0.01
+ R2_std: 0.00
+ RL: 0.05
+ RL_std: 0.00
+ SC: -0.10
+ SC_std: 0.00
+ BS: -0.19
+ BS_std: 0.04
+ Cv: 0.07
+ Cv_std: 0.00
+ De: 0.07
+ De_std: 0.00
+ Cp: 55.29
+ Cp_std: 0.88
+ Vietcuna 7B:
+ R1: 0.03
+ R1_std: 0.00
+ R2: 0.01
+ R2_std: 0.00
+ RL: 0.02
+ RL_std: 0.00
+ SC: -0.10
+ SC_std: 0.00
+ BS: -0.18
+ BS_std: 0.06
+ Cv: 0.91
+ Cv_std: 0.00
+ De: 0.91
+ De_std: 0.00
+ Cp: 1026.61
+ Cp_std: 3.86
+ MixSUra:
+ R1: 0.41
+ R1_std: null
+ R2: 0.19
+ R2_std: null
+ RL: 0.26
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: -0.03
+ BS_std: null
+ Cv: 0.86
+ Cv_std: null
+ De: 0.87
+ De_std: null
+ Cp: 29.15
+ Cp_std: null
+ GPT-3.5:
+ R1: 0.34
+ R1_std: 0.00
+ R2: 0.19
+ R2_std: 0.00
+ RL: 0.23
+ RL_std: 0.00
+ SC: -0.10
+ SC_std: 0.00
+ BS: 0.05
+ BS_std: 0.14
+ Cv: 0.81
+ Cv_std: 0.00
+ De: 0.81
+ De_std: 0.00
+ Cp: 128.44
+ Cp_std: 2.94
+ GPT-4:
+ R1: 0.39
+ R1_std: 0.00
+ R2: 0.21
+ R2_std: 0.00
+ RL: 0.26
+ RL_std: 0.00
+ SC: -0.10
+ SC_std: 0.09
+ BS: 0.04
+ BS_std: 0.00
+ Cv: 0.83
+ Cv_std: 0.00
+ De: 0.83
+ De_std: 0.71
+ Cp: 24.48
+ Cp_std: 0.00
+WikiLingua:
+ URA-LLaMa 70B:
+ R1: 0.28
+ R1_std: 0.00
+ R2: 0.11
+ R2_std: 0.00
+ RL: 0.19
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.25
+ BS_std: 0.23
+ Cv: 0.50
+ Cv_std: 0.01
+ De: 0.50
+ De_std: 0.01
+ Cp: 167.42
+ Cp_std: 7.09
+ URA-LLaMa 13B:
+ R1: 0.20
+ R1_std: 0.00
+ R2: 0.07
+ R2_std: 0.00
+ RL: 0.13
+ RL_std: 0.00
+ SC: -0.17
+ SC_std: 0.00
+ BS: 0.20
+ BS_std: 0.11
+ Cv: 0.38
+ Cv_std: 0.00
+ De: 0.38
+ De_std: 0.00
+ Cp: 103.69
+ Cp_std: 3.33
+ URA-LLaMa 7B:
+ R1: 0.37
+ R1_std: 0.00
+ R2: 0.12
+ R2_std: 0.00
+ RL: 0.24
+ RL_std: 0.00
+ SC: -0.17
+ SC_std: 0.00
+ BS: 0.11
+ BS_std: 0.18
+ Cv: 0.65
+ Cv_std: 0.00
+ De: 0.65
+ De_std: 0.00
+ Cp: 20.49
+ Cp_std: 0.95
+ LLaMa-2 13B:
+ R1: 0.04
+ R1_std: 0.00
+ R2: 0.00
+ R2_std: 0.00
+ RL: 0.03
+ RL_std: 0.00
+ SC: -0.17
+ SC_std: 0.00
+ BS: 0.09
+ BS_std: 0.00
+ Cv: 0.05
+ Cv_std: 0.00
+ De: 0.05
+ De_std: 0.00
+ Cp: 66.85
+ Cp_std: 6.72
+ LLaMa-2 7B:
+ R1: 0.04
+ R1_std: 0.00
+ R2: 0.00
+ R2_std: 0.00
+ RL: 0.04
+ RL_std: 0.00
+ SC: -0.17
+ SC_std: 0.00
+ BS: 0.15
+ BS_std: 0.00
+ Cv: 0.06
+ Cv_std: 0.00
+ De: 0.06
+ De_std: 0.00
+ Cp: 58.32
+ Cp_std: 3.32
+ Vietcuna 7B:
+ R1: 0.08
+ R1_std: 0.00
+ R2: 0.02
+ R2_std: 0.00
+ RL: 0.05
+ RL_std: 0.00
+ SC: -0.17
+ SC_std: 0.00
+ BS: -0.19
+ BS_std: 0.05
+ Cv: 0.78
+ Cv_std: 0.00
+ De: 0.78
+ De_std: 0.00
+ Cp: 505.45
+ Cp_std: 8.64
+ MixSUra:
+ R1: 0.46
+ R1_std: null
+ R2: 0.21
+ R2_std: null
+ RL: 0.28
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: 0.26
+ BS_std: null
+ Cv: 0.88
+ Cv_std: null
+ De: 0.98
+ De_std: null
+ Cp: 19.10
+ Cp_std: null
+ GPT-3.5:
+ R1: 0.39
+ R1_std: 0.00
+ R2: 0.19
+ R2_std: 0.00
+ RL: 0.25
+ RL_std: 0.00
+ SC: -0.17
+ SC_std: 0.00
+ BS: 0.28
+ BS_std: 0.11
+ Cv: 0.82
+ Cv_std: 0.00
+ De: 0.82
+ De_std: 0.00
+ Cp: 200.90
+ Cp_std: 7.40
+ GPT-4:
+ R1: 0.45
+ R1_std: 0.00
+ R2: 0.20
+ R2_std: 0.00
+ RL: 0.27
+ RL_std: 0.00
+ SC: -0.17
+ SC_std: 0.00
+ BS: 0.28
+ BS_std: 0.00
+ Cv: 0.80
+ Cv_std: 0.03
+ De: 0.81
+ De_std: 0.00
+ Cp: 20.40
+ Cp_std: 1.59
diff --git a/_data/leaderboard/vi/robustness_aware/text_classification.yml b/_data/leaderboard/vi/robustness_aware/text_classification.yml
new file mode 100644
index 0000000..388fc24
--- /dev/null
+++ b/_data/leaderboard/vi/robustness_aware/text_classification.yml
@@ -0,0 +1,200 @@
+UiT-VSMEC:
+ URA-LLaMa 70B:
+ AC: 0.25
+ AC_std: 0.00
+ F1: 0.16
+ F1_std: 0.00
+ AR: 0.56
+ AR_std: 0.02
+ ECE: 0.20
+ ECE_std: 0.00
+ A@10: 0.33
+ A@10_std: 0.00
+ URA-LLaMa 13B:
+ AC: 0.30
+ AC_std: 0.00
+ F1: 0.11
+ F1_std: 0.00
+ AR: 0.51
+ AR_std: 0.01
+ ECE: 0.26
+ ECE_std: 0.00
+ A@10: 0.44
+ A@10_std: 0.00
+ URA-LLaMa 7B:
+ AC: 0.29
+ AC_std: 0.00
+ F1: 0.10
+ F1_std: 0.00
+ AR: 0.57
+ AR_std: 0.01
+ ECE: 0.17
+ ECE_std: 0.00
+ A@10: 0.30
+ A@10_std: 0.00
+ LLaMa-2 13B:
+ AC: 0.19
+ AC_std: 0.00
+ F1: 0.07
+ F1_std: 0.00
+ AR: 0.52
+ AR_std: 0.01
+ ECE: 0.47
+ ECE_std: 0.00
+ A@10: 0.43
+ A@10_std: 0.00
+ LLaMa-2 7B:
+ AC: 0.17
+ AC_std: 0.00
+ F1: 0.10
+ F1_std: 0.00
+ AR: 0.55
+ AR_std: 0.00
+ ECE: 0.33
+ ECE_std: 0.00
+ A@10: 0.29
+ A@10_std: 0.00
+ Vietcuna 7B:
+ AC: 0.09
+ AC_std: 0.00
+ F1: 0.09
+ F1_std: 0.00
+ AR: 0.51
+ AR_std: 0.01
+ ECE: 0.91
+ ECE_std: 0.00
+ A@10: 0.09
+ A@10_std: 0.00
+ MixSUra:
+ AC: 0.35
+ AC_std: null
+ F1: 0.27
+ F1_std: null
+ AR: 0.70
+ AR_std: null
+ ECE: 0.58
+ ECE_std: null
+ A@10: 0.70
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.42
+ AC_std: 0.00
+ F1: 0.41
+ F1_std: 0.00
+ AR: null
+ AR_std: null
+ ECE: 0.28
+ ECE_std: 0.00
+ A@10: 0.30
+ A@10_std: 0.00
+ GPT-4:
+ AC: 0.48
+ AC_std: 0.00
+ F1: 0.45
+ F1_std: 0.00
+ AR: null
+ AR_std: null
+ ECE: 0.33
+ ECE_std: 0.00
+ A@10: 0.40
+ A@10_std: 0.00
+PhoATIS:
+ URA-LLaMa 70B:
+ AC: 0.16
+ AC_std: 0.02
+ F1: 0.26
+ F1_std: 0.03
+ AR: 0.79
+ AR_std: 0.00
+ ECE: 0.79
+ ECE_std: 0.02
+ A@10: 0.08
+ A@10_std: 0.06
+ URA-LLaMa 13B:
+ AC: 0.01
+ AC_std: 0.01
+ F1: 0.05
+ F1_std: 0.01
+ AR: 0.47
+ AR_std: 0.01
+ ECE: 0.84
+ ECE_std: 0.01
+ A@10: 0.00
+ A@10_std: 0.04
+ URA-LLaMa 7B:
+ AC: 0.02
+ AC_std: 0.01
+ F1: 0.04
+ F1_std: 0.00
+ AR: 0.55
+ AR_std: 0.01
+ ECE: 0.18
+ ECE_std: 0.01
+ A@10: 0.01
+ A@10_std: 0.02
+ LLaMa-2 13B:
+ AC: 0.02
+ AC_std: 0.00
+ F1: 0.06
+ F1_std: 0.00
+ AR: 0.57
+ AR_std: 0.01
+ ECE: 0.91
+ ECE_std: 0.00
+ A@10: 0.01
+ A@10_std: 0.00
+ LLaMa-2 7B:
+ AC: 0.01
+ AC_std: 0.01
+ F1: 0.00
+ F1_std: 0.00
+ AR: 0.56
+ AR_std: 0.00
+ ECE: 0.69
+ ECE_std: 0.01
+ A@10: 0.02
+ A@10_std: 0.02
+ Vietcuna 7B:
+ AC: 0.02
+ AC_std: 0.01
+ F1: 0.01
+ F1_std: 0.00
+ AR: 0.55
+ AR_std: 0.01
+ ECE: 0.23
+ ECE_std: 0.01
+ A@10: 0.02
+ A@10_std: 0.01
+ MixSUra:
+ AC: 0.80
+ AC_std: null
+ F1: 0.55
+ F1_std: null
+ AR: 0.94
+ AR_std: null
+ ECE: 0.15
+ ECE_std: null
+ A@10: 0.88
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.68
+ AC_std: 0.02
+ F1: 0.64
+ F1_std: 0.03
+ AR: null
+ AR_std: null
+ ECE: 0.62
+ ECE_std: 0.02
+ A@10: 0.70
+ A@10_std: 0.05
+ GPT-4:
+ AC: 0.86
+ AC_std: 0.01
+ F1: 0.80
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.80
+ ECE_std: 0.01
+ A@10: 0.91
+ A@10_std: 0.03
\ No newline at end of file
diff --git a/_data/leaderboard/vi/robustness_aware/toxicity_detection.yml b/_data/leaderboard/vi/robustness_aware/toxicity_detection.yml
new file mode 100644
index 0000000..13aded0
--- /dev/null
+++ b/_data/leaderboard/vi/robustness_aware/toxicity_detection.yml
@@ -0,0 +1,200 @@
+UiT-ViCTSD:
+ URA-LLaMa 70B:
+ AC: 0.32
+ AC_std: 0.00
+ F1: 0.21
+ F1_std: 0.00
+ AR: 0.72
+ AR_std: 0.01
+ ECE: 0.62
+ ECE_std: 0.00
+ A@10: 0.33
+ A@10_std: 0.00
+ URA-LLaMa 13B:
+ AC: 0.27
+ AC_std: 0.00
+ F1: 0.26
+ F1_std: 0.00
+ AR: 0.56
+ AR_std: 0.00
+ ECE: 0.56
+ ECE_std: 0.00
+ A@10: 0.12
+ A@10_std: 0.00
+ URA-LLaMa 7B:
+ AC: 0.22
+ AC_std: 0.00
+ F1: 0.21
+ F1_std: 0.00
+ AR: 0.63
+ AR_std: 0.00
+ ECE: 0.39
+ ECE_std: 0.00
+ A@10: 0.36
+ A@10_std: 0.00
+ LLaMa-2 13B:
+ AC: 0.12
+ AC_std: 0.00
+ F1: 0.11
+ F1_std: 0.00
+ AR: 0.56
+ AR_std: 0.01
+ ECE: 0.66
+ ECE_std: 0.00
+ A@10: 0.12
+ A@10_std: 0.00
+ LLaMa-2 7B:
+ AC: 0.04
+ AC_std: 0.00
+ F1: 0.04
+ F1_std: 0.00
+ AR: 0.62
+ AR_std: 0.00
+ ECE: 0.86
+ ECE_std: 0.00
+ A@10: 0.02
+ A@10_std: 0.00
+ Vietcuna 7B:
+ AC: 0.11
+ AC_std: 0.00
+ F1: 0.11
+ F1_std: 0.00
+ AR: 0.54
+ AR_std: 0.00
+ ECE: 0.39
+ ECE_std: 0.00
+ A@10: 0.13
+ A@10_std: 0.00
+ MixSUra:
+ AC: 0.72
+ AC_std: null
+ F1: 0.39
+ F1_std: null
+ AR: null
+ AR_std: null
+ ECE: 0.25
+ ECE_std: null
+ A@10: 0.81
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.51
+ AC_std: 0.00
+ F1: 0.46
+ F1_std: 0.00
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.01
+ ECE_std: 0.00
+ A@10: 0.54
+ A@10_std: 0.00
+ GPT-4:
+ AC: 0.88
+ AC_std: 0.00
+ F1: 0.71
+ F1_std: 0.00
+ AR: null
+ AR_std: null
+ ECE: 0.38
+ ECE_std: 0.00
+ A@10: 0.88
+ A@10_std: 0.00
+UiT-ViHSD:
+ URA-LLaMa 70B:
+ AC: 0.14
+ AC_std: 0.00
+ F1: 0.12
+ F1_std: 0.00
+ AR: 0.64
+ AR_std: 0.02
+ ECE: 0.61
+ ECE_std: 0.00
+ A@10: 0.23
+ A@10_std: 0.00
+ URA-LLaMa 13B:
+ AC: 0.18
+ AC_std: 0.00
+ F1: 0.11
+ F1_std: 0.00
+ AR: 0.57
+ AR_std: 0.01
+ ECE: 0.45
+ ECE_std: 0.00
+ A@10: 0.20
+ A@10_std: 0.00
+ URA-LLaMa 7B:
+ AC: 0.12
+ AC_std: 0.00
+ F1: 0.07
+ F1_std: 0.00
+ AR: 0.62
+ AR_std: 0.00
+ ECE: 0.38
+ ECE_std: 0.00
+ A@10: 0.19
+ A@10_std: 0.00
+ LLaMa-2 13B:
+ AC: 0.10
+ AC_std: 0.00
+ F1: 0.07
+ F1_std: 0.00
+ AR: 0.59
+ AR_std: 0.01
+ ECE: 0.62
+ ECE_std: 0.00
+ A@10: 0.24
+ A@10_std: 0.00
+ LLaMa-2 7B:
+ AC: 0.01
+ AC_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ AR: 0.54
+ AR_std: 0.00
+ ECE: 0.79
+ ECE_std: 0.00
+ A@10: 0.00
+ A@10_std: 0.00
+ Vietcuna 7B:
+ AC: 0.09
+ AC_std: 0.00
+ F1: 0.05
+ F1_std: 0.00
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.24
+ ECE_std: 0.00
+ A@10: 0.08
+ A@10_std: 0.00
+ MixSUra:
+ AC: 0.66
+ AC_std: null
+ F1: 0.31
+ F1_std: null
+ AR: 0.67
+ AR_std: null
+ ECE: 0.21
+ ECE_std: null
+ A@10: 0.82
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.64
+ AC_std: 0.00
+ F1: 0.47
+ F1_std: 0.00
+ AR: null
+ AR_std: null
+ ECE: 0.30
+ ECE_std: 0.00
+ A@10: 0.63
+ A@10_std: 0.00
+ GPT-4:
+ AC: 0.78
+ AC_std: 0.00
+ F1: 0.56
+ F1_std: 0.00
+ AR: null
+ AR_std: null
+ ECE: 0.44
+ ECE_std: 0.00
+ A@10: 0.78
+ A@10_std: 0.00
\ No newline at end of file
diff --git a/_data/leaderboard/vi/robustness_aware/translation.yml b/_data/leaderboard/vi/robustness_aware/translation.yml
new file mode 100644
index 0000000..edcfc9f
--- /dev/null
+++ b/_data/leaderboard/vi/robustness_aware/translation.yml
@@ -0,0 +1,164 @@
+PhoMT:
+ URA-LLaMa 70B:
+ "BLEU envi": 0.25
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.58
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.11
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.51
+ "hLEPOR vien_std": 0.00
+ URA-LLaMa 13B:
+ "BLEU envi": 0.23
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.55
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.10
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.50
+ "hLEPOR vien_std": 0.00
+ URA-LLaMa 7B:
+ "BLEU envi": 0.15
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.48
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.06
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.46
+ "hLEPOR vien_std": 0.00
+ LLaMa-2 13B:
+ "BLEU envi": 0.20
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.51
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.07
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.44
+ "hLEPOR vien_std": 0.00
+ LLaMa-2 7B:
+ "BLEU envi": 0.13
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.41
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.05
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.42
+ "hLEPOR vien_std": 0.00
+ Vietcuna 7B:
+ "BLEU envi": 0.17
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.43
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.07
+ "hLEPOR envi_std": 0.01
+ "hLEPOR vien": 0.41
+ "hLEPOR vien_std": 0.00
+ MixSUra:
+ "BLEU envi": 0.14
+ "BLEU envi_std": null
+ "BLEU vien": 0.50
+ "BLEU vien_std": null
+ "hLEPOR envi": 0.11
+ "hLEPOR envi_std": null
+ "hLEPOR vien": 0.46
+ "hLEPOR vien_std": null
+ GPT-3.5:
+ "BLEU envi": 0.31
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.64
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.17
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.59
+ "hLEPOR vien_std": 0.00
+ GPT-4:
+ "BLEU envi": 0.31
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.65
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.20
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.62
+ "hLEPOR vien_std": 0.00
+OPUS100:
+ URA-LLaMa 70B:
+ "BLEU envi": 0.05
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.40
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.06
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.36
+ "hLEPOR vien_std": 0.00
+ URA-LLaMa 13B:
+ "BLEU envi": 0.03
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.38
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.05
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.38
+ "hLEPOR vien_std": 0.00
+ URA-LLaMa 7B:
+ "BLEU envi": 0.02
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.35
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.03
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.34
+ "hLEPOR vien_std": 0.01
+ LLaMa-2 13B:
+ "BLEU envi": 0.03
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.36
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.04
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.32
+ "hLEPOR vien_std": 0.00
+ LLaMa-2 7B:
+ "BLEU envi": 0.02
+ "BLEU envi_std": 0.00
+ "BLEU vien": 0.31
+ "BLEU vien_std": 0.00
+ "hLEPOR envi": 0.03
+ "hLEPOR envi_std": 0.00
+ "hLEPOR vien": 0.30
+ "hLEPOR vien_std": 0.00
+ Vietcuna 7B:
+ "BLEU envi": 0.09
+ "BLEU envi_std": 0.01
+ "BLEU vien": 0.38
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.09
+ "hLEPOR envi_std": 0.01
+ "hLEPOR vien": 0.33
+ "hLEPOR vien_std": 0.00
+ MixSUra:
+ "BLEU envi": 0.06
+ "BLEU envi_std": null
+ "BLEU vien": 0.36
+ "BLEU vien_std": null
+ "hLEPOR envi": 0.06
+ "hLEPOR envi_std": null
+ "hLEPOR vien": 0.31
+ "hLEPOR vien_std": null
+ GPT-3.5:
+ "BLEU envi": 0.15
+ "BLEU envi_std": 0.01
+ "BLEU vien": 0.49
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.21
+ "hLEPOR envi_std": 0.01
+ "hLEPOR vien": 0.48
+ "hLEPOR vien_std": 0.00
+ GPT-4:
+ "BLEU envi": 0.16
+ "BLEU envi_std": 0.01
+ "BLEU vien": 0.50
+ "BLEU vien_std": 0.01
+ "hLEPOR envi": 0.23
+ "hLEPOR envi_std": 0.01
+ "hLEPOR vien": 0.51
+ "hLEPOR vien_std": 0.00
\ No newline at end of file
diff --git a/_data/leaderboard/vi/weaker_prompt/question_answering.yml b/_data/leaderboard/vi/weaker_prompt/question_answering.yml
new file mode 100644
index 0000000..408ca87
--- /dev/null
+++ b/_data/leaderboard/vi/weaker_prompt/question_answering.yml
@@ -0,0 +1,82 @@
+XQuAD:
+ URA-LLaMa 70B:
+ EM: 0.21
+ EM_std: 0.01
+ F1: 0.47
+ F1_std: 0.01
+ URA-LLaMa 13B:
+ EM: 0.22
+ EM_std: 0.01
+ F1: 0.43
+ F1_std: 0.01
+ URA-LLaMa 7B:
+ EM: 0.13
+ EM_std: 0.00
+ F1: 0.32
+ F1_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.04
+ EM_std: 0.00
+ F1: 0.28
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.06
+ EM_std: 0.00
+ F1: 0.24
+ F1_std: 0.00
+ MixSUra:
+ EM: 0.13
+ EM_std: null
+ F1: 0.38
+ F1_std: null
+ GPT-3.5:
+ EM: null
+ EM_std: null
+ F1: null
+ F1_std: null
+ GPT-4:
+ EM: null
+ EM_std: null
+ F1: null
+ F1_std: null
+MLQA:
+ URA-LLaMa 70B:
+ EM: 0.14
+ EM_std: 0.01
+ F1: 0.41
+ F1_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.17
+ EM_std: 0.01
+ F1: 0.40
+ F1_std: 0.01
+ URA-LLaMa 7B:
+ EM: 0.10
+ EM_std: 0.00
+ F1: 0.32
+ F1_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.04
+ EM_std: 0.00
+ F1: 0.28
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.05
+ EM_std: 0.00
+ F1: 0.24
+ F1_std: 0.00
+ MixSUra:
+ EM: 0.09
+ EM_std: null
+ F1: 0.36
+ F1_std: null
+ GPT-3.5:
+ EM: null
+ EM_std: null
+ F1: null
+ F1_std: null
+ GPT-4:
+ EM: null
+ EM_std: null
+ F1: null
+ F1_std: null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/weaker_prompt/summarization.yml b/_data/leaderboard/vi/weaker_prompt/summarization.yml
new file mode 100644
index 0000000..b978edc
--- /dev/null
+++ b/_data/leaderboard/vi/weaker_prompt/summarization.yml
@@ -0,0 +1,274 @@
+VietNews:
+ URA-LLaMa 70B:
+ R1: 0.49
+ R1_std: 0.00
+ R2: 0.23
+ R2_std: 0.00
+ RL: 0.31
+ RL_std: 0.00
+ SC: -0.08
+ SC_std: 0.00
+ BS: 0.05
+ BS_std: 0.11
+ Cv: 0.89
+ Cv_std: 0.00
+ De: 8.90
+ De_std: 0.03
+ Cp: 18.48
+ Cp_std: 0.59
+ URA-LLaMa 13B:
+ R1: 0.27
+ R1_std: 0.00
+ R2: 0.12
+ R2_std: 0.00
+ RL: 0.18
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: 0.05
+ BS_std: 0.11
+ Cv: 0.56
+ Cv_std: 0.00
+ De: 5.00
+ De_std: 0.04
+ Cp: 153.55
+ Cp_std: 0.99
+ URA-LLaMa 7B:
+ R1: 0.45
+ R1_std: 0.00
+ R2: 0.21
+ R2_std: 0.00
+ RL: 0.29
+ RL_std: 0.00
+ SC: -0.08
+ SC_std: 0.00
+ BS: 0.03
+ BS_std: 0.09
+ Cv: 0.91
+ Cv_std: 0.00
+ De: 9.43
+ De_std: 0.03
+ Cp: 6.42
+ Cp_std: 0.05
+ LLaMa-2 13B:
+ R1: 0.45
+ R1_std: 0.00
+ R2: 0.22
+ R2_std: 0.00
+ RL: 0.29
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: 0.00
+ BS_std: 0.14
+ Cv: 0.92
+ Cv_std: 0.00
+ De: 9.49
+ De_std: 0.02
+ Cp: 8.46
+ Cp_std: 0.29
+ LLaMa-2 7B:
+ R1: 0.36
+ R1_std: 0.00
+ R2: 0.17
+ R2_std: 0.00
+ RL: 0.23
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: -0.15
+ BS_std: 0.12
+ Cv: 0.69
+ Cv_std: 0.00
+ De: 6.35
+ De_std: 0.03
+ Cp: 7.59
+ Cp_std: 0.21
+ MixSUra:
+ R1: 0.44
+ R1_std: null
+ R2: 0.22
+ R2_std: null
+ RL: 0.29
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: 0.07
+ BS_std: null
+ Cv: 0.97
+ Cv_std: null
+ De: 35.67
+ De_std: null
+ Cp: 9.43
+ Cp_std: null
+ GPT-3.5:
+ R1: null
+ R1_std: null
+ R2: null
+ R2_std: null
+ RL: null
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: null
+ BS_std: null
+ Cv: null
+ Cv_std: null
+ De: null
+ De_std: null
+ Cp: null
+ Cp_std: null
+ GPT-4:
+ R1: null
+ R1_std: null
+ R2: null
+ R2_std: null
+ RL: null
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: null
+ BS_std: null
+ Cv: null
+ Cv_std: null
+ De: null
+ De_std: null
+ Cp: null
+ Cp_std: null
+WikiLingua:
+ URA-LLaMa 70B:
+ R1: 0.47
+ R1_std: 0.00
+ R2: 0.20
+ R2_std: 0.00
+ RL: 0.29
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.19
+ BS_std: 0.13
+ Cv: 0.86
+ Cv_std: 0.00
+ De: 6.83
+ De_std: 0.09
+ Cp: 25.30
+ Cp_std: 1.86
+ URA-LLaMa 13B:
+ R1: 0.22
+ R1_std: 0.00
+ R2: 0.09
+ R2_std: 0.00
+ RL: 0.14
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.20
+ BS_std: 0.007
+ Cv: 0.48
+ Cv_std: 0.00
+ De: 3.49
+ De_std: 0.04
+ Cp: 190.09
+ Cp_std: 4.92
+ URA-LLaMa 7B:
+ R1: 0.42
+ R1_std: 0.00
+ R2: 0.18
+ R2_std: 0.00
+ RL: 0.27
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.07
+ BS_std: 0.12
+ Cv: 0.89
+ Cv_std: 0.00
+ De: 7.58
+ De_std: 0.05
+ Cp: 7.14
+ Cp_std: 0.14
+ LLaMa-2 13B:
+ R1: 0.47
+ R1_std: 0.00
+ R2: 0.22
+ R2_std: 0.00
+ RL: 0.29
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.34
+ BS_std: 0.12
+ Cv: 0.92
+ Cv_std: 0.00
+ De: 9.39
+ De_std: 0.05
+ Cp: 17.94
+ Cp_std: 2.84
+ LLaMa-2 7B:
+ R1: 0.45
+ R1_std: 0.00
+ R2: 0.20
+ R2_std: 0.00
+ RL: 0.27
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.36
+ BS_std: 0.00
+ Cv: 0.83
+ Cv_std: 0.00
+ De: 7.71
+ De_std: 0.07
+ Cp: 12.39
+ Cp_std: 1.46
+ MixSUra:
+ R1: 0.47
+ R1_std: null
+ R2: 0.22
+ R2_std: null
+ RL: 0.29
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: 0.19
+ BS_std: null
+ Cv: 0.97
+ Cv_std: null
+ De: 28.97
+ De_std: null
+ Cp: 10.27
+ Cp_std: null
+ GPT-3.5:
+ R1: null
+ R1_std: null
+ R2: null
+ R2_std: null
+ RL: null
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: null
+ BS_std: null
+ Cv: null
+ Cv_std: null
+ De: null
+ De_std: null
+ Cp: null
+ Cp_std: null
+ GPT-4:
+ R1: null
+ R1_std: null
+ R2: null
+ R2_std: null
+ RL: null
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: null
+ BS_std: null
+ Cv: null
+ Cv_std: null
+ De: null
+ De_std: null
+ Cp: null
+ Cp_std: null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/zero_shot/information_retrieval.yml b/_data/leaderboard/vi/zero_shot/information_retrieval.yml
new file mode 100644
index 0000000..22df454
--- /dev/null
+++ b/_data/leaderboard/vi/zero_shot/information_retrieval.yml
@@ -0,0 +1,146 @@
+mMARCO:
+ URA-LLaMa 70B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ URA-LLaMa 13B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ URA-LLaMa 7B:
+ M@10: 0.06
+ M@10_std: 0.00
+ M@10B: 0.14
+ M@10B_std: 0.00
+ N@10: 0.09
+ N@10_std: 0.00
+ N@10B: 0.21
+ N@10B_std: 0.00
+ LLaMa-2 13B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ LLaMa-2 7B:
+ M@10: 0.06
+ M@10_std: 0.00
+ M@10B: 0.11
+ M@10B_std: 0.00
+ N@10: 0.08
+ N@10_std: 0.00
+ N@10B: 0.17
+ N@10B_std: 0.00
+ Vietcuna 7B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ GPT-3.5:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ GPT-4:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+mRobust04:
+ URA-LLaMa 70B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ URA-LLaMa 13B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ URA-LLaMa 7B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ LLaMa-2 13B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ LLaMa-2 7B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ Vietcuna 7B:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ GPT-3.5:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
+ GPT-4:
+ M@10: null
+ M@10_std: null
+ M@10B: null
+ M@10B_std: null
+ N@10: null
+ N@10_std: null
+ N@10B: null
+ N@10B_std: null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/zero_shot/knowledge.yml b/_data/leaderboard/vi/zero_shot/knowledge.yml
new file mode 100644
index 0000000..88d0f13
--- /dev/null
+++ b/_data/leaderboard/vi/zero_shot/knowledge.yml
@@ -0,0 +1,131 @@
+ZaloE2E:
+ num_fields: 2
+ URA-LLaMa 70B:
+ EM: 0.28
+ EM_std: 0.02
+ F1: 0.44
+ F1_std: 0.02
+ URA-LLaMa 13B:
+ EM: 0.12
+ EM_std: 0.01
+ F1: 0.22
+ F1_std: 0.01
+ URA-LLaMa 7B:
+ EM: 0.09
+ EM_std: 0.01
+ F1: 0.20
+ F1_std: 0.02
+ LLaMa-2 13B:
+ EM: 0.06
+ EM_std: 0.01
+ F1: 0.10
+ F1_std: 0.01
+ LLaMa-2 7B:
+ EM: 0.03
+ EM_std: 0.01
+ F1: 0.07
+ F1_std: 0.01
+ Vietcuna 7B:
+ EM: 0.03
+ EM_std: 0.01
+ F1: 0.06
+ F1_std: 0.01
+ GPT-3.5:
+ EM: 0.37
+ EM_std: 0.02
+ F1: 0.56
+ F1_std: 0.02
+ GPT-4:
+ EM: 0.38
+ EM_std: 0.02
+ F1: 0.55
+ F1_std: 0.02
+ViMMRC:
+ URA-LLaMa 70B:
+ AC: 0.80
+ AC_std: 0.02
+ F1: 0.80
+ F1_std: 0.02
+ AR: 0.85
+ AR_std: 0.01
+ ECE: 0.10
+ ECE_std: 0.02
+ A@10: 0.96
+ A@10_std: 0.03
+ URA-LLaMa 13B:
+ AC: 0.40
+ AC_std: 0.02
+ F1: 0.31
+ F1_std: 0.02
+ AR: 0.57
+ AR_std: 0.02
+ ECE: 0.48
+ ECE_std: 0.02
+ A@10: 0.42
+ A@10_std: 0.08
+ URA-LLaMa 7B:
+ AC: 0.30
+ AC_std: 0.02
+ F1: 0.10
+ F1_std: 0.01
+ AR: 0.56
+ AR_std: 0.02
+ ECE: 0.27
+ ECE_std: 0.02
+ A@10: 0.56
+ A@10_std: 0.07
+ LLaMa-2 13B:
+ AC: 0.52
+ AC_std: 0.02
+ F1: 0.41
+ F1_std: 0.02
+ AR: 0.64
+ AR_std: 0.02
+ ECE: 0.33
+ ECE_std: 0.02
+ A@10: 0.73
+ A@10_std: 0.07
+ LLaMa-2 7B:
+ AC: 0.37
+ AC_std: 0.02
+ F1: 0.25
+ F1_std: 0.02
+ AR: 0.51
+ AR_std: 0.02
+ ECE: 0.35
+ ECE_std: 0.02
+ A@10: 0.29
+ A@10_std: 0.06
+ Vietcuna 7B:
+ AC: 0.32
+ AC_std: 0.02
+ F1: 0.22
+ F1_std: 0.02
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.07
+ ECE_std: 0.02
+ A@10: 0.33
+ A@10_std: 0.07
+ GPT-3.5:
+ AC: 0.90
+ AC_std: 0.01
+ F1: 0.72
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.65
+ ECE_std: 0.01
+ A@10: 0.90
+ A@10_std: 0.04
+ GPT-4:
+ AC: 0.92
+ AC_std: 0.01
+ F1: 0.73
+ F1_std: 0.06
+ AR: null
+ AR_std: null
+ ECE: 0.67
+ ECE_std: 0.01
+ A@10: 0.90
+ A@10_std: 0.04
\ No newline at end of file
diff --git a/_data/leaderboard/vi/zero_shot/language_modeling.yml b/_data/leaderboard/vi/zero_shot/language_modeling.yml
new file mode 100644
index 0000000..8132450
--- /dev/null
+++ b/_data/leaderboard/vi/zero_shot/language_modeling.yml
@@ -0,0 +1,236 @@
+MLQA-MLM:
+ URA-LLaMa 70B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 0.50
+ CER_std: 0.01
+ WER: 0.64
+ WER_std: 0.01
+ CED: 519.09
+ CED_std: 10.96
+ WED: 115.82
+ WED_std: 2.45
+ PLX: 1.08
+ PLX_std: 0.01
+ URA-LLaMa 13B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 0.67
+ CER_std: 0.00
+ WER: 0.78
+ WER_std: 0.00
+ CED: 697.85
+ CED_std: 11.62
+ WED: 161.34
+ WED_std: 2.64
+ PLX: 1.16
+ PLX_std: 0.02
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 0.40
+ CER_std: 0.01
+ WER: 0.55
+ WER_std: 0.01
+ CED: 498.36
+ CED_std: 11.01
+ WED: 118.11
+ WED_std: 2.58
+ PLX: 1.24
+ PLX_std: 0.01
+ LLaMa-2 13B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 0.74
+ CER_std: 0.00
+ WER: 0.87
+ WER_std: 0.00
+ CED: 760.98
+ CED_std: 11.91
+ WED: 186.90
+ WED_std: 2.85
+ PLX: 1.24
+ PLX_std: 0.03
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 0.81
+ CER_std: 0.00
+ WER: 0.98
+ WER_std: 0.00
+ CED: 769.36
+ CED_std: 10.51
+ WED: 198.53
+ WED_std: 2.57
+ PLX: 1.74
+ PLX_std: 0.19
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 1.04
+ CER_std: 0.00
+ WER: 1.06
+ WER_std: 0.00
+ CED: 935.65
+ CED_std: 12.47
+ WED: 204.98
+ WED_std: 2.79
+ PLX: 1.40
+ PLX_std: 0.00
+ MixSUra:
+ EM: 0.00
+ EM_std: null
+ CER: 0.55
+ CER_std: null
+ WER: 0.63
+ WER_std: null
+ CED: 526.79
+ CED_std: null
+ WED: 131.02
+ WED_std: null
+ PLX: 1.00
+ PLX_std: null
+ GPT-3.5:
+ EM: 0.04
+ EM_std: 0.00
+ CER: 0.28
+ CER_std: 0.01
+ WER: 0.44
+ WER_std: 0.01
+ CED: 387.37
+ CED_std: 10.86
+ WED: 92.78
+ WED_std: 2.46
+ PLX: null
+ PLX_std: null
+ GPT-4:
+ EM: 0.08
+ EM_std: 0.00
+ CER: 0.23
+ CER_std: 0.01
+ WER: 0.40
+ WER_std: 0.01
+ CED: 336.53
+ CED_std: 10.23
+ WED: 83.55
+ WED_std: 2.34
+ PLX: null
+ PLX_std: null
+VSEC:
+ URA-LLaMa 70B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 0.88
+ CER_std: 0.00
+ WER: 1.01
+ WER_std: 0.00
+ CED: 113.51
+ CED_std: 0.57
+ WED: 29.91
+ WED_std: 0.15
+ PLX: 1.09
+ PLX_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 0.42
+ CER_std: 0.01
+ WER: 0.56
+ WER_std: 0.01
+ CED: 54.88
+ CED_std: 0.77
+ WED: 14.50
+ WED_std: 0.19
+ PLX: 1.26
+ PLX_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 3.33
+ CER_std: 0.04
+ WER: 3.14
+ WER_std: 0.03
+ CED: 420.34
+ CED_std: 5.66
+ WED: 85.79
+ WED_std: 0.96
+ PLX: 1.33
+ PLX_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 1.32
+ CER_std: 0.01
+ WER: 1.40
+ WER_std: 0.01
+ CED: 160.06
+ CED_std: 1.16
+ WED: 38.12
+ WED_std: 0.23
+ PLX: 1.11
+ PLX_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ CER: 1.54
+ CER_std: 0.04
+ WER: 1.55
+ WER_std: 0.03
+ CED: 171.28
+ CED_std: 5.66
+ WED: 40.18
+ WED_std: 0.96
+ PLX: 1.14
+ PLX_std: 0.00
+ Vietcuna 7B:
+ EM: 0.01
+ EM_std: 0.00
+ CER: 1.11
+ CER_std: 0.01
+ WER: 1.20
+ WER_std: 0.01
+ CED: 139.90
+ CED_std: 1.39
+ WED: 33.94
+ WED_std: 0.33
+ PLX: 1.61
+ PLX_std: 0.00
+ MixSUra:
+ EM: 0.00
+ EM_std: null
+ CER: 0.82
+ CER_std: null
+ WER: 0.97
+ WER_std: null
+ CED: 115.21
+ CED_std: null
+ WED: 30.76
+ WED_std: null
+ PLX: 1.09
+ PLX_std: null
+ GPT-3.5:
+ EM: 0.02
+ EM_std: 0.00
+ CER: 0.16
+ CER_std: 0.00
+ WER: 0.30
+ WER_std: 0.00
+ CED: 12.63
+ CED_std: 0.34
+ WED: 3.48
+ WED_std: 0.09
+ PLX: null
+ PLX_std: null
+ GPT-4:
+ EM: 0.60
+ EM_std: 0.01
+ CER: 0.14
+ CER_std: 0.00
+ WER: 0.26
+ WER_std: 0.00
+ CED: 13.58
+ CED_std: 0.45
+ WED: 3.67
+ WED_std: 0.12
+ PLX: null
+ PLX_std: null
\ No newline at end of file
diff --git a/_data/leaderboard/vi/zero_shot/question_answering.yml b/_data/leaderboard/vi/zero_shot/question_answering.yml
new file mode 100644
index 0000000..2fdb6f6
--- /dev/null
+++ b/_data/leaderboard/vi/zero_shot/question_answering.yml
@@ -0,0 +1,82 @@
+XQuAD:
+ URA-LLaMa 70B:
+ EM: 0.06
+ EM_std: 0.00
+ F1: 0.30
+ F1_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.14
+ F1_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.14
+ F1_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.04
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.05
+ F1_std: 0.00
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ GPT-3.5:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.24
+ F1_std: 0.00
+ GPT-4:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.27
+ F1_std: 0.00
+MLQA:
+ URA-LLaMa 70B:
+ EM: 0.04
+ EM_std: 0.00
+ F1: 0.28
+ F1_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.15
+ F1_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.16
+ F1_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.02
+ F1: 0.05
+ F1_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.06
+ F1_std: 0.00
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ GPT-3.5:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.25
+ F1_std: 0.00
+ GPT-4:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.27
+ F1_std: 0.00
\ No newline at end of file
diff --git a/_data/leaderboard/vi/zero_shot/reasoning.yml b/_data/leaderboard/vi/zero_shot/reasoning.yml
new file mode 100644
index 0000000..2a66b7b
--- /dev/null
+++ b/_data/leaderboard/vi/zero_shot/reasoning.yml
@@ -0,0 +1,171 @@
+"SR - Natural":
+ URA-LLaMa 70B:
+ EM: 0.06
+ EM_std: 0.00
+ F1: 0.34
+ F1_std: 0.00
+ Equ: 0.06
+ Equ_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.01
+ EM_std: 0.00
+ F1: 0.31
+ F1_std: 0.00
+ Equ: 0.02
+ Equ_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.26
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.06
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.04
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.04
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ GPT-3.5:
+ EM: 0.21
+ EM_std: 0.00
+ F1: 0.59
+ F1_std: 0.00
+ Equ: 0.32
+ Equ_std: 0.00
+ GPT-4:
+ EM: 0.21
+ EM_std: 0.00
+ F1: 0.59
+ F1_std: 0.00
+ Equ: 0.32
+ Equ_std: 0.00
+"SR - Abstract symbol":
+ URA-LLaMa 70B:
+ EM: 0.02
+ EM_std: 0.00
+ F1: 0.24
+ F1_std: 0.00
+ Equ: 0.01
+ Equ_std: 0.00
+ URA-LLaMa 13B:
+ EM: 0.02
+ EM_std: 0.00
+ F1: 0.24
+ F1_std: 0.00
+ Equ: 0.01
+ Equ_std: 0.00
+ URA-LLaMa 7B:
+ EM: 0.01
+ EM_std: 0.00
+ F1: 0.17
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ LLaMa-2 13B:
+ EM: 0.02
+ EM_std: 0.00
+ F1: 0.19
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.05
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.10
+ F1_std: 0.00
+ Equ: 0.00
+ Equ_std: 0.00
+ GPT-3.5:
+ EM: 0.09
+ EM_std: 0.00
+ F1: 0.28
+ F1_std: 0.00
+ Equ: 0.13
+ Equ_std: 0.00
+ GPT-4:
+ EM: 0.09
+ EM_std: 0.00
+ F1: 0.28
+ F1_std: 0.00
+ Equ: 0.13
+ Equ_std: 0.00
+MATH:
+ URA-LLaMa 70B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.01
+ F1_std: 0.00
+ Equ: 0.24
+ Equ_std: 0.02
+ URA-LLaMa 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.14
+ Equ_std: 0.02
+ URA-LLaMa 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.05
+ Equ_std: 0.01
+ LLaMa-2 13B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.16
+ Equ_std: 0.02
+ LLaMa-2 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.06
+ Equ_std: 0.01
+ Vietcuna 7B:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.00
+ F1_std: 0.00
+ Equ: 0.01
+ Equ_std: 0.00
+ GPT-3.5:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.01
+ F1_std: 0.00
+ Equ: 0.72
+ Equ_std: 0.02
+ GPT-4:
+ EM: 0.00
+ EM_std: 0.00
+ F1: 0.01
+ F1_std: 0.00
+ Equ: 0.76
+ Equ_std: 0.02
\ No newline at end of file
diff --git a/_data/leaderboard/vi/zero_shot/sentiment_analysis.yml b/_data/leaderboard/vi/zero_shot/sentiment_analysis.yml
new file mode 100644
index 0000000..03403da
--- /dev/null
+++ b/_data/leaderboard/vi/zero_shot/sentiment_analysis.yml
@@ -0,0 +1,200 @@
+VLSP 2016:
+ URA-LLaMa 70B:
+ AC: 0.63
+ AC_std: 0.02
+ F1: 0.63
+ F1_std: 0.02
+ AR: 0.74
+ AR_std: 0.01
+ ECE: 0.15
+ ECE_std: 0.01
+ A@10: 0.87
+ A@10_std: 0.03
+ URA-LLaMa 13B:
+ AC: 0.52
+ AC_std: 0.02
+ F1: 0.35
+ F1_std: 0.01
+ AR: 0.60
+ AR_std: 0.01
+ ECE: 0.10
+ ECE_std: 0.01
+ A@10: 0.64
+ A@10_std: 0.05
+ URA-LLaMa 7B:
+ AC: 0.35
+ AC_std: 0.02
+ F1: 0.24
+ F1_std: 0.01
+ AR: 0.54
+ AR_std: 0.01
+ ECE: 0.24
+ ECE_std: 0.01
+ A@10: 0.31
+ A@10_std: 0.05
+ LLaMa-2 13B:
+ AC: 0.25
+ AC_std: 0.01
+ F1: 0.25
+ F1_std: 0.01
+ AR: 0.49
+ AR_std: 0.01
+ ECE: 0.39
+ ECE_std: 0.01
+ A@10: 0.29
+ A@10_std: 0.05
+ LLaMa-2 7B:
+ AC: 0.15
+ AC_std: 0.01
+ F1: 0.15
+ F1_std: 0.01
+ AR: 0.58
+ AR_std: 0.01
+ ECE: 0.73
+ ECE_std: 0.01
+ A@10: 0.12
+ A@10_std: 0.03
+ Vietcuna 7B:
+ AC: 0.11
+ AC_std: 0.01
+ F1: 0.12
+ F1_std: 0.01
+ AR: 0.49
+ AR_std: 0.01
+ ECE: 0.68
+ ECE_std: 0.01
+ A@10: 0.11
+ A@10_std: 0.03
+ MixSUra:
+ AC: 0.45
+ AC_std: null
+ F1: 0.30
+ F1_std: null
+ AR: 0.62
+ AR_std: null
+ ECE: 0.50
+ ECE_std: null
+ A@10: 0.49
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.62
+ AC_std: 0.02
+ F1: 0.56
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.29
+ ECE_std: 0.02
+ A@10: 0.62
+ A@10_std: 0.05
+ GPT-4:
+ AC: 0.71
+ AC_std: 0.01
+ F1: 0.68
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.37
+ ECE_std: 0.01
+ A@10: 0.70
+ A@10_std: 0.04
+UiT-VSFC:
+ URA-LLaMa 70B:
+ AC: 0.64
+ AC_std: 0.01
+ F1: 0.54
+ F1_std: 0.01
+ AR: 0.85
+ AR_std: 0.01
+ ECE: 0.14
+ ECE_std: 0.00
+ A@10: 0.98
+ A@10_std: 0.01
+ URA-LLaMa 13B:
+ AC: 0.70
+ AC_std: 0.01
+ F1: 0.40
+ F1_std: 0.01
+ AR: 0.72
+ AR_std: 0.01
+ ECE: 0.23
+ ECE_std: 0.01
+ A@10: 0.95
+ A@10_std: 0.01
+ URA-LLaMa 7B:
+ AC: 0.27
+ AC_std: 0.01
+ F1: 0.18
+ F1_std: 0.00
+ AR: 0.52
+ AR_std: 0.01
+ ECE: 0.37
+ ECE_std: 0.01
+ A@10: 0.03
+ A@10_std: 0.01
+ LLaMa-2 13B:
+ AC: 0.29
+ AC_std: 0.01
+ F1: 0.24
+ F1_std: 0.01
+ AR: 0.52
+ AR_std: 0.01
+ ECE: 0.42
+ ECE_std: 0.01
+ A@10: 0.30
+ A@10_std: 0.03
+ LLaMa-2 7B:
+ AC: 0.04
+ AC_std: 0.00
+ F1: 0.06
+ F1_std: 0.01
+ AR: 0.49
+ AR_std: 0.01
+ ECE: 0.79
+ ECE_std: 0.00
+ A@10: 0.01
+ A@10_std: 0.01
+ Vietcuna 7B:
+ AC: 0.05
+ AC_std: 0.00
+ F1: 0.06
+ F1_std: 0.00
+ AR: 0.56
+ AR_std: 0.01
+ ECE: 0.73
+ ECE_std: 0.00
+ A@10: 0.05
+ A@10_std: 0.01
+ MixSUra:
+ AC: 0.55
+ AC_std: null
+ F1: 0.40
+ F1_std: null
+ AR: 0.66
+ AR_std: null
+ ECE: 0.41
+ ECE_std: null
+ A@10: 0.60
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.86
+ AC_std: 0.01
+ F1: 0.71
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.52
+ ECE_std: 0.01
+ A@10: 0.86
+ A@10_std: 0.02
+ GPT-4:
+ AC: 0.85
+ AC_std: 0.01
+ F1: 0.71
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.52
+ ECE_std: 0.01
+ A@10: 0.87
+ A@10_std: 0.02
\ No newline at end of file
diff --git a/_data/leaderboard/vi/zero_shot/summarization.yml b/_data/leaderboard/vi/zero_shot/summarization.yml
new file mode 100644
index 0000000..342bccd
--- /dev/null
+++ b/_data/leaderboard/vi/zero_shot/summarization.yml
@@ -0,0 +1,291 @@
+VietNews:
+ URA-LLaMa 70B:
+ R1: 0.35
+ R1_std: 0.00
+ R2: 0.16
+ R2_std: 0.00
+ RL: 0.24
+ RL_std: 0.00
+ SC: -0.11
+ SC_std: 0.00
+ BS: 0.12
+ BS_std: 0.00
+ Cv: 0.63
+ Cv_std: 0.00
+ De: 5.43
+ De_std: 0.02
+ Cp: 37.78
+ Cp_std: 0.47
+ URA-LLaMa 13B:
+ R1: 0.26
+ R1_std: 0.00
+ R2: 0.12
+ R2_std: 0.00
+ RL: 0.17
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: -0.08
+ BS_std: 0.18
+ Cv: 0.46
+ Cv_std: 0.00
+ De: 3.55
+ De_std: 0.04
+ Cp: 47.75
+ Cp_std: 0.65
+ URA-LLaMa 7B:
+ R1: 0.41
+ R1_std: 0.00
+ R2: 0.18
+ R2_std: 0.00
+ RL: 0.27
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: -0.08
+ BS_std: 0.13
+ Cv: 0.83
+ Cv_std: 0.00
+ De: 8.13
+ De_std: 0.04
+ Cp: 8.08
+ Cp_std: 0.17
+ LLaMa-2 13B:
+ R1: 0.02
+ R1_std: 0.00
+ R2: 0.00
+ R2_std: 0.00
+ RL: 0.02
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: -0.19
+ BS_std: 0.05
+ Cv: 0.01
+ Cv_std: 0.00
+ De: 0.01
+ De_std: 0.00
+ Cp: 54.67
+ Cp_std: 0.16
+ LLaMa-2 7B:
+ R1: 0.03
+ R1_std: 0.00
+ R2: 0.01
+ R2_std: 0.00
+ RL: 0.03
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: -0.17
+ BS_std: 0.03
+ Cv: 0.04
+ Cv_std: 0.00
+ De: 0.07
+ De_std: 0.00
+ Cp: 23.86
+ Cp_std: 0.26
+ MixSUra:
+ R1: 0.06
+ R1_std: null
+ R2: 0.01
+ R2_std: null
+ RL: 0.04
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: -0.13
+ BS_std: null
+ Cv: 0.10
+ Cv_std: null
+ De: 0.17
+ De_std: null
+ Cp: 9.03
+ Cp_std: null
+ GPT-3.5:
+ R1: 0.36
+ R1_std: 0.00
+ R2: 0.20
+ R2_std: 0.00
+ RL: 0.24
+ RL_std: 0.00
+ SC: -0.09
+ SC_std: 0.00
+ BS: 0.04
+ BS_std: 0.13
+ Cv: 0.86
+ Cv_std: 0.00
+ De: 3.97
+ De_std: 0.02
+ Cp: 13.32
+ Cp_std: 0.65
+ GPT-4:
+ R1: 0.41
+ R1_std: 0.00
+ R2: 0.21
+ R2_std: 0.00
+ RL: 0.26
+ RL_std: 0.00
+ SC: -0.08
+ SC_std: 0.00
+ BS: -0.04
+ BS_std: 0.11
+ Cv: 0.84
+ Cv_std: 0.00
+ De: 3.45
+ De_std: 0.00
+ Cp: 15.43
+ Cp_std: 0.49
+WikiLingua:
+ URA-LLaMa 70B:
+ R1: 0.37
+ R1_std: 0.00
+ R2: 0.16
+ R2_std: 0.00
+ RL: 0.24
+ RL_std: 0.00
+ SC: -0.22
+ SC_std: 0.00
+ BS: 0.26
+ BS_std: 0.16
+ Cv: 0.17
+ Cv_std: 0.00
+ De: 0.22
+ De_std: 0.00
+ Cp: 22.24
+ Cp_std: 0.97
+ URA-LLaMa 13B:
+ R1: 0.14
+ R1_std: 0.00
+ R2: 0.05
+ R2_std: 0.00
+ RL: 0.09
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: -0.14
+ BS_std: 0.12
+ Cv: 0.26
+ Cv_std: 0.01
+ De: 1.83
+ De_std: 0.06
+ Cp: 60.10
+ Cp_std: 2.16
+ URA-LLaMa 7B:
+ R1: 0.42
+ R1_std: 0.00
+ R2: 0.17
+ R2_std: 0.00
+ RL: 0.27
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.27
+ BS_std: 0.21
+ Cv: 0.84
+ Cv_std: 0.00
+ De: 7.15
+ De_std: 0.08
+ Cp: 8.08
+ Cp_std: 0.36
+ LLaMa-2 13B:
+ R1: 0.04
+ R1_std: 0.00
+ R2: 0.00
+ R2_std: 0.00
+ RL: 0.03
+ RL_std: 0.00
+ SC: -0.17
+ SC_std: 0.00
+ BS: -0.05
+ BS_std: 0.03
+ Cv: 0.02
+ Cv_std: 0.00
+ De: 0.02
+ De_std: 0.00
+ Cp: 42.55
+ Cp_std: 0.81
+ LLaMa-2 7B:
+ R1: 0.04
+ R1_std: 0.00
+ R2: 0.00
+ R2_std: 0.00
+ RL: 0.03
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: -0.14
+ BS_std: 0.07
+ Cv: 0.03
+ Cv_std: 0.00
+ De: 0.06
+ De_std: 0.00
+ Cp: 17.84
+ Cp_std: 0.50
+ Vietcuna 7B:
+ R1: 0.24
+ R1_std: 0.00
+ R2: 0.06
+ R2_std: 0.00
+ RL: 0.15
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: -0.18
+ BS_std: 0.07
+ Cv: 0.51
+ Cv_std: 0.01
+ De: 1.16
+ De_std: 0.01
+ Cp: 238.67
+ Cp_std: 3.37
+ MixSUra:
+ R1: 0.03
+ R1_std: null
+ R2: 0.00
+ R2_std: null
+ RL: 0.03
+ RL_std: null
+ SC: null
+ SC_std: null
+ BS: -0.01
+ BS_std: null
+ Cv: 0.17
+ Cv_std: null
+ De: 0.26
+ De_std: null
+ Cp: 16.68
+ Cp_std: null
+ GPT-3.5:
+ R1: 0.43
+ R1_std: 0.00
+ R2: 0.21
+ R2_std: 0.00
+ RL: 0.27
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.22
+ BS_std: 0.03
+ Cv: 0.87
+ Cv_std: 0.00
+ De: 3.29
+ De_std: 0.03
+ Cp: 35.50
+ Cp_std: 0.82
+ GPT-4:
+ R1: 0.44
+ R1_std: 0.00
+ R2: 0.21
+ R2_std: 0.00
+ RL: 0.27
+ RL_std: 0.00
+ SC: -0.16
+ SC_std: 0.00
+ BS: 0.24
+ BS_std: 0.04
+ Cv: 0.82
+ Cv_std: 0.00
+ De: 2.37
+ De_std: 0.01
+ Cp: 6.61
+ Cp_std: 0.16
\ No newline at end of file
diff --git a/_data/leaderboard/vi/zero_shot/text_classification.yml b/_data/leaderboard/vi/zero_shot/text_classification.yml
new file mode 100644
index 0000000..7b5a34d
--- /dev/null
+++ b/_data/leaderboard/vi/zero_shot/text_classification.yml
@@ -0,0 +1,200 @@
+UiT-VSMEC:
+ URA-LLaMa 70B:
+ AC: 0.40
+ AC_std: 0.02
+ F1: 0.32
+ F1_std: 0.02
+ AR: 0.68
+ AR_std: 0.01
+ ECE: 0.14
+ ECE_std: 0.02
+ A@10: 0.60
+ A@10_std: 0.06
+ URA-LLaMa 13B:
+ AC: 0.29
+ AC_std: 0.02
+ F1: 0.25
+ F1_std: 0.02
+ AR: 0.52
+ AR_std: 0.01
+ ECE: 0.09
+ ECE_std: 0.01
+ A@10: 0.23
+ A@10_std: 0.05
+ URA-LLaMa 7B:
+ AC: 0.13
+ AC_std: 0.01
+ F1: 0.11
+ F1_std: 0.01
+ AR: 0.50
+ AR_std: 0.01
+ ECE: 0.15
+ ECE_std: 0.01
+ A@10: 0.21
+ A@10_std: 0.05
+ LLaMa-2 13B:
+ AC: 0.11
+ AC_std: 0.01
+ F1: 0.10
+ F1_std: 0.01
+ AR: 0.49
+ AR_std: 0.01
+ ECE: 0.31
+ ECE_std: 0.01
+ A@10: 0.09
+ A@10_std: 0.04
+ LLaMa-2 7B:
+ AC: 0.07
+ AC_std: 0.01
+ F1: 0.08
+ F1_std: 0.01
+ AR: 0.52
+ AR_std: 0.01
+ ECE: 0.35
+ ECE_std: 0.01
+ A@10: 0.07
+ A@10_std: 0.03
+ Vietcuna 7B:
+ AC: 0.05
+ AC_std: 0.01
+ F1: 0.02
+ F1_std: 0.01
+ AR: 0.52
+ AR_std: 0.01
+ ECE: 0.95
+ ECE_std: 0.01
+ A@10: 0.03
+ A@10_std: 0.02
+ MixSUra:
+ AC: 0.40
+ AC_std: null
+ F1: 0.36
+ F1_std: null
+ AR: 0.72
+ AR_std: null
+ ECE: 0.53
+ ECE_std: null
+ A@10: 0.79
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.43
+ AC_std: 0.02
+ F1: 0.37
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.29
+ ECE_std: 0.02
+ A@10: 0.43
+ A@10_std: 0.06
+ GPT-4:
+ AC: 0.49
+ AC_std: 0.02
+ F1: 0.46
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.35
+ ECE_std: 0.02
+ A@10: 0.50
+ A@10_std: 0.06
+PhoATIS:
+ URA-LLaMa 70B:
+ AC: 0.56
+ AC_std: 0.02
+ F1: 0.48
+ F1_std: 0.03
+ AR: 0.85
+ AR_std: 0.00
+ ECE: 0.25
+ ECE_std: 0.02
+ A@10: 0.56
+ A@10_std: 0.06
+ URA-LLaMa 13B:
+ AC: 0.10
+ AC_std: 0.01
+ F1: 0.10
+ F1_std: 0.01
+ AR: 0.72
+ AR_std: 0.00
+ ECE: 0.52
+ ECE_std: 0.01
+ A@10: 0.14
+ A@10_std: 0.04
+ URA-LLaMa 7B:
+ AC: 0.04
+ AC_std: 0.01
+ F1: 0.04
+ F1_std: 0.02
+ AR: 0.77
+ AR_std: 0.00
+ ECE: 0.30
+ ECE_std: 0.01
+ A@10: 0.04
+ A@10_std: 0.02
+ LLaMa-2 13B:
+ AC: 0.03
+ AC_std: 0.01
+ F1: 0.02
+ F1_std: 0.00
+ AR: 0.45
+ AR_std: 0.01
+ ECE: 0.28
+ ECE_std: 0.01
+ A@10: 0.03
+ A@10_std: 0.02
+ LLaMa-2 7B:
+ AC: 0.00
+ AC_std: 0.06
+ F1: 0.00
+ F1_std: 0.06
+ AR: 0.61
+ AR_std: 0.01
+ ECE: 0.32
+ ECE_std: 0.00
+ A@10: 0.00
+ A@10_std: 0.00
+ Vietcuna 7B:
+ AC: 0.05
+ AC_std: 0.01
+ F1: 0.01
+ F1_std: 0.00
+ AR: 0.66
+ AR_std: 0.00
+ ECE: 0.20
+ ECE_std: 0.01
+ A@10: 0.01
+ A@10_std: 0.21
+ MixSUra:
+ AC: 0.81
+ AC_std: null
+ F1: 0.58
+ F1_std: null
+ AR: 0.96
+ AR_std: null
+ ECE: 0.14
+ ECE_std: null
+ A@10: 0.91
+ A@10_std: null
+ GPT-3.5:
+ AC: 0.44
+ AC_std: 0.02
+ F1: 0.38
+ F1_std: 0.03
+ AR: null
+ AR_std: null
+ ECE: 0.38
+ ECE_std: 0.02
+ A@10: 0.44
+ A@10_std: 0.05
+ GPT-4:
+ AC: 0.89
+ AC_std: 0.01
+ F1: 0.69
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.83
+ ECE_std: 0.01
+ A@10: 0.89
+ A@10_std: 0.03
\ No newline at end of file
diff --git a/_data/leaderboard/vi/zero_shot/toxicity_detection.yml b/_data/leaderboard/vi/zero_shot/toxicity_detection.yml
new file mode 100644
index 0000000..5f4798d
--- /dev/null
+++ b/_data/leaderboard/vi/zero_shot/toxicity_detection.yml
@@ -0,0 +1,178 @@
+UiT-ViCTSD:
+ URA-LLaMa 70B:
+ AC: 0.61
+ AC_std: 0.01
+ F1: 0.52
+ F1_std: 0.01
+ AR: 0.77
+ AR_std: 0.01
+ ECE: 0.17
+ ECE_std: 0.01
+ A@10: 0.97
+ A@10_std: 0.01
+ URA-LLaMa 13B:
+ AC: 0.46
+ AC_std: 0.01
+ F1: 0.28
+ F1_std: 0.03
+ AR: 0.53
+ AR_std: 0.02
+ ECE: 0.22
+ ECE_std: 0.01
+ A@10: 0.48
+ A@10_std: 0.03
+ URA-LLaMa 7B:
+ AC: 0.25
+ AC_std: 0.01
+ F1: 0.19
+ F1_std: 0.01
+ AR: 0.53
+ AR_std: 0.01
+ ECE: 0.38
+ ECE_std: 0.01
+ A@10: 0.13
+ A@10_std: 0.02
+ LLaMa-2 13B:
+ AC: 0.16
+ AC_std: 0.01
+ F1: 0.14
+ F1_std: 0.00
+ AR: 0.40
+ AR_std: 0.01
+ ECE: 0.50
+ ECE_std: 0.01
+ A@10: 0.24
+ A@10_std: 0.02
+ LLaMa-2 7B:
+ AC: 0.13
+ AC_std: 0.01
+ F1: 0.14
+ F1_std: 0.01
+ AR: 0.45
+ AR_std: 0.02
+ ECE: 0.69
+ ECE_std: 0.01
+ A@10: 0.09
+ A@10_std: 0.01
+ Vietcuna 7B:
+ AC: 0.09
+ AC_std: 0.00
+ F1: 0.07
+ F1_std: 0.00
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.41
+ ECE_std: 0.00
+ A@10: 0.10
+ A@10_std: 0.03
+ GPT-3.5:
+ AC: 0.75
+ AC_std: 0.01
+ F1: 0.61
+ F1_std: 0.02
+ AR: null
+ AR_std: null
+ ECE: 0.25
+ ECE_std: 0.01
+ A@10: 0.80
+ A@10_std: 0.04
+ GPT-4:
+ AC: 0.89
+ AC_std: 0.01
+ F1: 0.69
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.39
+ ECE_std: 0.01
+ A@10: 0.89
+ A@10_std: 0.03
+UiT-ViHSD:
+ URA-LLaMa 70B:
+ AC: 0.38
+ AC_std: 0.01
+ F1: 0.34
+ F1_std: 0.01
+ AR: 0.74
+ AR_std: 0.01
+ ECE: 0.25
+ ECE_std: 0.01
+ A@10: 0.91
+ A@10_std: 0.01
+ URA-LLaMa 13B:
+ AC: 0.33
+ AC_std: 0.01
+ F1: 0.18
+ F1_std: 0.00
+ AR: 0.60
+ AR_std: 0.01
+ ECE: 0.35
+ ECE_std: 0.01
+ A@10: 0.54
+ A@10_std: 0.02
+ URA-LLaMa 7B:
+ AC: 0.19
+ AC_std: 0.00
+ F1: 0.13
+ F1_std: 0.00
+ AR: 0.55
+ AR_std: 0.01
+ ECE: 0.46
+ ECE_std: 0.01
+ A@10: 0.13
+ A@10_std: 0.01
+ LLaMa-2 13B:
+ AC: 0.09
+ AC_std: 0.00
+ F1: 0.13
+ F1_std: 0.00
+ AR: 0.38
+ AR_std: 0.01
+ ECE: 0.63
+ ECE_std: 0.00
+ A@10: 0.10
+ A@10_std: 0.01
+ LLaMa-2 7B:
+ AC: 0.03
+ AC_std: 0.00
+ F1: 0.05
+ F1_std: 0.01
+ AR: 0.56
+ AR_std: 0.01
+ ECE: 0.75
+ ECE_std: 0.00
+ A@10: 0.00
+ A@10_std: 0.00
+ Vietcuna 7B:
+ AC: 0.07
+ AC_std: 0.00
+ F1: 0.04
+ F1_std: 0.00
+ AR: 0.50
+ AR_std: 0.00
+ ECE: 0.26
+ ECE_std: 0.00
+ A@10: 0.07
+ A@10_std: 0.01
+ GPT-3.5:
+ AC: 0.55
+ AC_std: 0.01
+ F1: 0.42
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.22
+ ECE_std: 0.01
+ A@10: 0.55
+ A@10_std: 0.02
+ GPT-4:
+ AC: 0.75
+ AC_std: 0.01
+ F1: 0.53
+ F1_std: 0.01
+ AR: null
+ AR_std: null
+ ECE: 0.42
+ ECE_std: 0.01
+ A@10: 0.75
+ A@10_std: 0.02
\ No newline at end of file
diff --git a/_pages/ind/bias-toxicity/question-answering.md b/_pages/ind/bias-toxicity/question-answering.md
index 73d5eca..b7aefa2 100644
--- a/_pages/ind/bias-toxicity/question-answering.md
+++ b/_pages/ind/bias-toxicity/question-answering.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/ind/bias-toxicity/question-answering
---
# Bias-Toxicity Question Answering Leaderboard
+{% assign lang = 'ind' %}
-
-
-Models
-XQuAD
-MLQA
-
-
-DRR→|
-DRG→|
-SAR→|
-SAG→|
-Tox↓
-DRR→|
-DRG→|
-SAR→|
-SAG→|
-Tox↓
-
-
-
-
-URA-LLaMa 70B
--
-0.39 ± 0.01
--
-0.41 ± 0.00
-0.02 ± 0.00
--
-0.14 ± 0.02
--
-0.42 ± 0.03
-0.02 ± 0.00
-
-
-URA-LLaMa 13B
--
-0.39 ± 0.01
--
-0.45 ± 0.01
-0.02 ± 0.00
--
-0.17 ± 0.1
--
-0.38 ± 0.00
-0.02 ± 0.00
-
-
-URA-LLaMa 7B
--
-0.43 ± 0.01
--
-0.48 ± 0.00
-0.03 ± 0.00
--
-0.18 ± 0.01
--
-0.37 ± 0.01
-0.02 ± 0.00
-
-
-LLaMa-2 13B
--
-0.35 ± 0.03
--
-0.46 ± 0.00
-0.01 ± 0.00
--
-0.27 ± 0.01
--
-0.43 ± 0.00
-0.01 ± 0.00
-
-
-LLaMa-2 7B
--
-0.46 ± 0.01
--
-0.42 ± 0.00
-0.01 ± 0.00
--
-0.21 ± 0.06
--
-0.45 ± 0.00
-0.01 ± 0.00
-
-
-Vietcuna 7B
--
-0.50 ± 0.00
--
--
-0.04 ± 0.00
--
-0.23 ± 0.09
--
-0.49 ± 0.01
-0.04 ± 0.00
-
-
-GPT-3.5
--
-0.43 ± 0.01
--
-0.48 ± 0.00
-0.02 ± 0.00
--
-0.18 ± 0.01
--
-0.40 ± 0.00
-0.02 ± 0.00
-
-
-GPT-4
--
-0.40 ± 0.01
--
-0.45 ± 0.00
-0.02 ± 0.00
--
-0.16 ± 0.01
--
-0.41 ± 0.01
-0.02 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.question_answering %}
+ DRR↓
+ DRG↓
+ SAR↓
+ SAG↓
+ Tox↓
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.question_answering %}
+ {% assign DRR_min = 1 %}
+ {% assign DRG_min = 1 %}
+ {% assign SAR_min = 1 %}
+ {% assign SAG_min = 1 %}
+ {% assign Tox_min = 1 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].DRR and dataset[1][m].DRR < DRR_min %}
+ {% assign DRR_min = dataset[1][m].DRR %}
+ {% endif %}
+ {% if dataset[1][m].DRG and dataset[1][m].DRG < DRG_min %}
+ {% assign DRG_min = dataset[1][m].DRG %}
+ {% endif %}
+ {% if dataset[1][m].SAR and dataset[1][m].SAR < SAR_min %}
+ {% assign SAR_min = dataset[1][m].SAR %}
+ {% endif %}
+ {% if dataset[1][m].SAG and dataset[1][m].SAG < SAG_min %}
+ {% assign SAG_min = dataset[1][m].SAG %}
+ {% endif %}
+ {% if dataset[1][m].Tox and dataset[1][m].Tox < Tox_min %}
+ {% assign Tox_min = dataset[1][m].Tox %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].DRR %}
+ {{ dataset[1][model].DRR | round: 2 }} ± {{ dataset[1][model].DRR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].DRG %}
+ {{ dataset[1][model].DRG | round: 2 }} ± {{ dataset[1][model].DRG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAR %}
+ {{ dataset[1][model].SAR | round: 2 }} ± {{ dataset[1][model].SAR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAG %}
+ {{ dataset[1][model].SAG | round: 2 }} ± {{ dataset[1][model].SAG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Tox %}
+ {{ dataset[1][model].Tox | round: 2 }} ± {{ dataset[1][model].Tox_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/bias-toxicity/summarization.md b/_pages/ind/bias-toxicity/summarization.md
index 3776fd6..4741c40 100644
--- a/_pages/ind/bias-toxicity/summarization.md
+++ b/_pages/ind/bias-toxicity/summarization.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/ind/bias-toxicity/summarization
---
# Bias-Toxicity Summarization Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- VietNews
- WikiLingua
-
-
- DRR→|
- DRG→|
- SAR→|
- SAG→|
- Tox↓
- DRR→|
- DRG→|
- SAR→|
- SAG→|
- Tox↓
-
-
-
-
- URA-LLaMa 70B
- -
- 0.21 ± 0.01
- -
- 0.31 ± 0.01
- 0.05 ± 0.00
- -
- 0.03 ± 0.02
- -
- 0.25 ± 0.02
- 0.03 ± 0.00
-
-
- URA-LLaMa 13B
- -
- 0.20 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.07 ± 0.04
- -
- 0.31 ± 0.03
- 0.02 ± 0.00
-
-
- URA-LLaMa 7B
- -
- 0.24 ± 0.02
- -
- 0.33 ± 0.01
- 0.04 ± 0.00
- -
- 0.07 ± 0.02
- -
- 0.38 ± 0.02
- 0.03 ± 0.00
-
-
- LLaMa-2 13B
- -
- 0.26 ± 0.01
- -
- 0.38 ± 0.01
- 0.01 ± 0.00
- -
- 0.17 ± 0.08
- -
- 0.50 ± 0.02
- 0.01 ± 0.00
-
-
- LLaMa-2 7B
- -
- 0.28 ± 0.02
- -
- 0.39 ± 0.01
- 0.01 ± 0.00
- -
- 0.39 ± 0.05
- -
- 0.50 ± 0.02
- 0.01 ± 0.00
-
-
- Vietcuna 7B
- -
- 0.21 ± 0.02
- -
- 0.32 ± 0.02
- 0.04 ± 0.00
- -
- 0.17 ± 0.04
- -
- 0.39 ± 0.03
- 0.03 ± 0.00
-
-
- GPT-3.5
- -
- 0.22 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.03 ± 0.02
- -
- 0.28 ± 0.01
- 0.02 ± 0.00
-
-
- GPT-4
- -
- 0.19 ± 0.01
- -
- 0.28 ± 0.01
- 0.06 ± 0.00
- -
- 0.09 ± 0.02
- -
- 0.28 ± 0.01
- 0.02 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.summarization %}
+ DRR↓
+ DRG↓
+ SAR↓
+ SAG↓
+ Tox↓
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.summarization %}
+ {% assign DRR_min = 1 %}
+ {% assign DRG_min = 1 %}
+ {% assign SAR_min = 1 %}
+ {% assign SAG_min = 1 %}
+ {% assign Tox_min = 1 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].DRR and dataset[1][m].DRR < DRR_min %}
+ {% assign DRR_min = dataset[1][m].DRR %}
+ {% endif %}
+ {% if dataset[1][m].DRG and dataset[1][m].DRG < DRG_min %}
+ {% assign DRG_min = dataset[1][m].DRG %}
+ {% endif %}
+ {% if dataset[1][m].SAR and dataset[1][m].SAR < SAR_min %}
+ {% assign SAR_min = dataset[1][m].SAR %}
+ {% endif %}
+ {% if dataset[1][m].SAG and dataset[1][m].SAG < SAG_min %}
+ {% assign SAG_min = dataset[1][m].SAG %}
+ {% endif %}
+ {% if dataset[1][m].Tox and dataset[1][m].Tox < Tox_min %}
+ {% assign Tox_min = dataset[1][m].Tox %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].DRR %}
+ {{ dataset[1][model].DRR | round: 2 }} ± {{ dataset[1][model].DRR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].DRG %}
+ {{ dataset[1][model].DRG | round: 2 }} ± {{ dataset[1][model].DRG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAR %}
+ {{ dataset[1][model].SAR | round: 2 }} ± {{ dataset[1][model].SAR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAG %}
+ {{ dataset[1][model].SAG | round: 2 }} ± {{ dataset[1][model].SAG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Tox %}
+ {{ dataset[1][model].Tox | round: 2 }} ± {{ dataset[1][model].Tox_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/bias-toxicity/translation.md b/_pages/ind/bias-toxicity/translation.md
index 8196c33..a4ffacc 100644
--- a/_pages/ind/bias-toxicity/translation.md
+++ b/_pages/ind/bias-toxicity/translation.md
@@ -3,264 +3,94 @@ layout: default
permalink: /leaderboard/ind/bias-toxicity/translation
---
# Bias-Toxicity Translation Leaderboard
+{% assign lang = 'ind' %}
Models
- PhoMT (En - Vi)
- OPUS100 (En - Vi)
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.translation %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- DRR$→|
- DRG$→|
- SAR$→|
- SAG$→|
- Tox↓
- DRR$→|
- DRG$→|
- SAR$→|
- SAG$→|
- Tox↓
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.translation %}
+ DRR↓
+ DRG↓
+ SAR↓
+ SAG↓
+ Tox↓
+ {% endfor %}
-
- URA-LLaMa 70B
- -
- 0.03 ± 0.01
- -
- 0.30 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- URA-LLaMa 13B
- -
- 0.09 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- URA-LLaMa 7B
- -
- 0.13 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.18 ± 0.03
- -
- 0.47 ± 0.01
- 0.07 ± 0.00
-
-
- LLaMa-2 13B
- -
- 0.08 ± 0.00
- -
- 0.33 ± 0.02
- 0.05 ± 0.00
- -
- 0.31 ± 0.02
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- LLaMa-2 7B
- -
- 0.17 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.21 ± 0.02
- -
- 0.45 ± 0.02
- 0.05 ± 0.00
-
-
- Vietcuna 7B
- -
- 0.18 ± 0.01
- -
- 0.36 ± 0.01
- 0.04 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- GPT-3.5
- -
- 0.11 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.03
- 0.07 ± 0.00
-
-
- GPT-4
- -
- 0.09 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.14 ± 0.03
- -
- 0.41 ± 0.01
- 0.07 ± 0.00
-
-
-
----
-layout: default
-permalink: /leaderboard/ind/bias-toxicity/translation
----
-# Bias-Toxicity Translation Leaderboard
-
-
-
-
- Models
- PhoMT (En $\to$ Vi)
- OPUS100 (En $\to$ Vi)
-
-
- DRR$\to\mid$
- DRG$\to\mid$
- SAR$\to\mid$
- SAG$\to\mid$
- Tox↓
- DRR$\to\mid$
- DRG$\to\mid$
- SAR$\to\mid$
- SAG$\to\mid$
- Tox↓
-
-
-
-
- URA-LLaMa 70B
- -
- 0.03 ± 0.01
- -
- 0.30 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- URA-LLaMa 13B
- -
- 0.09 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- URA-LLaMa 7B
- -
- 0.13 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.18 ± 0.03
- -
- 0.47 ± 0.01
- 0.07 ± 0.00
-
-
- LLaMa-2 13B
- -
- 0.08 ± 0.00
- -
- 0.33 ± 0.02
- 0.05 ± 0.00
- -
- 0.31 ± 0.02
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- LLaMa-2 7B
- -
- 0.17 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.21 ± 0.02
- -
- 0.45 ± 0.02
- 0.05 ± 0.00
-
-
- Vietcuna 7B
- -
- 0.18 ± 0.01
- -
- 0.36 ± 0.01
- 0.04 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- GPT-3.5
- -
- 0.11 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.03
- 0.07 ± 0.00
-
-
- GPT-4
- -
- 0.09 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.14 ± 0.03
- -
- 0.41 ± 0.01
- 0.07 ± 0.00
-
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.translation %}
+ {% assign DRR_min = 1 %}
+ {% assign DRG_min = 1 %}
+ {% assign SAR_min = 1 %}
+ {% assign SAG_min = 1 %}
+ {% assign Tox_min = 1 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].DRR and dataset[1][m].DRR < DRR_min %}
+ {% assign DRR_min = dataset[1][m].DRR %}
+ {% endif %}
+ {% if dataset[1][m].DRG and dataset[1][m].DRG < DRG_min %}
+ {% assign DRG_min = dataset[1][m].DRG %}
+ {% endif %}
+ {% if dataset[1][m].SAR and dataset[1][m].SAR < SAR_min %}
+ {% assign SAR_min = dataset[1][m].SAR %}
+ {% endif %}
+ {% if dataset[1][m].SAG and dataset[1][m].SAG < SAG_min %}
+ {% assign SAG_min = dataset[1][m].SAG %}
+ {% endif %}
+ {% if dataset[1][m].Tox and dataset[1][m].Tox < Tox_min %}
+ {% assign Tox_min = dataset[1][m].Tox %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].DRR %}
+ {{ dataset[1][model].DRR | round: 2 }} ± {{ dataset[1][model].DRR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].DRG %}
+ {{ dataset[1][model].DRG | round: 2 }} ± {{ dataset[1][model].DRG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAR %}
+ {{ dataset[1][model].SAR | round: 2 }} ± {{ dataset[1][model].SAR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAG %}
+ {{ dataset[1][model].SAG | round: 2 }} ± {{ dataset[1][model].SAG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Tox %}
+ {{ dataset[1][model].Tox | round: 2 }} ± {{ dataset[1][model].Tox_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/chain-of-thought/reasoning.md b/_pages/ind/chain-of-thought/reasoning.md
index 985f3ce..7f321f3 100644
--- a/_pages/ind/chain-of-thought/reasoning.md
+++ b/_pages/ind/chain-of-thought/reasoning.md
@@ -3,73 +3,72 @@ layout: default
permalink: /leaderboard/ind/chain-of-thought/reasoning
---
# Chain-Of-Thought Reasoning Leaderboard
+{% assign lang = 'ind' %}
- Models
- Metrics
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].chain_of_thought.reasoning %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM ↑
- F1 ↑
- Equ. ↑
+ {% for dataset in site.data.leaderboard[lang].chain_of_thought.reasoning %}
+ EM↑
+ F1↑
+ Equ.↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.00 ± 0.00
- 0.12 ± 0.01
- 0.18 ± 0.02
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.23 ± 0.01
- 0.17 ± 0.01
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.23 ± 0.01
- 0.09 ± 0.01
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.12 ± 0.01
- 0.18 ± 0.02
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.10 ± 0.00
- 0.12 ± 0.02
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.13 ± 0.01
- 0.10 ± 0.01
-
-
- MixSUra 8x7B
- 0.00 ± 0.00
- 0.17 ± 0.01
- 0.33 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.32 ± 0.01
- 0.78 ± 0.02
-
-
- GPT-4
- 0.00 ± 0.00
- 0.32 ± 0.01
- 0.79 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].chain_of_thought.reasoning %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign Equ_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].Equ and dataset[1][m].Equ > Equ_best %}
+ {% assign Equ_best = dataset[1][m].Equ %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Equ %}
+ {{ dataset[1][model].Equ | round: 2 }} ± {{ dataset[1][model].Equ_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/fairness-aware/information-retrieval.md b/_pages/ind/fairness-aware/information-retrieval.md
index 346224f..7f61698 100644
--- a/_pages/ind/fairness-aware/information-retrieval.md
+++ b/_pages/ind/fairness-aware/information-retrieval.md
@@ -3,113 +3,84 @@ layout: default
permalink: /leaderboard/ind/fairness-aware/information-retrieval
---
# Fairness-Aware Information Retrieval Leaderboard
+{% assign lang = 'ind' %}
- Models
- mMARCO
- mRobust04
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 13B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 7B
- 0.10 ± 0.00
- 0.10 ± 0.00
- 0.14 ± 0.00
- 0.14 ± 0.00
- 0.01 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- LLaMa-2 13B
-
-
-
-
-
-
-
-
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.10 ± 0.00
- 0.07 ± 0.00
- 0.16 ± 0.00
- -
- -
- -
- -
-
-
- Vietcuna 7B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/fairness-aware/language-modeling.md b/_pages/ind/fairness-aware/language-modeling.md
index 4078ca9..070ae92 100644
--- a/_pages/ind/fairness-aware/language-modeling.md
+++ b/_pages/ind/fairness-aware/language-modeling.md
@@ -3,164 +3,108 @@ layout: default
permalink: /leaderboard/ind/fairness-aware/language-modeling
---
# Fairness-Aware Language Modeling Leaderboard
+{% assign lang = 'ind' %}
- Models
- MLQA-MLM
- VSEC
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.language_modeling %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.language_modeling %}
+ EM↑
+ CER↓
+ WER↓
+ CED↓
+ WED↓
+ PLX↓
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.01 ± 0.00
- 0.58 ± 0.01
- 0.70 ± 0.01
- 653.57 ± 12.05
- 150.64 ± 2.73
- 1.25 ± 0.06
- 0.30 ± 0.00
- 0.11 ± 0.00
- 0.14 ± 0.00
- 15.19 ± 0.42
- 4.12 ± 0.11
- 1.13 ± 0.00
-
-
- URA-LLaMa 13B
- 0.02 ± 0.00
- 0.40 ± 0.01
- 0.56 ± 0.01
- 518.38 ± 11.19
- 125.24 ± 2.66
- 1.48 ± 0.11
- 0.32 ± 0.00
- 0.07 ± 0.00
- 0.21 ± 0.00
- 2.98 ± 0.11
- 1.24 ± 0.03
- 1.15 ± 0.00
-
-
- URA-LLaMa 7B
- 0.01 ± 0.00
- 0.40 ± 0.01
- 0.55 ± 0.01
- 492.93 ± 11.32
- 117.82 ± 2.72
- 1.22 ± 0.01
- 0.20 ± 0.00
- 0.54 ± 0.01
- 0.67 ± 0.01
- 41.77 ± 1.57
- 10.12 ± 0.35
- 1.07 ± 0.00
-
-
- LLaMa-2 13B
- 0.01 ± 0.00
- 0.76 ± 0.00
- 0.89 ± 0.00
- 782.03 ± 11.71
- 192.66 ± 2.83
- 1.27 ± 0.04
- 0.15 ± 0.00
- 0.07 ± 0.00
- 0.22 ± 0.00
- 3.39 ± 0.16
- 1.52 ± 0.04
- 1.01 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.79 ± 0.00
- 0.96 ± 0.00
- 761.38 ± 10.65
- 197.18 ± 2.66
- 1.75 ± 0.20
- 0.12 ± 0.00
- 0.35 ± 0.01
- 0.48 ± 0.01
- 47.54 ± 0.85
- 11.82 ± 0.19
- 1.06 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 1.04 ± 0.00
- 1.06 ± 0.00
- 940.71 ± 12.48
- 208.05 ± 2.81
- 1.40 ± 0.00
- 0,06 ± 0.00
- 4.78 ± 0.06
- 4.80 ± 0.06
- 634.48 ± 8.58
- 145.12 ± 1.94
- 1.46 ± 0.01
-
-
- MixSUra 8x7B
- 0.00 ± -
- 0.56 ± -
- 0.63 ± -
- 535.76 ± -
- 133.64 ± -
- 1.00 ± -
- 0,07 ± -
- 0.20 ± -
- 0.29 ± -
- 25.96 ± -
- 8.79 ± -
- 1.00 ± -
-
-
- GPT-3.5
- 0.03 ± 0.00
- 0.29 ± 0.01
- 0.46 ± 0.01
- 398.19 ± 11.01
- 96.42 ± 2.54
- -
- 0.59 ± 0.00
- 0.06 ± 0.00
- 0.19 ± 0.00
- 1.99 ± 0.08
- 0.74 ± 0.02
- -
-
-
- GPT-4
- 0.06 ± 0.00
- 0.36 ± 0.01
- 0.41 ± 0.01
- 347.82 ± 10.23
- 86.96 ± 2.41
- -
- 0.67 ± 0.00
- 0.01 ± 0.00
- 0.02 ± 0.00
- 1.30 ± 0.04
- 0.54 ± 0.01
- -
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.language_modeling %}
+ {% assign EM_best = 0 %}
+ {% assign CER_best = 1 %}
+ {% assign WER_best = 1 %}
+ {% assign CED_best = 10000 %}
+ {% assign WED_best = 10000 %}
+ {% assign PLX_best = 10000 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].CER and dataset[1][m].CER < CER_best %}
+ {% assign CER_best = dataset[1][m].CER %}
+ {% endif %}
+ {% if dataset[1][m].WER and dataset[1][m].WER < WER_best %}
+ {% assign WER_best = dataset[1][m].WER %}
+ {% endif %}
+ {% if dataset[1][m].CED and dataset[1][m].CED < CED_best %}
+ {% assign CED_best = dataset[1][m].CED %}
+ {% endif %}
+ {% if dataset[1][m].WED and dataset[1][m].WED < WED_best %}
+ {% assign WED_best = dataset[1][m].WED %}
+ {% endif %}
+ {% if dataset[1][m].PLX and dataset[1][m].PLX < PLX_best %}
+ {% assign PLX_best = dataset[1][m].PLX %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CER %}
+ {{ dataset[1][model].CER | round: 2 }} ± {{ dataset[1][model].CER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WER %}
+ {{ dataset[1][model].WER | round: 2 }} ± {{ dataset[1][model].WER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CED %}
+ {{ dataset[1][model].CED | round: 2 }} ± {{ dataset[1][model].CED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WED %}
+ {{ dataset[1][model].WED | round: 2 }} ± {{ dataset[1][model].WED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].PLX %}
+ {{ dataset[1][model].PLX | round: 2 }} ± {{ dataset[1][model].PLX_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/fairness-aware/question-answering.md b/_pages/ind/fairness-aware/question-answering.md
index 58e2c63..b4fd2f3 100644
--- a/_pages/ind/fairness-aware/question-answering.md
+++ b/_pages/ind/fairness-aware/question-answering.md
@@ -3,77 +3,60 @@ layout: default
permalink: /leaderboard/ind/fairness-aware/question-answering
---
# Fairness-Aware Question Answering Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- XQuAD
- MLQA
-
-
- Exact Match↑
- F1↑
- Exact Match↑
- F1↑
-
-
-
-
- URA-LLaMa 70B
- 0.04 ± 0.00
- 0.27 ± 0.00
- 0.03 ± 0.00
- 0.25 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.14 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.15 ± 0.01
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.03 ± 0.00
- 0.00 ± 0.00
- 0.04 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.24 ± 0.00
- 0.00 ± 0.00
- 0.23 ± 0.00
-
-
- GPT-4
- 0.00 ± 0.00
- 0.26 ± 0.00
- 0.00 ± 0.00
- 0.24 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/fairness-aware/sentiment-analysis.md b/_pages/ind/fairness-aware/sentiment-analysis.md
index 102fff9..ee7815b 100644
--- a/_pages/ind/fairness-aware/sentiment-analysis.md
+++ b/_pages/ind/fairness-aware/sentiment-analysis.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/ind/fairness-aware/sentiment-analysis
---
# Fairness-Aware Sentiment Analysis Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- VLSP 2016
- UiT-VSFC
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.65 ± 0.01
- 0.49 ± 0.01
- 0.58 ± 0.01
- 0.13 ± 0.01
- 0.77 ± 0.04
- 0.76 ± 0.01
- 0.48 ± 0.01
- 0.61 ± 0.01
- 0.17 ± 0.01
- 0.66 ± 0.03
-
-
- URA-LLaMa 13B
- 0.59 ± 0.01
- 0.57 ± 0.01
- 0.62 ± 0.01
- 0.07 ± 0.01
- 0.83 ± 0.04
- 0.75 ± 0.01
- 0.46 ± 0.08
- 0.83 ± 0.01
- 0.11 ± 0.01
- 0.88 ± 0.02
-
-
- URA-LLaMa 7B
- 0.74 ± 0.02
- 0.39 ± 0.06
- 0.83 ± 0.01
- 0.21 ± 0.02
- 0.98 ± 0.02
- 0.73 ± 0.01
- 0.73 ± 0.01
- 0.78 ± 0.01
- 0.13 ± 0.01
- 0.94 ± 0.01
-
-
- LLaMa-2 13B
- 0.51 ± 0.01
- 0.1 ± 0.06
- 0.56 ± 0.01
- 0.32 ± 0.02
- 0.79 ± 0.04
- 0.63 ± 0.01
- 0.41 ± 0.02
- 0.70 ± 0.01
- 0.13 ± 0.01
- 0.89 ± 0.02
-
-
- LLaMa-2 7B
- 0.45 ± 0.02
- 0.34 ± 0.01
- 0.53 ± 0.01
- 0.26 ± 0.02
- 0.50 ± 0.0
- 0.51 ± 0.01
- 0.55 ± 0.01
- 0.68 ± 0.01
- 0.22 ± 0.01
- 0.64 ± 0.03
-
-
- Vietcuna 7B
- 0.04 ± 0.01
- 0.04 ± 0.01
- 0.49 ± 0.01
- 0.71 ± 0.01
- 0.05 ± 0.02
- 0.03 ± 0.00
- 0.03 ± 0.00
- 0.55 ± 0.01
- 0.50 ± 0.00
- 0.01 ± 0.01
-
-
- MixSUra 8x7B
- 0.62 ± -
- 0.62 ± -
- 0.59 ± -
- 0.30 ± -
- 0.59 ± -
- 0.74 ± -
- 0.46 ± -
- 0.61 ± -
- 0.24 ± -
- 0.66 ± -
-
-
- Gemini Pro
- 0.67 ± -
- 0.50 ± -
- -
- 0.34 ± -
- 0.59 ± -
- 0.79 ± -
- 0.50 ± -
- -
- 0.46 ± -
- 0.82 ± -
-
-
- GPT-3.5
- 0.66 ± 0.01
- 0.60 ± 0.01
- -
- 0.33 ± 0.01
- 0.52 ± 0.05
- 0.86 ± 0.01
- 0.71 ± 0.01
- -
- 0.52 ± 0.01
- 0.86 ± 0.02
-
-
- GPT-4
- 0.75 ± 0.01
- 0.74 ± 0.01
- -
- 0.41 ± 0.00
- 0.73 ± 0.04
- 0.85 ± 0.01
- 0.71 ± 0.01
- -
- 0.52 ± 0.01
- 0.87 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/fairness-aware/text-classification.md b/_pages/ind/fairness-aware/text-classification.md
index 60f2587..14efacd 100644
--- a/_pages/ind/fairness-aware/text-classification.md
+++ b/_pages/ind/fairness-aware/text-classification.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/ind/fairness-aware/text-classification
---
# Fairness-Aware Text Classification Leaderboard
+{% assign lang = 'ind' %}
- Models
- UiT-VSMEC
- PhoATIS
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.24 ± 0.02
- 0.14 ± 0.01
- 0.58 ± 0.01
- 0.26 ± 0.02
- 0.37 ± 0.06
- 0.15 ± 0.01
- 0.22 ± 0.03
- 0.31 ± 0.00
- 0.81 ± 0.01
- 0.13 ± 0.04
-
-
- URA-LLaMa 13B
- 0.31 ± 0.02
- 0.11 ± 0.01
- 0.58 ± 0.01
- 0.23 ± 0.02
- 0.57 ± 0.06
- 0.01 ± 0.01
- 0.05 ± 0.02
- 0.58 ± 0.00
- 0.84 ± 0.01
- 0.00 ± 0.01
-
-
- URA-LLaMa 7B
- 0.29 ± 0.02
- 0.11 ± 0.01
- 0.60 ± 0.01
- 0.12 ± 0.02
- 0.41 ± 0.06
- 0.00 ± 0.01
- 0.00 ± 0.00
- 0.55 ± 0.00
- 0.30 ± 0.01
- 0.01 ± 0.03
-
-
- LLaMa-2 13B
- 0.18 ± 0.02
- 0.08 ± 0.01
- 0.55 ± 0.01
- 0.45 ± 0.01
- 0.44 ± 0.06
- 0.02 ± 0.01
- 0.01 ± 0.02
- 0.57 ± 0.01
- 0.90 ± 0.01
- 0.01 ± 0.01
-
-
- LLaMa-2 7B
- 0.25 ± 0.02
- 0.11 ± 0.01
- 0.57 ± 0.01
- 0.22 ± 0.02
- 0.53 ± 0.06
- 0.02 ± 0.00
- 0.06 ± 0.01
- 0.57 ± 0.01
- 0.68 ± 0.01
- 0.01 ± 0.01
-
-
- Vietcuna 7B
- 0.15 ± 0.01
- 0.05 ± 0.01
- 0.46 ± 0.01
- 0.85 ± 0.01
- 0.16 ± 0.04
- 0.04 ± 0.01
- 0.01 ± 0.00
- 0.77 ± 0.01
- 0.21 ± 0.01
- 0.07 ± 0.03
-
-
- MixSUra 8x7B
- 0.40 ± -
- 0.36 ± -
- 0.72 ± -
- 0.53 ± -
- 0.79 ± -
- 0.81 ± -
- 0.58 ± -
- 0.96 ± -
- 0.14 ± -
- 0.91 ± -
-
-
- Gemini Pro
- 0.48 ± -
- 0.38 ± -
- -
- 0.34 ± -
- 0.43 ± -
- 0.79 ± -
- 0.67 ± -
- -
- 0.73 ± -
- 0.68 ± -
-
-
- GPT-3.5
- 0.44 ± 0.02
- 0.42 ± 0.02
- -
- 0.30 ± 0.02
- 0.36 ± 0.06
- 0.68 ± 0.02
- 0.66 ± 0.03
- -
- 0.62 ± 0.02
- 0.67 ± 0.05
-
-
- GPT-4
- 0.49 ± 0.02
- 0.47 ± 0.02
- -
- 0.35 ± 0.02
- 0.36 ± 0.06
- 0.83 ± 0.01
- 0.76 ± 0.03
- -
- 0.77 ± 0.01
- 0.87 ± 0.04
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/fairness-aware/toxicity-detection.md b/_pages/ind/fairness-aware/toxicity-detection.md
index 68fe66e..b2be50d 100644
--- a/_pages/ind/fairness-aware/toxicity-detection.md
+++ b/_pages/ind/fairness-aware/toxicity-detection.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/ind/fairness-aware/toxicity-detection
---
# Fairness-Aware Toxicity Detection Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- UiT-ViCTSD
- UiT-ViHSD
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.41 ± 0.02
- 0.26 ± 0.01
- 0.75 ± 0.01
- 0.53 ± 0.01
- 0.33 ± 0.05
- 0.15 ± 0.00
- 0.40 ± 0.00
- 0.64 ± 0.01
- 0.58 ± 0.00
- 0.24 ± 0.02
-
-
- URA-LLaMa 13B
- 0.43 ± 0.02
- 0.29 ± 0.07
- 0.66 ± 0.01
- 0.36 ± 0.02
- 0.42 ± 0.05
- 0.24 ± 0.01
- 0.15 ± 0.00
- 0.61 ± 0.01
- 0.43 ± 0.01
- 0.21 ± 0.02
-
-
- URA-LLaMa 7B
- 0.42 ± 0.02
- 0.39 ± 0.01
- 0.60 ± 0.01
- 0.30 ± 0.01
- 0.66 ± 0.05
- 0.16 ± 0.00
- 0.10 ± 0.00
- 0.67 ± 0.01
- 0.33 ± 0.00
- 0.28 ± 0.02
-
-
- LLaMa-2 13B
- 0.27 ± 0.01
- 0.18 ± 0.01
- 0.67 ± 0.01
- 0.53 ± 0.01
- 0.57 ± 0.05
- 0.16 ± 0.00
- 0.10 ± 0.00
- 0.62 ± 0.01
- 0.59 ± 0.00
- 0.42 ± 0.02
-
-
- LLaMa-2 7B
- 0.15 ± 0.01
- 0.11 ± 0.01
- 0.62 ± 0.01
- 0.67 ± 0.01
- 0.07 ± 0.03
- 0.01 ± 0.00
- 0.01 ± 0.00
- 0.56 ± 0.01
- 0.71 ± 0.00
- 0.01 ± 0.00
-
-
- Vietcuna 7B
- 0.08 ± 0.01
- 0.09 ± 0.01
- 0.50 ± 0.01
- 0.42 ± 0.01
- 0.06 ± 0.03
- 0.62 ± 0.01
- 0.21 ± 0.00
- 0.50 ± 0.00
- 0.29 ± 0.01
- 0.62 ± 0.02
-
-
- MixSUra 8x7B
- 0.69 ± -
- 0.38 ± -
- - ± -
- 0.29 ± -
- 0.78 ± -
- 0.56 ± -
- 0.31 ± -
- 0.68 ± -
- 0.32 ± -
- 0.92 ± -
-
-
- Gemini Pro
- 0.81 ± -
- 0.43 ± -
- - ± -
- 0.31 ± -
- 0.82 ± -
- 0.70 ± -
- 0.37 ± -
- - ± -
- 0.36 ± -
- 0.69 ± -
-
-
- GPT-3.5
- 0.60 ± 0.02
- 0.52 ± 0.02
- - ± -
- 0.11 ± 0.02
- 0.63 ± 0.05
- 0.61 ± 0.01
- 0.46 ± 0.01
- - ± -
- 0.29 ± 0.01
- 0.62 ± 0.02
-
-
- GPT-4
- 0.87 ± 0.01
- 0.69 ± 0.02
- - ± -
- 0.37 ± 0.01
- 0.86 ± 0.03
- 0.76 ± 0.01
- 0.56 ± 0.01
- - ± -
- 0.43 ± 0.01
- 0.76 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/few-shot/information-retrieval.md b/_pages/ind/few-shot/information-retrieval.md
index 3c5643f..85c8366 100644
--- a/_pages/ind/few-shot/information-retrieval.md
+++ b/_pages/ind/few-shot/information-retrieval.md
@@ -3,124 +3,84 @@ layout: default
permalink: /leaderboard/ind/few-shot/information-retrieval
---
# Few-Shot Information Retrieval Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- mMARCO
- mRobust04
-
-
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
-
-
-
-
- URA-LLaMa 70B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.06 ± 0.00
- 0.14 ± 0.00
- 0.04±0.00
- 0.04±0.00
- 0.03±0.00
- 0.04±0.00
-
-
- URA-LLaMa 13B
- 0.04 ± 0.00
- 0.10 ± 0.00
- 0.06 ± 0.00
- 0.14 ± 0.00
- 0.03±0.00
- 0.05±0.00
- 0.04±0.00
- 0.04±0.00
-
-
- URA-LLaMa 7B
- 0.04 ± 0.00
- 0.11 ± 0.00
- 0.06 ± 0.00
- 0.16 ± 0.00
- 0.03 ± 0.00
- 0.03 ± 0.00
- 0.02 ± 0.00
- 0.02 ± 0.00
-
-
- LLaMa-2 13B
- 0.07 ± 0.00
- 0.15 ± 0.00
- 0.09 ± 0.00
- 0.21 ± 0.00
- 0.05±0.00
- 0.04±0.00
- 0.04±0.00
- 0.04±0.00
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.07 ± 0.00
- 0.16 ± 0.00
- 0.02±0.00
- 0.03±0.00
- 0.03±0.00
- 0.02±0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00±0.00
- 0.00±0.00
- 0.00±0.00
- 0.00±0.00
-
-
- MixSUra 8x7B
- 0.01 ± -
- 0.07 ± -
- 0.04 ± -
- 0.11 ± -
- 0.04±-
- 0.04±-
- 0.02±-
- 0.02±-
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/few-shot/knowledge.md b/_pages/ind/few-shot/knowledge.md
index da4ef46..61d1840 100644
--- a/_pages/ind/few-shot/knowledge.md
+++ b/_pages/ind/few-shot/knowledge.md
@@ -2,115 +2,129 @@
layout: default
permalink: /leaderboard/ind/few-shot/knowledge
---
-# Few-Shot Knowledge Leaderboard
+# Few-shot Knowledge Leaderboard
+{% assign lang = 'ind' %}
-
-
-Models
-ZaloE2E
-ViMMRC
-
-
-EM↑
-F1↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.34 ± 0.02
-0.50 ± 0.02
-0.78 ± 0.02
-0.63 ± 0.03
-0.90 ± 0.01
-0.13 ± 0.02
-0.96 ± 0.03
-
-
-URA-LLaMa 13B
-0.26 ± 0.02
-0.40 ± 0.02
-0.62 ± 0.02
-0.50 ± 0.02
-0.69 ± 0.02
-0.18 ± 0.02
-0.65 ± 0.07
-
-
-URA-LLaMa 7B
-0.14 ± 0.02
-0.25 ± 0.02
-0.42 ± 0.02
-0.33 ± 0.02
-0.61 ± 0.02
-0.13 ± 0.02
-0.39 ± 0.07
-
-
-LLaMa-2 13B
-0.22 ± 0.02
-0.36 ± 0.02
-0.58 ± 0.02
-0.46 ± 0.02
-0.62 ± 0.02
-0.28 ± 0.02
-0.77 ± 0.06
-
-
-LLaMa-2 7B
-0.07 ± 0.01
-0.15 ± 0.01
-0.30 ± 0.02
-0.23 ± 0.02
-0.56 ± 0.02
-0.43 ± 0.02
-0.16 ± 0.05
-
-
-Vietcuna 7B
-0.07 ± 0.01
-0.19 ± 0.01
-0.31 ± 0.02
-0.18 ± 0.01
-0.50 ± 0.00
-0.06 ± 0.02
-0.31 ± 0.06
-
-
-MixSUra 8x7B
-0.19 ± -
-0.34 ± -
-0.65 ± -
-0.64 ± -
-0.54 ± -
-0.29 ± -
-0.65 ± -
-
-
-GPT-3.5
-0.49 ± 0.02
-0.64 ± 0.02
-0.90 ± 0.01
-0.73 ± 0.03
--
-0.66 ± 0.01
-0.91 ± 0.04
-
-
-GPT-4
-0.49 ± 0.02
-0.64 ± 0.02
-0.91 ± 0.01
-0.73 ± 0.04
--
-0.66 ± 0.01
-0.91 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {{ dataset[0] }}
+
+ {% else %}
+
+ {{ dataset[0] }}
+
+ {% endif %}
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+ EM↑
+ F1↑
+ {% else %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.knowledge %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AC_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% else %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endif %}
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/few-shot/language-modeling.md b/_pages/ind/few-shot/language-modeling.md
index 5cb7963..04b179d 100644
--- a/_pages/ind/few-shot/language-modeling.md
+++ b/_pages/ind/few-shot/language-modeling.md
@@ -3,164 +3,108 @@ layout: default
permalink: /leaderboard/ind/few-shot/language-modeling
---
# Few-Shot Language Modeling Leaderboard
+{% assign lang = 'ind' %}
- Models
- MLQA-MLM
- VSEC
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.language_modeling %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
+ {% for dataset in site.data.leaderboard[lang].few_shot.language_modeling %}
+ EM↑
+ CER↓
+ WER↓
+ CED↓
+ WED↓
+ PLX↓
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.01 ± 0.00
- 0.54 ± 0.00
- 0.66 ± 0.00
- 669.74 ± 10.38
- 153.04 ± 2.33
- 1.32 ± 0.05
- 0.33 ± 0.00
- 0.11 ± 0.00
- 0.13 ± 0.00
- 15.09 ± 0.42
- 4.05 ± 0.11
- 1.13 ± 0.00
-
-
- URA-LLaMa 13B
- 0.01 ± 0.00
- 0.45 ± 0.01
- 0.61 ± 0.01
- 559.64 ± 11.23
- 136.97 ± 2.68
- 1.49 ± 0.10
- 0.35 ± 0.00
- 0.02 ± 0.00
- 0.04 ± 0.00
- 2.81 ± 0.12
- 1.18 ± 0.03
- 1.15 ± 0.00
-
-
- URA-LLaMa 7B
- 0.01 ± 0.00
- 0.40 ± 0.01
- 0.55 ± 0.01
- 498.36 ± 11.01
- 118.11 ± 2.58
- 1.24 ± 0.01
- 0.22 ± 0.00
- 0.32 ± 0.01
- 0.33 ± 0.01
- 41.89 ± 1.54
- 10.10 ± 0.34
- 1.07 ± 0.00
-
-
- LLaMa-2 13B
- 0.01 ± 0.00
- 0.74 ± 0.00
- 0.87 ± 0.00
- 760.98 ± 11.91
- 186.90 ± 2.85
- 1.24 ± 0.03
- 0.16 ± 0.00
- 0.03 ± 0.00
- 0.05 ± 0.00
- 3.38 ± 0.16
- 1.51 ± 0.04
- 1.01 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.81 ± 0.00
- 0.98 ± 0.00
- 769.36 ± 10.51
- 198.53 ± 2.57
- 1.74 ± 0.19
- 0.12 ± 0.00
- 0.36 ± 0.01
- 0.39 ± 0.01
- 47.50 ± 0.86
- 11.80 ± 0.19
- 1.06 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 1.04 ± 0.00
- 1.06 ± 0.00
- 935.65 ± 12.47
- 204.98 ± 2.79
- 1.40 ± 0.00
- 0.00 ± 0.00
- 8.00 ± 0.07
- 8.01 ± 0.07
- 1063.93 ± 7.64
- 241.74 ± 1.74
- 1.46 ± 0.00
-
-
- MixSUra 8x7B
- 0.00 ± -
- 0.55 ± -
- 0.63 ± -
- 526.79 ± -
- 131.02 ± -
- 1.00 ± -
- 0.08 ± -
- 0.19 ± -
- 0.28 ± -
- 25.13 ± -
- 8.58 ± -
- 1.00 ± -
-
-
- GPT-3.5
- 0.04 ± 0.00
- 0.28 ± 0.01
- 0.44 ± 0.01
- 387.37 ± 10.86
- 92.78 ± 2.46
- -
- 0.66 ± 0.00
- 0.01 ± 0.00
- 0.02 ± 0.00
- 1.63 ± 0.08
- 0.61 ± 0.02
- -
-
-
- GPT-4
- 0.08 ± 0.00
- 0.23 ± 0.01
- 0.40 ± 0.01
- 336.53 ± 10.18
- 83.55 ± 2.34
- -
- 0.75 ± 0.00
- 0.01 ± 0.00
- 0.01 ± 0.00
- 0.89 ± 0.04
- 0.37 ± 0.01
- -
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.language_modeling %}
+ {% assign EM_best = 0 %}
+ {% assign CER_best = 1 %}
+ {% assign WER_best = 1 %}
+ {% assign CED_best = 10000 %}
+ {% assign WED_best = 10000 %}
+ {% assign PLX_best = 10000 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].CER and dataset[1][m].CER < CER_best %}
+ {% assign CER_best = dataset[1][m].CER %}
+ {% endif %}
+ {% if dataset[1][m].WER and dataset[1][m].WER < WER_best %}
+ {% assign WER_best = dataset[1][m].WER %}
+ {% endif %}
+ {% if dataset[1][m].CED and dataset[1][m].CED < CED_best %}
+ {% assign CED_best = dataset[1][m].CED %}
+ {% endif %}
+ {% if dataset[1][m].WED and dataset[1][m].WED < WED_best %}
+ {% assign WED_best = dataset[1][m].WED %}
+ {% endif %}
+ {% if dataset[1][m].PLX and dataset[1][m].PLX < PLX_best %}
+ {% assign PLX_best = dataset[1][m].PLX %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CER %}
+ {{ dataset[1][model].CER | round: 2 }} ± {{ dataset[1][model].CER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WER %}
+ {{ dataset[1][model].WER | round: 2 }} ± {{ dataset[1][model].WER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CED %}
+ {{ dataset[1][model].CED | round: 2 }} ± {{ dataset[1][model].CED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WED %}
+ {{ dataset[1][model].WED | round: 2 }} ± {{ dataset[1][model].WED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].PLX %}
+ {{ dataset[1][model].PLX | round: 2 }} ± {{ dataset[1][model].PLX_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/few-shot/reasoning.md b/_pages/ind/few-shot/reasoning.md
index 748fa8c..4190e60 100644
--- a/_pages/ind/few-shot/reasoning.md
+++ b/_pages/ind/few-shot/reasoning.md
@@ -3,135 +3,72 @@ layout: default
permalink: /leaderboard/ind/few-shot/reasoning
---
# Few-Shot Reasoning Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- SR - Natural
- SR - Abstract symbol
- MATH
-
-
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
-
-
-
-
- URA-LLaMa 70B
- 0.14 ± 0.00
- 0.48 ± 0.00
- 0.15 ± 0.00
- 0.27 ± 0.00
- 0.85 ± 0.00
- 0.30 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.12 ± 0.02
-
-
- URA-LLaMa 13B
- 0.08 ± 0.00
- 0.42 ± 0.00
- 0.08 ± 0.00
- 0.20 ± 0.00
- 0.70 ± 0.00
- 0.17 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.01
-
-
- URA-LLaMa 7B
- 0.04 ± 0.00
- 0.38 ± 0.00
- 0.04 ± 0.00
- 0.11 ± 0.00
- 0.61 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.07 ± 0.01
-
-
- LLaMa-2 13B
- 0.03 ± 0.00
- 0.24 ± 0.00
- 0.04 ± 0.00
- 0.19 ± 0.00
- 0.69 ± 0.00
- 0.18 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.16 ± 0.02
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.06 ± 0.00
- 0.44 ± 0.00
- 0.06 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.11 ± 0.01
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.14 ± 0.00
- 0.71 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
-
-
- MixSUra 8x7B
- 0.07 ± 0.00
- 0.41 ± 0.00
- 0.07 ± 0.00
- 0.22 ± 0.00
- 0.78 ± 0.00
- 0.23 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- GPT-3.5
- 0.15 ± 0.00
- 0.50 ± 0.00
- 0.16 ± 0.00
- 0.26 ± 0.00
- 0.83 ± 0.00
- 0.29 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.62 ± 0.02
-
-
- GPT-4
- 0.37 ± 0.00
- 0.74 ± 0.00
- 0.42 ± 0.00
- 0.37 ± 0.00
- 0.87 ± 0.00
- 0.44 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.65 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.reasoning %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.reasoning %}
+ EM↑
+ F1↑
+ Equ↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.reasoning %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign Equ_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m]["Equ"] and dataset[1][m]["Equ"] > Equ_best %}
+ {% assign Equ_best = dataset[1][m]["Equ"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["Equ"] %}
+ {{ dataset[1][model]["Equ"] | round: 2 }} ± {{ dataset[1][model]["Equ_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/few-shot/sentiment-analysis.md b/_pages/ind/few-shot/sentiment-analysis.md
index 5971d49..097ad29 100644
--- a/_pages/ind/few-shot/sentiment-analysis.md
+++ b/_pages/ind/few-shot/sentiment-analysis.md
@@ -1,146 +1,98 @@
---
layout: default
-permalink: /leaderboard/ind/few-shot/sentiment-analysis
+permalink: /leaderboard/ind/few-shot/sentiment-analysis
---
# Few-Shot Sentiment Analysis Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- VLSP 2016
- UiT-VSFC
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.66 ± 0.01
- 0.49 ± 0.01
- 0.72 ± 0.01
- 0.13 ± 0.01
- 0.77 ± 0.04
- 0.75 ± 0.01
- 0.48 ± 0.01
- 0.81 ± 0.01
- 0.16 ± 0.01
- 0.71 ± 0.02
-
-
- URA-LLaMa 13B
- 0.59 ± 0.01
- 0.57 ± 0.01
- 0.67 ± 0.01
- 0.09 ± 0.01
- 0.82 ± 0.04
- 0.74 ± 0.01
- 0.52 ± 0.08
- 0.83 ± 0.01
- 0.10 ± 0.01
- 0.87 ± 0.02
-
-
- URA-LLaMa 7B
- 0.57 ± 0.02
- 0.42 ± 0.05
- 0.69 ± 0.02
- 0.07 ± 0.02
- 0.77 ± 0.04
- 0.72 ± 0.01
- 0.43 ± 0.01
- 0.78 ± 0.01
- 0.13 ± 0.01
- 0.95 ± 0.03
-
-
- LLaMa-2 13B
- 0.51 ± 0.01
- 0.41 ± 0.06
- 0.66 ± 0.01
- 0.32 ± 0.02
- 0.80 ± 0.04
- 0.63 ± 0.01
- 0.46 ± 0.07
- 0.71 ± 0.01
- 0.13 ± 0.01
- 0.88 ± 0.02
-
-
- LLaMa-2 7B
- 0.45 ± 0.01
- 0.32 ± 0.01
- 0.59 ± 0.01
- 0.26 ± 0.02
- 0.50 ± 0.05
- 0.50 ± 0.01
- 0.34 ± 0.01
- 0.69 ± 0.01
- 0.23 ± 0.01
- 0.62 ± 0.03
-
-
- Vietcuna 7B
- 0.04 ± 0.01
- 0.05 ± 0.01
- 0.45 ± 0.01
- 0.71 ± 0.01
- 0.05 ± 0.02
- 0.03 ± 0.00
- 0.03 ± 0.00
- 0.53 ± 0.01
- 0.50 ± 0.00
- 0.01 ± 0.00
-
-
- MixSUra 8x7B
- 0.62 ± -
- 0.63 ± -
- 0.59 ± -
- 0.30 ± -
- 0.59 ± -
- 0.74 ± -
- 0.46 ± -
- 0.63 ± -
- 0.23 ± -
- 0.655 ± -
-
-
- GPT-3.5
- 0.65 ± 0.01
- 0.59 ± 0.1
- -
- 0.32 ± 0.01
- 0.65 ± 0.05
- 0.86 ± 0.01
- 0.73 ± 0.01
- -
- 0.52 ± 0.01
- 0.86 ± 0.02
-
-
- GPT-4
- 0.75 ± 0.01
- 0.74 ± 0.01
- -
- 0.41 ± 0.01
- 0.74 ± 0.04
- 0.85 ± 0.01
- 0.59 ± 0.09
- -
- 0.52 ± 0.01
- 0.85 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/few-shot/text-classification.md b/_pages/ind/few-shot/text-classification.md
index e2071ae..4d08aff 100644
--- a/_pages/ind/few-shot/text-classification.md
+++ b/_pages/ind/few-shot/text-classification.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/ind/few-shot/text-classification
---
# Few-Shot Text Classification Leaderboard
+{% assign lang = 'ind' %}
-
-
-Models
-UiT-VSMEC
-PhoATIS
-
-
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.25 ± 0.02
-0.15 ± 0.01
-0.56 ± 0.01
-0.25 ± 0.02
-0.37 ± 0.06
-0.15 ± 0.01
-0.22 ± 0.03
-0.83 ± 0.00
-0.81 ± 0.01
-0.13 ± 0.04
-
-
-URA-LLaMa 13B
-0.32 ± 0.02
-0.12 ± 0.01
-0.58 ± 0.01
-0.22 ± 0.02
-0.57 ± 0.07
-0.01 ± 0.01
-0.06 ± 0.02
-0.47 ± 0.00
-0.84 ± 0.01
-0.00 ± 0.01
-
-
-URA-LLaMa 7B
-0.29 ± 0.02
-0.11 ± 0.01
-0.60 ± 0.01
-0.12 ± 0.02
-0.43 ± 0.06
-0.06 ± 0.01
-0.01 ± 0.00
-0.55 ± 0.00
-0.24 ± 0.01
-0.08 ± 0.03
-
-
-LLaMa-2 13B
-0.18 ± 0.02
-0.08 ± 0.01
-0.55 ± 0.01
-0.45 ± 0.01
-0.49 ± 0.07
-0.02 ± 0.01
-0.06 ± 0.02
-0.57 ± 0.01
-0.90 ± 0.01
-0.01 ± 0.01
-
-
-LLaMa-2 7B
-0.25 ± 0.02
-0.12 ± 0.01
-0.57 ± 0.01
-0.21 ± 0.02
-0.54 ± 0.06
-0.03 ± 0.01
-0.02 ± 0.01
-0.56 ± 0.01
-0.54 ± 0.01
-0.01 ± 0.01
-
-
-Vietcuna 7B
-0.15 ± 0.01
-0.05 ± 0.01
-0.46 ± 0.01
-0.85 ± 0.01
-0.15 ± 0.04
-0.04 ± 0.01
-0.01 ± 0.00
-0.63 ± 0.00
-0.21 ± 0.01
-0.07 ± 0.03
-
-
-MixSUra 8x7B
-0.40 ± -
-0.36 ± -
-0.72 ± -
-0.53 ± -
-0.79 ± -
-0.81 ± -
-0.58 ± -
-0.96 ± -
-0.14 ± -
-0.91 ± -
-
-
-GPT-3.5
-0.42 ± 0.02
-0.40 ± 0.02
--
-0.28 ± 0.02
-0.42 ± 0.06
-0.69 ± 0.02
-0.67 ± 0.03
--
-0.63 ± 0.02
-0.69 ± 0.05
-
-
-GPT-4
-0.49 ± 0.02
-0.48 ± 0.02
--
-0.35 ± 0.02
-0.49 ± 0.06
-0.85 ± 0.01
-0.78 ± 0.03
--
-0.79 ± 0.01
-0.88 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/few-shot/toxicity-detection.md b/_pages/ind/few-shot/toxicity-detection.md
index 1dc584b..2749d7a 100644
--- a/_pages/ind/few-shot/toxicity-detection.md
+++ b/_pages/ind/few-shot/toxicity-detection.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/ind/few-shot/toxicity-detection
---
# Few-Shot Toxicity Detection Leaderboard
+{% assign lang = 'ind' %}
-
-
-Models
-UiT-ViCTSD
-UiT-ViHSD
-
-
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.44 ± 0.01
-0.27 ± 0.01
-0.75 ± 0.01
-0.52 ± 0.01
-0.37 ± 0.02
-0.17 ± 0.00
-0.15 ± 0.00
-0.64 ± 0.01
-0.57 ± 0.00
-0.27 ± 0.02
-
-
-URA-LLaMa 13B
-0.44 ± 0.01
-0.30 ± 0.05
-0.67 ± 0.01
-0.33 ± 0.01
-0.41 ± 0.03
-0.26 ± 0.01
-0.16 ± 0.00
-0.61 ± 0.01
-0.42 ± 0.01
-0.21 ± 0.02
-
-
-URA-LLaMa 7B
-0.43 ± 0.01
-0.40 ± 0.01
-0.60 ± 0.01
-0.29 ± 0.01
-0.71 ± 0.02
-0.16 ± 0.00
-0.10 ± 0.00
-0.67 ± 0.01
-0.32 ± 0.00
-0.28 ± 0.02
-
-
-LLaMa-2 13B
-0.28 ± 0.01
-0.19 ± 0.00
-0.67 ± 0.01
-0.52 ± 0.01
-0.63 ± 0.03
-0.17 ± 0.00
-0.11 ± 0.00
-0.62 ± 0.01
-0.58 ± 0.00
-0.44 ± 0.02
-
-
-LLaMa-2 7B
-0.16 ± 0.01
-0.12 ± 0.01
-0.61 ± 0.01
-0.66 ± 0.01
-0.08 ± 0.02
-0.01 ± 0.00
-0.01 ± 0.00
-0.56 ± 0.01
-0.71 ± 0.00
-0.01 ± 0.02
-
-
-Vietcuna 7B
-0.08 ± 0.00
-0.10 ± 0.01
-0.50 ± 0.00
-0.42 ± 0.00
-0.08 ± 0.03
-0.61 ± 0.01
-0.21 ± 0.00
-0.50 ± 0.00
-0.28 ± 0.01
-0.61 ± 0.02
-
-
-MixSUra 8x7B
-0.70 ± -
-0.39 ± -
-- ± -
-0.29 ± -
-0.80 ± -
-0.58 ± -
-0.31 ± -
-0.68 ± -
-0.30 ± -
-0.93 ± -
-
-
-GPT-3.5
-0.63 ± 0.02
-0.54 ± 0.02
--
-0.13 ± 0.02
-0.63 ± 0.05
-0.63 ± 0.01
-0.47 ± 0.01
--
-0.29 ± 0.01
-0.63 ± 0.02
-
-
-GPT-4
-0.89 ± 0.00
-0.71 ± 0.01
--
-0.39 ± 0.00
-0.89 ± 0.03
-0.77 ± 0.01
-0.57 ± 0.01
--
-0.44 ± 0.01
-0.77 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/few-shot/translation.md b/_pages/ind/few-shot/translation.md
index d20d516..aee1200 100644
--- a/_pages/ind/few-shot/translation.md
+++ b/_pages/ind/few-shot/translation.md
@@ -3,124 +3,84 @@ layout: default
permalink: /leaderboard/ind/few-shot/translation
---
# Few-Shot Translation Leaderboard
+{% assign lang = 'ind' %}
- Models
- PhoMT
- OPUS100
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.translation %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- (En -> Vi)
- (Vi -> En)
- (En -> Vi)
- (Vi -> En)
- (En -> Vi)
- (Vi -> En)
- (En -> Vi)
- (Vi -> En)
+ {% for dataset in site.data.leaderboard[lang].few_shot.translation %}
+ BLEU envi↑
+ BLEU vien↑
+ hLEPOR envi↑
+ hLEPOR vien↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.28 ± 0.00
- 0.59 ± 0.00
- 0.27 ± 0.00
- 0.58 ± 0.00
- 0.10 ± 0.00
- 0.44 ± 0.01
- 0.14 ± 0.00
- 0.41 ± 0.01
-
-
- URA-LLaMa 13B
- 0.25 ± 0.00
- 0.55 ± 0.00
- 0.15 ± 0.00
- 0.56 ± 0.00
- 0.10 ± 0.01
- 0.41 ± 0.01
- 0.17 ± 0.01
- 0.43 ± 0.01
-
-
- URA-LLaMa 7B
- 0.19 ± 0.00
- 0.50 ± 0.00
- 0.22 ± 0.00
- 0.54 ± 0.00
- 0.08 ± 0.00
- 0.38 ± 0.01
- 0.14 ± 0.01
- 0.39 ± 0.01
-
-
- LLaMa-2 13B
- 0.23 ± 0.00
- 0.53 ± 0.00
- 0.23 ± 0.00
- 0.54 ± 0.00
- 0.09 ± 0.00
- 0.39 ± 0.01
- 0.14 ± 0.01
- 0.40 ± 0.01
-
-
- LLaMa-2 7B
- 0.18 ± 0.00
- 0.47 ± 0.00
- 0.21 ± 0.00
- 0.52 ± 0.00
- 0.07 ± 0.00
- 0.34 ± 0.00
- 0.11 ± 0.01
- 0.36 ± 0.01
-
-
- Vietcuna 7B
- 0.15 ± 0.00
- 0.35 ± 0.00
- 0.03 ± 0.00
- 0.11 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
- 0.16 ± 0.00
-
-
- MixSUra 8x7B
- 0.15 ± -
- 0.51 ± -
- 0.16 ± -
- 0.52 ± -
- 0.07 ± -
- 0.37 ± -
- 0.09 ± -
- 0.36 ± -
-
-
- GPT-3.5
- 0.33 ± 0.00
- 0.65 ± 0.00
- 0.33 ± 0.00
- 0.63 ± 0.00
- 0.16 ± 0.01
- 0.50 ± 0.01
- 0.24 ± 0.01
- 0.51 ± 0.00
-
-
- GPT-4
- 0.33 ± 0.00
- 0.66 ± 0.00
- 0.34 ± 0.00
- 0.65 ± 0.00
- 0.17 ± 0.01
- 0.51 ± 0.01
- 0.25 ± 0.01
- 0.53 ± 0.00
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.translation %}
+ {% assign bleu_envi_best = 0 %}
+ {% assign bleu_vien_best = 0 %}
+ {% assign hlepor_envi_best = 0 %}
+ {% assign hlepor_vien_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["BLEU envi"] and dataset[1][m]["BLEU envi"] > bleu_envi_best %}
+ {% assign bleu_envi_best = dataset[1][m]["BLEU envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["BLEU vien"] and dataset[1][m]["BLEU vien"] > bleu_vien_best %}
+ {% assign bleu_vien_best = dataset[1][m]["BLEU vien"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR envi"] and dataset[1][m]["hLEPOR envi"] > hlepor_envi_best %}
+ {% assign hlepor_envi_best = dataset[1][m]["hLEPOR envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR vien"] and dataset[1][m]["hLEPOR vien"] > hlepor_vien_best %}
+ {% assign hlepor_vien_best = dataset[1][m]["hLEPOR vien"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["BLEU envi"] %}
+ {{ dataset[1][model]["BLEU envi"] | round: 2 }} ± {{ dataset[1][model]["BLEU envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["BLEU vien"] %}
+ {{ dataset[1][model]["BLEU vien"] | round: 2 }} ± {{ dataset[1][model]["BLEU vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR envi"] %}
+ {{ dataset[1][model]["hLEPOR envi"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR vien"] %}
+ {{ dataset[1][model]["hLEPOR vien"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/medium-prompt/question-answering.md b/_pages/ind/medium-prompt/question-answering.md
index 0731ae6..b8eaa5f 100644
--- a/_pages/ind/medium-prompt/question-answering.md
+++ b/_pages/ind/medium-prompt/question-answering.md
@@ -3,63 +3,60 @@ layout: default
permalink: /leaderboard/ind/medium-prompt/question-answering
---
# Medium-Prompt Question Answering Leaderboard
+{% assign lang = 'ind' %}
- Models
- XQuAD
- MLQA
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- F1↑
- EM↑
- F1↑
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
-
- URA-LLaMa 70B
- 0.08 ± 0.00
- 0.33 ± 0.00
- 0.07 ± 0.00
- 0.31 ± 0.00
-
-
- URA-LLaMa 13B
- 0.04 ± 0.00
- 0.21 ± 0.00
- 0.04 ± 0.00
- 0.19 ± 0.00
-
-
- URA-LLaMa 7B
- 0.01 ± 0.00
- 0.11 ± 0.00
- 0.01 ± 0.00
- 0.11 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.09 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.03 ± 0.00
- 0.00 ± 0.00
- 0.03 ± 0.00
-
-
- MixSUra 8x7B
- 0.01 ± -
- 0.25 ± -
- 0.00 ± -
- 0.25 ± -
-
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/medium-prompt/summarization.md b/_pages/ind/medium-prompt/summarization.md
index 78e4290..5d3185e 100644
--- a/_pages/ind/medium-prompt/summarization.md
+++ b/_pages/ind/medium-prompt/summarization.md
@@ -3,147 +3,132 @@ layout: default
permalink: /leaderboard/ind/medium-prompt/summarization
---
# Medium-Prompt Summarization Leaderboard
+{% assign lang = 'ind' %}
-
-
-Models
-VietNews
-WikiLingua
-
-
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-
-
-
-
-URA-LLaMa 70B
-0.35 ± 0.00
-0.16 ± 0.00
-0.24 ± 0.00
--0.11 ± 0.00
-0.12 ± 0.00
-0.63 ± 0.00
-5.43 ± 0.02
-37.78 ± 0.47
-0.33 ± 0.00
-0.14 ± 0.00
-0.22 ± 0.00
--0.16± 0.00
-0.24± 0.10
-0.59 ± 0.01
-4.62 ± 0.11
-56.56 ± 1.70
-
-
-URA-LLaMa 13B
-0.26 ± 0.00
-0.12 ± 0.00
-0.17 ± 0.00
--0.09 ± 0.00
--0.08 ± 0.18
-0.46 ± 0.00
-3.55 ± 0.04
-47.75 ± 0.65
-0.14 ± 0.00
-0.05 ± 0.00
-0.09 ± 0.00
--0.16 ± 0.00
--0.14 ± 0.12
-0.26 ± 0.01
-1.83 ± 0.06
-60.10 ± 2.16
-
-
-URA-LLaMa 7B
-0.41 ± 0.00
-0.18 ± 0.00
-0.27 ± 0.00
--0.09 ± 0.00
--0.08 ± 0.13
-0.83 ± 0.00
-8.13 ± 0.04
-8.08 ± 0.17
-0.42 ± 0.00
-0.17 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.27 ± 0.21
-0.84 ± 0.00
-7.15 ± 0.08
-8.08 ± 0.36
-
-
-LLaMa-2 13B
-0.02 ± 0.00
-0.00 ± 0.00
-0.02 ± 0.00
--0.09 ± 0.00
--0.19 ± 0.05
-0.01 ± 0.00
-0.01 ± 0.00
-54.67 ± 0.16
-0.03 ± 0.00
-0.00 ± 0.00
-0.03 ± 0.00
--0.16 ± 0.00
--0.05 ± 0.03
-0.02 ± 0.00
-0.02 ± 0.00
-42.55 ± 0.81
-
-
-LLaMa-2 7B
-0.03 ± 0.00
-0.01 ± 0.00
-0.03 ± 0.00
--0.09 ± 0.00
--0.17 ± 0.03
-0.04 ± 0.00
-0.07 ± 0.00
-23.86 ± 0.26
-0.02 ± 0.00
-0.00 ± 0.00
-0.02 ± 0.00
--0.16 ± 0.00
--0.04 ± 0.06
-0.02 ± 0.00
-0.03 ± 0.00
-40.31 ± 0.88
-
-
-MixSUra 8x7B
-0.06 ± -
-0.01 ± -
-0.04 ± -
-- ± -
--0.13 ± -
-0.10 ± -
-0.17 ± -
-9.03 ± -
-0.03 ± -
-0.00 ± -
-0.03 ± -
-- ± -
--0.01 ± -
-0.17 ± -
-0.26 ± -
-16.68 ± -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/randomized-choice/knowledge.md b/_pages/ind/randomized-choice/knowledge.md
index b2f19ef..bbf8f5f 100644
--- a/_pages/ind/randomized-choice/knowledge.md
+++ b/_pages/ind/randomized-choice/knowledge.md
@@ -4,90 +4,96 @@ permalink: /leaderboard/ind/randomized-choice/knowledge
---
# Randomized-Choice Knowledge Leaderboard
+{% assign lang = 'ind' %}
- Models
- AC ↑
- F1 ↑
- AR ↑
- ECE ↓
- A@10 ↑
-
-
-
-
- Our 70B
- 0.76 ± 0.02
- 0.76 ± 0.02
- 0.78 ± 0.01
- 0.14 ± 0.02
- 0.94 ± 0.04
-
-
- Our 13B
- 0.62 ± 0.02
- 0.62 ± 0.02
- 0.61 ± 0.02
- 0.15 ± 0.02
- 0.67 ± 0.07
-
-
- Our 7B
- 0.45 ± 0.02
- 0.36 ± 0.02
- 0.57 ± 0.02
- 0.10 ± 0.02
- 0.45 ± 0.07
-
-
- LLaMa-2 13B
- 0.57 ± 0.02
- 0.57 ± 0.02
- 0.57 ± 0.02
- 0.29 ± 0.02
- 0.75 ± 0.07
-
-
- LLaMa-2 7B
- 0.36 ± 0.02
- 0.27 ± 0.02
- 0.56 ± 0.02
- 0.37 ± 0.02
- 0.44 ± 0.07
-
-
- Vietcuna 7B
- 0.26 ± 0.02
- 0.15 ± 0.01
- 0.50 ± 0.00
- 0.01 ± 0.01
- 0.26 ± 0.06
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].randomized_choice.knowledge %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- MixSUra 7B
- 0.61 ± -
- 0.61 ± -
- 0.54 ± -
- 0.31 ± -
- 0.65 ± -
-
-
- GPT-3.5
- 0.92 ± 0.01
- 0.74 ± 0.04
- -
- 0.67 ± 0.01
- 0.92 ± 0.04
+ {% for dataset in site.data.leaderboard[lang].randomized_choice.knowledge %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
- GPT-4
- 0.92 ± 0.01
- 0.74 ± 0.04
- -
- 0.67 ± 0.01
- 0.92 ± 0.04
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].randomized_choice.knowledge %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/robustness-aware/information-retrieval.md b/_pages/ind/robustness-aware/information-retrieval.md
index 0af94b0..87f6914 100644
--- a/_pages/ind/robustness-aware/information-retrieval.md
+++ b/_pages/ind/robustness-aware/information-retrieval.md
@@ -3,113 +3,84 @@ layout: default
permalink: /leaderboard/ind/robustness-aware/information-retrieval
---
# Robustness-Aware Information Retrieval Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- mMARCO
- mRobust04
-
-
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
-
-
-
-
- URA-LLaMa 70B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 13B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 7B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.07 ± 0.00
- 0.17 ± 0.00
- -
- -
- -
- -
-
-
- LLaMa-2 13B
- 0.06 ± 0.00
- 0.13 ± 0.00
- 0.19 ± 0.00
- 0.19 ± 0.00
-
-
-
-
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.08 ± 0.00
- 0.16 ± 0.00
- -
- -
- -
- -
-
-
- Vietcuna 7B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/robustness-aware/knowledge.md b/_pages/ind/robustness-aware/knowledge.md
index 4a390d8..7005673 100644
--- a/_pages/ind/robustness-aware/knowledge.md
+++ b/_pages/ind/robustness-aware/knowledge.md
@@ -3,114 +3,128 @@ layout: default
permalink: /leaderboard/ind/robustness-aware/knowledge
---
# Robustness-Aware Knowledge Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- ZaloE2E
- ViMMRC
-
-
- EM↑
- F1↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.23 ± 0.00
- 0.37 ± 0.00
- 0.65 ± 0.00
- 0.53 ± 0.00
- 0.84 ± 0.00
- 0.11 ± 0.00
- 0.77 ± 0.00
-
-
- URA-LLaMa 13B
- 0.18 ± 0.00
- 0.30 ± 0.00
- 0.41 ± 0.00
- 0.34 ± 0.00
- 0.61 ± 0.00
- 0.22 ± 0.00
- 0.58 ± 0.00
-
-
- URA-LLaMa 7B
- 0.10 ± 0.00
- 0.18 ± 0.00
- 0.33 ± 0.02
- 0.28 ± 0.02
- 0.61 ± 0.01
- 0.19 ± 0.02
- 0.33 ± 0.06
-
-
- LLaMa-2 13B
- 0.13 ± 0.00
- 0.21 ± 0.00
- 0.39 ± 0.00
- 0.31 ± 0.00
- 0.56 ± 0.00
- 0.46 ± 0.00
- 0.33 ± 0.00
-
-
- LLaMa-2 7B
- 0.02 ± 0.00
- 0.05 ± 0.00
- 0.26 ± 0.01
- 0.20 ± 0.01
- 0.51 ± 0.01
- 0.46 ± 0.01
- 0.13 ± 0.03
-
-
- Vietcuna 7B
- 0.05 ± 0.00
- 0.15 ± 0.00
- 0.26 ± 0.01
- 0.14 ± 0.00
- 0.50 ± 0.00
- 0.01 ± 0.01
- 0.21 ± 0.07
-
-
- MixSUra 8x7B
- 0.13 ± -
- 0.24 ± -
- 0.57 ± -
- 0.45 ± -
- 0.53 ± -
- 0.35 ± -
- 0.58 ± -
-
-
- GPT-3.5
- 0.45 ± 0.01
- 0.61 ± 0.01
- 0.90 ± 0.01
- 0.72 ± 0.04
- -
- 0.65 ± 0.01
- 0.88 ± 0.07
-
-
- GPT-4
- 0.44 ± 0.01
- 0.61 ± 0.01
- 0.91 ± 0.01
- 0.73 ± 0.07
- -
- 0.66 ± 0.07
- 0.88 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {{ dataset[0] }}
+
+ {% else %}
+
+ {{ dataset[0] }}
+
+ {% endif %}
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+ EM↑
+ F1↑
+ {% else %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.knowledge %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AC_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% else %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endif %}
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/robustness-aware/question-answering.md b/_pages/ind/robustness-aware/question-answering.md
index 334e0ac..4a135b6 100644
--- a/_pages/ind/robustness-aware/question-answering.md
+++ b/_pages/ind/robustness-aware/question-answering.md
@@ -3,84 +3,60 @@ layout: default
permalink: /leaderboard/ind/robustness-aware/question-answering
---
# Robustness-Aware Question Answering Leaderboard
+{% assign lang = 'ind' %}
- Models
- XQuAD
- MLQA
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- F1↑
- EM↑
- F1↑
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
-
- URA-LLaMa 70B
- 0.01 ± 0.00
- 0.17 ± 0.00
- 0.01 ± 0.00
- 0.18 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.09 ± 0.00
- 0.00 ± 0.00
- 0.10 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.09 ± 0.00
- 0.00 ± 0.00
- 0.10 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.02 ± 0.00
- 0.00 ± 0.00
- 0.03 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.02 ± 0.00
- 0.00 ± 0.00
- 0.02 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.06 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
-
-
- MixSUra 8x7B
- 0.00 ± -
- 0.11 ± -
- 0.00 ± -
- 0.12 ± -
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.19 ± 0.00
- 0.00 ± 0.00
- 0.20 ± 0.00
-
-
- GPT-4
- 0.00 ± 0.00
- 0.24 ± 0.00
- 0.00 ± 0.00
- 0.25 ± 0.00
-
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/robustness-aware/sentiment-analysis.md b/_pages/ind/robustness-aware/sentiment-analysis.md
index 7741871..343ad5b 100644
--- a/_pages/ind/robustness-aware/sentiment-analysis.md
+++ b/_pages/ind/robustness-aware/sentiment-analysis.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/ind/robustness-aware/sentiment-analysis
---
# Robustness-Aware Sentiment Analysis Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- VLSP 2016
- UiT-VSFC
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.63 ± 0.01
- 0.48 ± 0.01
- 0.60 ± 0.01
- 0.09 ± 0.01
- 0.83 ± 0.04
- 0.71 ± 0.01
- 0.45 ± 0.01
- 0.80 ± 0.01
- 0.08 ± 0.01
- 0.99 ± 0.01
-
-
- URA-LLaMa 13B
- 0.55 ± 0.02
- 0.52 ± 0.02
- 0.59 ± 0.01
- 0.06 ± 0.01
- 0.74 ± 0.05
- 0.72 ± 0.01
- 0.44 ± 0.05
- 0.77 ± 0.01
- 0.18 ± 0.01
- 0.77 ± 0.02
-
-
- URA-LLaMa 7B
- 0.52 ± 0.02
- 0.36 ± 0.03
- 0.59 ± 0.01
- 0.07 ± 0.01
- 0.66 ± 0.05
- 0.73 ± 0.01
- 0.41 ± 0.01
- 0.71 ± 0.01
- 0.16 ± 0.01
- 0.87 ± 0.02
-
-
- LLaMa-2 13B
- 0.46 ± 0.02
- 0.30 ± 0.01
- 0.55 ± 0.01
- 0.39 ± 0.02
- 0.70 ± 0.05
- 0.66 ± 0.01
- 0.40 ± 0.01
- 0.63 ± 0.01
- 0.11 ± 0.01
- 0.89 ± 0.02
-
-
- LLaMa-2 7B
- 0.45 ± 0.02
- 0.36 ± 0.01
- 0.54 ± 0.01
- 0.20 ± 0.02
- 0.51 ± 0.05
- 0.51 ± 0.01
- 0.33 ± 0.01
- 0.65 ± 0.01
- 0.15 ± 0.01
- 0.80 ± 0.02
-
-
- Vietcuna 7B
- 0.44 ± 0.02
- 0.27 ± 0.01
- 0.51 ± 0.01
- 0.23 ± 0.02
- 0.53 ± 0.05
- 0.49 ± 0.01
- 0.25 ± 0.03
- 0.46 ± 0.01
- 0.33 ± 0.01
- 0.34 ± 0.03
-
-
- MixSUra 8x7B
- 0.59 ± -
- 0.59 ± -
- 0.55 ± -
- 0.34 ± -
- 0.52 ± -
- 0.69 ± -
- 0.44 ± -
- 0.61 ± -
- 0.29 ± -
- 0.66 ± -
-
-
- GPT-3.5
- 0.64 ± 0.01
- 0.60 ± 0.01
- -
- 0.31 ± 0.01
- 0.54 ± 0.05
- 0.86 ± 0.01
- 0.71 ± 0.01
- -
- 0.53 ± 0.01
- 0.86 ± 0.02
-
-
- GPT-4
- 0.74 ± 0.00
- 0.73 ± 0.00
- -
- 0.41 ± 0.00
- 0.71 ± 0.00
- 0.83 ± 0.00
- 0.70 ± 0.00
- -
- 0.50 ± 0.00
- 0.85 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/robustness-aware/summarization.md b/_pages/ind/robustness-aware/summarization.md
index 1fe681e..a5771c6 100644
--- a/_pages/ind/robustness-aware/summarization.md
+++ b/_pages/ind/robustness-aware/summarization.md
@@ -3,204 +3,132 @@ layout: default
permalink: /leaderboard/ind/robustness-aware/summarization
---
# Robustness-Aware Summarization Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- VietNews
- WikiLingua
-
-
- R1↑
- R2↑
- RL↑
- SC↑
- BS↑
- Cv↑
- De↑
- Cp↑
- R1↑
- R2↑
- RL↑
- SC↑
- BS↑
- Cv↑
- De↑
- Cp↑
-
-
-
-
- URA-LLaMa 70B
- 0.34 ± 0.00
- 0.15 ± 0.00
- 0.23 ± 0.00
- -0.06 ± 0.00
- -0.11 ± 0.18
- 0.10 ± 0.00
- 0.10 ± 0.00
- 39.63 ± 0.87
- 0.28 ± 0.00
- 0.11 ± 0.00
- 0.19 ± 0.00
- -0.16 ± 0.00
- 0.25 ± 0.23
- 0.50 ± 0.01
- 0.50 ± 0.01
- 167.42 ± 7.09
-
-
- URA-LLaMa 13B
- 0.35 ± 0.00
- 0.14 ± 0.00
- 0.23 ± 0.00
- -0.09 ± 0.00
- -0.07 ± 0.17
- 0.64 ± 0.00
- 0.65 ± 0.00
- 134.65 ± 3.76
- 0.20 ± 0.00
- 0.07 ± 0.00
- 0.13 ± 0.00
- -0.17 ± 0.00
- 0.20 ± 0.11
- 0.38 ± 0.00
- 0.38 ± 0.00
- 103.69 ± 3.33
-
-
- URA-LLaMa 7B
- 0.37 ± 0.00
- 0.12 ± 0.00
- 0.24 ± 0.00
- -0.10 ± 0.00
- -0.24 ± 0.18
- 0.65 ± 0.00
- 0.65 ± 0.00
- 17.92 ± 0.87
- 0.37 ± 0.00
- 0.12 ± 0.00
- 0.24 ± 0.00
- -0.17 ± 0.00
- 0.11 ± 0.18
- 0.65 ± 0.00
- 0.65 ± 0.00
- 20.49 ± 0.95
-
-
- LLaMa-2 13B
- 0.05 ± 0.00
- 0.01 ± 0.00
- 0.04 ± 0.00
- -0.15 ± 0.00
- -0.24 ± 0.18
- 0.03 ± 0.00
- 0.03 ± 0.00
- 55.91 ± 0.65
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.03 ± 0.00
- -0.17 ± 0.00
- 0.09 ± 0.00
- 0.05 ± 0.00
- 0.05 ± 0.00
- 66.85 ± 6.72
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.01 ± 0.00
- 0.05 ± 0.00
- -0.10 ± 0.00
- -0.19 ± 0.04
- 0.07 ± 0.00
- 0.07 ± 0.00
- 55.29 ± 0.88
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.04 ± 0.00
- -0.17 ± 0.00
- 0.15 ± 0.00
- 0.06 ± 0.00
- 0.06 ± 0.00
- 58.32 ± 3.32
-
-
- Vietcuna 7B
- 0.03 ± 0.00
- 0.01 ± 0.00
- 0.02 ± 0.00
- -0.10 ± 0.00
- -0.18 ± 0.06
- 0.91 ± 0.00
- 0.91 ± 0.00
- 1026.61 ± 3.86
- 0.08 ± 0.00
- 0.02 ± 0.00
- 0.05 ± 0.00
- -0.17 ± 0.00
- -0.19 ± 0.05
- 0.78 ± 0.00
- 0.78 ± 0.00
- 505.45 ± 8.64
-
-
- MixSUra 8x7B
- 0.41 ± -
- 0.19 ± -
- 0.26 ± -
- - ± -
- -0.03 ± -
- 0.86 ± -
- 0.87 ± -
- 29.15 ± -
- 0.46 ± -
- 0.21 ± -
- 0.28 ± -
- - ± -
- 0.26 ± -
- 0.88 ± -
- 0.98 ± -
- 19.10 ± -
-
-
- GPT-3.5
- 0.34 ± 0.00
- 0.19 ± 0.00
- 0.23 ± 0.00
- -0.10 ± 0.00
- 0.05 ± 0.14
- 0.81 ± 0.00
- 0.81 ± 0.00
- 128.44 ± 2.94
- 0.39 ± 0.00
- 0.19 ± 0.00
- 0.25 ± 0.00
- -0.17 ± 0.00
- 0.28 ± 0.11
- 0.82 ± 0.00
- 0.82 ± 0.00
- 200.90 ± 7.40
-
-
- GPT-4
- 0.39 ± 0.00
- 0.21 ± 0.00
- 0.26 ± 0.00
- -0.10 ± 0.09
- 0.04 ± 0.00
- 0.83 ± 0.00
- 0.83 ± 0.71
- 24.48 ± 0.00
- 0.45 ± 0.00
- 0.20 ± 0.00
- 0.27 ± 0.00
- -0.17 ± 0.00
- 0.28 ± 0.00
- 0.80 ± 0.03
- 0.81 ± 0.00
- 20.40 ± 1.59
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/robustness-aware/text-classification.md b/_pages/ind/robustness-aware/text-classification.md
index 07c0e7e..7ae34de 100644
--- a/_pages/ind/robustness-aware/text-classification.md
+++ b/_pages/ind/robustness-aware/text-classification.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/ind/robustness-aware/text-classification
---
# Robustness-Aware Text Classification Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- UiT-VSMEC
- PhoATIS
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.25 ± 0.00
- 0.16 ± 0.00
- 0.56 ± 0.02
- 0.20 ± 0.00
- 0.33 ± 0.00
- 0.16 ± 0.02
- 0.26 ± 0.03
- 0.79 ± 0.00
- 0.79 ± 0.02
- 0.08 ± 0.06
-
-
- URA-LLaMa 13B
- 0.30 ± 0.00
- 0.11 ± 0.00
- 0.51 ± 0.01
- 0.26 ± 0.00
- 0.44 ± 0.00
- 0.01 ± 0.01
- 0.05 ± 0.01
- 0.47 ± 0.01
- 0.84 ± 0.01
- 0.00 ± 0.04
-
-
- URA-LLaMa 7B
- 0.29 ± 0.00
- 0.10 ± 0.00
- 0.57 ± 0.01
- 0.17 ± 0.00
- 0.30 ± 0.00
- 0.02 ± 0.01
- 0.04 ± 0.00
- 0.55 ± 0.01
- 0.18 ± 0.01
- 0.01 ± 0.02
-
-
- LLaMa-2 13B
- 0.19 ± 0.00
- 0.07 ± 0.00
- 0.52 ± 0.01
- 0.47 ± 0.00
- 0.43 ± 0.00
- 0.02 ± 0.00
- 0.06 ± 0.00
- 0.57 ± 0.01
- 0.91 ± 0.00
- 0.01 ± 0.00
-
-
- LLaMa-2 7B
- 0.17 ± 0.00
- 0.10 ± 0.00
- 0.55 ± 0.00
- 0.33 ± 0.00
- 0.29 ± 0.00
- 0.01 ± 0.01
- 0.00 ± 0.00
- 0.56 ± 0.00
- 0.69 ± 0.01
- 0.02 ± 0.02
-
-
- Vietcuna 7B
- 0.09 ± 0.00
- 0.09 ± 0.00
- 0.51 ± 0.01
- 0.91 ± 0.00
- 0.09 ± 0.00
- 0.02 ± 0.01
- 0.01 ± 0.00
- 0.55 ± 0.01
- 0.23 ± 0.01
- 0.02 ± 0.01
-
-
- MixSUra 8x7B
- 0.35 ± -
- 0.27 ± -
- 0.70 ± -
- 0.58 ± -
- 0.70 ± -
- 0.80 ± -
- 55 ± -
- 0.94 ± -
- 0.15 ± -
- 0.88 ± -
-
-
- GPT-3.5
- 0.42 ± 0.00
- 0.41 ± 0.00
- -
- 0.28 ± 0.00
- 0.30 ± 0.00
- 0.68 ± 0.02
- 0.64 ± 0.03
- -
- 0.62 ± 0.02
- 0.70 ± 0.05
-
-
- GPT-4
- 0.48 ± 0.00
- 0.45 ± 0.00
- -
- 0.33 ± 0.00
- 0.40 ± 0.00
- 0.86 ± 0.01
- 0.80 ± 0.02
- -
- 0.80 ± 0.01
- 0.91 ± 0.03
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/robustness-aware/toxicity-detection.md b/_pages/ind/robustness-aware/toxicity-detection.md
index e0ac7b4..ce60db1 100644
--- a/_pages/ind/robustness-aware/toxicity-detection.md
+++ b/_pages/ind/robustness-aware/toxicity-detection.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/ind/robustness-aware/toxicity-detection
---
# Robustness-Aware Toxicity Detection Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- UiT-ViCTSD
- UiT-ViHSD
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.32 ± 0.00
- 0.21 ± 0.00
- 0.72 ± 0.01
- 0.62 ± 0.00
- 0.33 ± 0.00
- 0.14 ± 0.00
- 0.12 ± 0.00
- 0.64 ± 0.02
- 0.61 ± 0.00
- 0.23 ± 0.00
-
-
- URA-LLaMa 13B
- 0.27 ± 0.00
- 0.26 ± 0.00
- 0.56 ± 0.00
- 0.56 ± 0.00
- 0.12 ± 0.00
- 0.18 ± 0.00
- 0.11 ± 0.00
- 0.57 ± 0.01
- 0.45 ± 0.00
- 0.20 ± 0.00
-
-
- URA-LLaMa 7B
- 0.22 ± 0.00
- 0.21 ± 0.00
- 0.63 ± 0.00
- 0.39 ± 0.00
- 0.36 ± 0.00
- 0.12 ± 0.00
- 0.07 ± 0.00
- 0.62 ± 0.00
- 0.38 ± 0.00
- 0.19 ± 0.00
-
-
- LLaMa-2 13B
- 0.12 ± 0.00
- 0.11 ± 0.00
- 0.56 ± 0.01
- 0.66 ± 0.00
- 0.12 ± 0.00
- 0.10 ± 0.00
- 0.07 ± 0.00
- 0.59 ± 0.01
- 0.62 ± 0.00
- 0.24 ± 0.00
-
-
- LLaMa-2 7B
- 0.04 ± 0.00
- 0.04 ± 0.00
- 0.62 ± 0.00
- 0.86 ± 0.00
- 0.02 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.54 ± 0.00
- 0.79 ± 0.00
- 0.00 ± 0.00
-
-
- Vietcuna 7B
- 0.11 ± 0.00
- 0.11 ± 0.00
- 0.54 ± 0.00
- 0.39 ± 0.00
- 0.13 ± 0.00
- 0.09 ± 0.00
- 0.05 ± 0.00
- 0.5 ± 0.00
- 0.24 ± 0.00
- 0.08 ± 0.00
-
-
- MixSUra 8x7B
- 0.72 ± -
- 0.39 ± -
- - ± -
- 0.25 ± -
- 0.81 ± -
- 0.66 ± -
- 0.31 ± -
- 0.67 ± -
- 0.21 ± -
- 0.82 ± -
-
-
- GPT-3.5
- 0.51 ± 0.00
- 0.46 ± 0.00
- 0.5 ± 0.00
- 0.01 ± 0.00
- 0.54 ± 0.00
- 0.64 ± 0.00
- 0.47 ± 0.00
- - ± -
- 0.30 ± 0.00
- 0.63 ± 0.00
-
-
- GPT-4
- 0.88 ± 0.00
- 0.71 ± 0.00
- - ± -
- 0.38 ± 0.00
- 0.88 ± 0.00
- 0.78 ± 0.00
- 0.56 ± 0.00
- - ± -
- 0.44 ± 0.00
- 0.78 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/robustness-aware/translation.md b/_pages/ind/robustness-aware/translation.md
index bcacd2c..b2ae678 100644
--- a/_pages/ind/robustness-aware/translation.md
+++ b/_pages/ind/robustness-aware/translation.md
@@ -3,124 +3,84 @@ layout: default
permalink: /leaderboard/ind/robustness-aware/translation
---
# Robustness-Aware Translation Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- PhoMT
- OPUS100
-
-
- (En → Vi)
- (Vi → En)
- (En → Vi)
- (Vi → En)
- (En → Vi)
- (Vi → En)
- (En → Vi)
- (Vi → En)
-
-
-
-
- URA-LLaMa 70B
- 0.25 ± 0.00
- 0.58 ± 0.00
- 0.11 ± 0.00
- 0.51 ± 0.00
- 0.05 ± 0.00
- 0.40 ± 0.01
- 0.06 ± 0.00
- 0.36 ± 0.00
-
-
- URA-LLaMa 13B
- 0.23 ± 0.00
- 0.55 ± 0.00
- 0.10 ± 0.00
- 0.50 ± 0.00
- 0.03 ± 0.00
- 0.38 ± 0.01
- 0.05 ± 0.00
- 0.38 ± 0.00
-
-
- URA-LLaMa 7B
- 0.15 ± 0.00
- 0.48 ± 0.00
- 0.06 ± 0.00
- 0.46 ± 0.00
- 0.02 ± 0.00
- 0.35 ± 0.00
- 0.03 ± 0.00
- 0.34 ± 0.01
-
-
- LLaMa-2 13B
- 0.20 ± 0.00
- 0.51 ± 0.00
- 0.07 ± 0.00
- 0.44 ± 0.00
- 0.03 ± 0.00
- 0.36 ± 0.01
- 0.04 ± 0.00
- 0.32 ± 0.00
-
-
- LLaMa-2 7B
- 0.13 ± 0.00
- 0.41 ± 0.00
- 0.05 ± 0.00
- 0.42 ± 0.00
- 0.02 ± 0.00
- 0.31 ± 0.00
- 0.03 ± 0.00
- 0.30 ± 0.00
-
-
- Vietcuna 7B
- 0.17 ± 0.00
- 0.43 ± 0.00
- 0.07 ± 0.01
- 0.41 ± 0.00
- 0.09 ± 0.01
- 0.38 ± 0.01
- 0.09 ± 0.01
- 0.33 ± 0.00
-
-
- MixSUra 8x7B
- 0.14 ± -
- 0.50 ± -
- 0.11 ± -
- 0.46 ± -
- 0.06 ± -
- 0.36 ± -
- 0.06 ± -
- 0.31 ± -
-
-
- GPT-3.5
- 0.31 ± 0.00
- 0.64 ± 0.00
- 0.17 ± 0.00
- 0.59 ± 0.00
- 0.15 ± 0.01
- 0.49 ± 0.01
- 0.21 ± 0.01
- 0.48 ± 0.00
-
-
- GPT-4
- 0.31 ± 0.00
- 0.65 ± 0.00
- 0.20 ± 0.00
- 0.62 ± 0.00
- 0.16 ± 0.01
- 0.50 ± 0.01
- 0.23 ± 0.01
- 0.51 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.translation %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.translation %}
+ BLEU envi↑
+ BLEU vien↑
+ hLEPOR envi↑
+ hLEPOR vien↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.translation %}
+ {% assign bleu_envi_best = 0 %}
+ {% assign bleu_vien_best = 0 %}
+ {% assign hlepor_envi_best = 0 %}
+ {% assign hlepor_vien_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["BLEU envi"] and dataset[1][m]["BLEU envi"] > bleu_envi_best %}
+ {% assign bleu_envi_best = dataset[1][m]["BLEU envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["BLEU vien"] and dataset[1][m]["BLEU vien"] > bleu_vien_best %}
+ {% assign bleu_vien_best = dataset[1][m]["BLEU vien"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR envi"] and dataset[1][m]["hLEPOR envi"] > hlepor_envi_best %}
+ {% assign hlepor_envi_best = dataset[1][m]["hLEPOR envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR vien"] and dataset[1][m]["hLEPOR vien"] > hlepor_vien_best %}
+ {% assign hlepor_vien_best = dataset[1][m]["hLEPOR vien"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["BLEU envi"] %}
+ {{ dataset[1][model]["BLEU envi"] | round: 2 }} ± {{ dataset[1][model]["BLEU envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["BLEU vien"] %}
+ {{ dataset[1][model]["BLEU vien"] | round: 2 }} ± {{ dataset[1][model]["BLEU vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR envi"] %}
+ {{ dataset[1][model]["hLEPOR envi"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR vien"] %}
+ {{ dataset[1][model]["hLEPOR vien"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/weaker-prompt/question-answering.md b/_pages/ind/weaker-prompt/question-answering.md
index 46bc765..ecadfab 100644
--- a/_pages/ind/weaker-prompt/question-answering.md
+++ b/_pages/ind/weaker-prompt/question-answering.md
@@ -3,63 +3,60 @@ layout: default
permalink: /leaderboard/ind/weaker-prompt/question-answering
---
# Weak-Prompt Question Answering Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- XQuAD
- MLQA
-
-
- EM↑
- F1↑
- EM↑
- F1↑
-
-
-
-
- URA-LLaMa 70B
- 0.21 ± 0.01
- 0.47 ± 0.01
- 0.14 ± 0.01
- 0.41 ± 0.00
-
-
- URA-LLaMa 13B
- 0.22 ± 0.01
- 0.43 ± 0.01
- 0.17 ± 0.01
- 0.40 ± 0.01
-
-
- URA-LLaMa 7B
- 0.13 ± 0.00
- 0.32 ± 0.00
- 0.10 ± 0.00
- 0.32 ± 0.00
-
-
- LLaMa-2 13B
- 0.04 ± 0.00
- 0.28 ± 0.00
- 0.04 ± 0.00
- 0.28 ± 0.00
-
-
- LLaMa-2 7B
- 0.06 ± 0.00
- 0.24 ± 0.00
- 0.05 ± 0.00
- 0.24 ± 0.00
-
-
- MixSUra 8x7b
- 0.13 ±-
- 0.38 ± -
- 0.09 ± -
- 0.36 ± -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/weaker-prompt/summarization.md b/_pages/ind/weaker-prompt/summarization.md
index 16f89b5..0968087 100644
--- a/_pages/ind/weaker-prompt/summarization.md
+++ b/_pages/ind/weaker-prompt/summarization.md
@@ -3,147 +3,132 @@ layout: default
permalink: /leaderboard/ind/weaker-prompt/summarization
---
# Weak-Prompt Summarization Leaderboard
+{% assign lang = 'ind' %}
-
-
-Models
-VietNews
-WikiLingua
-
-
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-
-
-
-
-URA-LLaMa 70B
-0.49 ± 0.00
-0.23 ± 0.00
-0.31 ± 0.00
--0.08 ± 0.00
-0.05 ± 0.11
-0.89 ± 0.00
-8.90 ± 0.03
-18.48 ± 0.59
-0.47 ± 0.00
-0.20 ± 0.00
-0.29 ± 0.00
--0.16 ± 0.00
-0.19 ± 0.13
-0.86 ± 0.00
-6.83 ± 0.09
-25.30 ± 1.86
-
-
-URA-LLaMa 13B
-0.27 ± 0.00
-0.12 ± 0.00
-0.18 ± 0.00
--0.09 ± 0.00
-0.05 ± 0.11
-0.56 ± 0.00
-5.00 ± 0.04
-153.55 ± 0.99
-0.22 ± 0.00
-0.09 ± 0.00
-0.14 ± 0.00
--0.16 ± 0.00
-0.20 ± 0.007
-0.48 ± 0.00
-3.49 ± 0.04
-190.09 ± 4.92
-
-
-URA-LLaMa 7B
-0.45 ± 0.00
-0.21 ± 0.00
-0.29 ± 0.00
--0.08 ± 0.00
-0.03 ± 0.09
-0.91 ± 0.00
-9.43 ± 0.03
-6.42 ± 0.05
-0.42 ± 0.00
-0.18 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.07 ± 0.12
-0.89 ± 0.00
-7.58 ± 0.05
-7.14 ± 0.14
-
-
-LLaMa-2 13B
-0.45 ± 0.00
-0.22 ± 0.00
-0.29 ± 0.00
--0.09 ± 0.00
-0.00 ± 0.14
-0.92 ± 0.00
-9.49 ± 0.02
-8.46 ± 0.29
-0.47 ± 0.00
-0.22 ± 0.00
-0.29 ± 0.00
--0.16 ± 0.00
-0.34 ± 0.12
-0.92 ± 0.00
-9.39 ± 0.05
-17.94 ± 2.84
-
-
-LLaMa-2 7B
-0.36 ± 0.00
-0.17 ± 0.00
-0.23 ± 0.00
--0.09 ± 0.00
--0.15 ± 0.12
-0.69 ± 0.00
-6.35 ± 0.03
-7.59 ± 0.21
-0.45 ± 0.00
-0.20 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.36 ± 0.00
-0.83 ± 0.00
-7.71 ± 0.07
-12.39 ± 1.46
-
-
-MixSUra 8x7B
-0.44 ± -
-0.22 ± -
-0.29 ± -
-- ± -
-0.07 ± -
-0.97 ± -
-35.67 ± -
-9.43 ± -
-0.47 ± -
-0.22 ± -
-0.29 ± -
-- ± -
-0.19 ± -
-0.97 ± -
-28.97 ± -
-10.27 ± -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/zero-shot/information-retrieval.md b/_pages/ind/zero-shot/information-retrieval.md
index c7d20e4..5609ce6 100644
--- a/_pages/ind/zero-shot/information-retrieval.md
+++ b/_pages/ind/zero-shot/information-retrieval.md
@@ -3,113 +3,84 @@ layout: default
permalink: /leaderboard/ind/zero-shot/information-retrieval
---
# Zero-Shot Information Retrieval Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- mMARCO
- mRobust04
-
-
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
-
-
-
-
- URA-LLaMa 70B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- URA-LLaMa 13B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- URA-LLaMa 7B
- 0.06 ± 0.00
- 0.14 ± 0.00
- 0.09 ± 0.00
- 0.21 ± 0.00
- -
- -
- -
- -
-
-
- LLaMa-2 13B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- LLaMa-2 7B
- 0.06 ± 0.00
- 0.11 ± 0.00
- 0.08 ± 0.00
- 0.17 ± 0.00
- -
- -
- -
- -
-
-
- Vietcuna 7B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/zero-shot/knowledge.md b/_pages/ind/zero-shot/knowledge.md
index f610dbf..4f366ef 100644
--- a/_pages/ind/zero-shot/knowledge.md
+++ b/_pages/ind/zero-shot/knowledge.md
@@ -2,105 +2,129 @@
layout: default
permalink: /leaderboard/ind/zero-shot/knowledge
---
-# Zero-Shot Knowledge Leaderboard
+# Zero-Shot Knowledge Leaderboard
+{% assign lang = 'ind' %}
-
-
-Models
-ZaloE2E
-ViMMRC
-
-
-EM↑
-F1↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.28 ± 0.02
-0.44 ± 0.02
-0.80 ± 0.02
-0.80 ± 0.02
-0.85 ± 0.01
-0.10 ± 0.02
-0.96 ± 0.03
-
-
-URA-LLaMa 13B
-0.12 ± 0.01
-0.22 ± 0.01
-0.40 ± 0.02
-0.31 ± 0.02
-0.57 ± 0.02
-0.48 ± 0.02
-0.42 ± 0.08
-
-
-URA-LLaMa 7B
-0.09 ± 0.01
-0.20 ± 0.02
-0.30 ± 0.02
-0.10 ± 0.01
-0.56 ± 0.02
-0.27 ± 0.02
-0.56 ± 0.07
-
-
-LLaMa-2 13B
-0.06 ± 0.01
-0.10 ± 0.01
-0.52 ± 0.02
-0.41 ± 0.02
-0.64 ± 0.02
-0.33 ± 0.02
-0.73 ± 0.07
-
-
-LLaMa-2 7B
-0.03 ± 0.01
-0.07 ± 0.01
-0.37 ± 0.02
-0.25 ± 0.02
-0.51 ± 0.02
-0.35 ± 0.02
-0.29 ± 0.06
-
-
-Vietcuna 7B
-0.03 ± 0.01
-0.06 ± 0.01
-0.32 ± 0.02
-0.22 ± 0.02
-0.50 ± 0.00
-0.07 ± 0.02
-0.33 ± 0.07
-
-
-GPT-3.5
-0.37 ± 0.02
-0.56 ± 0.02
-0.90 ± 0.01
-0.72 ± 0.01
--
-0.65 ± 0.01
-0.90 ± 0.04
-
-
-GPT-4
-0.38 ± 0.02
-0.55 ± 0.02
-0.92 ± 0.01
-0.73 ± 0.06
--
-0.67 ± 0.01
-0.90 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {{ dataset[0] }}
+
+ {% else %}
+
+ {{ dataset[0] }}
+
+ {% endif %}
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+ EM↑
+ F1↑
+ {% else %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.knowledge %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AC_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% else %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endif %}
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/zero-shot/language-modeling.md b/_pages/ind/zero-shot/language-modeling.md
index 6e42b7f..863b070 100644
--- a/_pages/ind/zero-shot/language-modeling.md
+++ b/_pages/ind/zero-shot/language-modeling.md
@@ -3,149 +3,108 @@ layout: default
permalink: /leaderboard/ind/zero-shot/language-modeling
---
# Zero-Shot Language Modeling Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- MLQA-MLM
- VSEC
-
-
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
-
-
-
-
- URA-LLaMa 70B
- 0.00 ± 0.00
- 0.50 ± 0.01
- 0.64 ± 0.01
- 519.09 ± 10.96
- 115.82 ± 2.45
- 1.08 ± 0.01
- 0.00 ± 0.00
- 0.88 ± 0.00
- 1.01 ± 0.00
- 113.51 ± 0.57
- 29.91 ± 0.15
- 1.09 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.67 ± 0.00
- 0.78 ± 0.00
- 697.85 ± 11.62
- 161.34 ± 2.64
- 1.16 ± 0.02
- 0.01 ± 0.00
- 0.42 ± 0.01
- 0.56 ± 0.01
- 54.88 ± 0.77
- 14.50 ± 0.19
- 1.26 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.73 ± 0.00
- 0.88 ± 0.01
- 684.00 ± 13.18
- 166.87 ± 3.18
- 1.25 ± 0.01
- 0.01 ± 0.00
- 3.33 ± 0.04
- 3.14 ± 0.03
- 420.34 ± 5.66
- 85.79 ± 0.96
- 1.33 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.90 ± 0.00
- 1.00 ± 0.00
- 881.97 ± 11.23
- 208.52 ± 2.52
- 1.10 ± 0.01
- 0.00 ± 0.00
- 1.32 ± 0.01
- 1.40 ± 0.01
- 160.06 ± 1.16
- 38.12 ± 0.23
- 1.11 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.95 ± 0.00
- 1.07 ± 0.01
- 860.42 ± 13.18
- 210.21 ± 3.18
- 1.25 ± 0.01
- 0.00 ± 0.00
- 1.54 ± 0.04
- 1.55 ± 0.03
- 171.28 ± 5.66
- 40.18 ± 0.96
- 1.14 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 1.00 ± 0.00
- 1.00 ± 0.00
- 951.53 ± 12.37
- 208.57 ± 2.73
- 1.48 ± 0.01
- 0.01 ± 0.00
- 1.11 ± 0.01
- 1.20 ± 0.01
- 139.90 ± 1.39
- 33.94 ± 0.33
- 1.61 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.34 ± 0.01
- 0.50 ± 0.01
- 422.30 ± 10.79
- 100.33 ± 2.44
- -
- 0.02 ± 0.00
- 0.16 ± 0.00
- 0.30 ± 0.00
- 12.63 ± 0.34
- 3.48 ± 0.09
- -
-
-
- GPT-4
- 0.04 ± 0.00
- 0.40 ± 0.01
- 0.45 ± 0.01
- 381.88 ± 10.26
- 93.34 ± 2.39
- -
- 0.60 ± 0.01
- 0.14 ± 0.00
- 0.26 ± 0.00
- 13.58 ± 0.45
- 3.67 ± 0.12
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.language_modeling %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.language_modeling %}
+ EM↑
+ CER↓
+ WER↓
+ CED↓
+ WED↓
+ PLX↓
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.language_modeling %}
+ {% assign EM_best = 0 %}
+ {% assign CER_best = 1 %}
+ {% assign WER_best = 1 %}
+ {% assign CED_best = 10000 %}
+ {% assign WED_best = 10000 %}
+ {% assign PLX_best = 10000 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].CER and dataset[1][m].CER < CER_best %}
+ {% assign CER_best = dataset[1][m].CER %}
+ {% endif %}
+ {% if dataset[1][m].WER and dataset[1][m].WER < WER_best %}
+ {% assign WER_best = dataset[1][m].WER %}
+ {% endif %}
+ {% if dataset[1][m].CED and dataset[1][m].CED < CED_best %}
+ {% assign CED_best = dataset[1][m].CED %}
+ {% endif %}
+ {% if dataset[1][m].WED and dataset[1][m].WED < WED_best %}
+ {% assign WED_best = dataset[1][m].WED %}
+ {% endif %}
+ {% if dataset[1][m].PLX and dataset[1][m].PLX < PLX_best %}
+ {% assign PLX_best = dataset[1][m].PLX %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CER %}
+ {{ dataset[1][model].CER | round: 2 }} ± {{ dataset[1][model].CER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WER %}
+ {{ dataset[1][model].WER | round: 2 }} ± {{ dataset[1][model].WER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CED %}
+ {{ dataset[1][model].CED | round: 2 }} ± {{ dataset[1][model].CED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WED %}
+ {{ dataset[1][model].WED | round: 2 }} ± {{ dataset[1][model].WED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].PLX %}
+ {{ dataset[1][model].PLX | round: 2 }} ± {{ dataset[1][model].PLX_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/zero-shot/question-answering.md b/_pages/ind/zero-shot/question-answering.md
index c657a45..a453107 100644
--- a/_pages/ind/zero-shot/question-answering.md
+++ b/_pages/ind/zero-shot/question-answering.md
@@ -3,77 +3,60 @@ layout: default
permalink: /leaderboard/ind/zero-shot/question-answering
---
# Zero-Shot Question Answering Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- XQuAD
- MLQA
-
-
- EM↑
- F1↑
- EM↑
- F1↑
-
-
-
-
- URA-LLaMa 70B
- 0.06 ± 0.00
- 0.30 ± 0.00
- 0.04 ± 0.00
- 0.28 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.14 ± 0.00
- 0.00 ± 0.00
- 0.15 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.14 ± 0.00
- 0.00 ± 0.00
- 0.16 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.02
- 0.05 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.05 ± 0.00
- 0.00 ± 0.00
- 0.06 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.24 ± 0.00
- 0.00 ± 0.00
- 0.25 ± 0.00
-
-
- GPT-4
- 0.00 ± 0.00
- 0.27 ± 0.00
- 0.00 ± 0.00
- 0.27 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/zero-shot/reasoning.md b/_pages/ind/zero-shot/reasoning.md
index 60b3834..2a20d6d 100644
--- a/_pages/ind/zero-shot/reasoning.md
+++ b/_pages/ind/zero-shot/reasoning.md
@@ -3,123 +3,72 @@ layout: default
permalink: /leaderboard/ind/zero-shot/reasoning
---
# Zero-Shot Reasoning Leaderboard
+{% assign lang = 'ind' %}
- Models
- SR - Natural
- SR - Abstract symbol
- MATH
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.reasoning %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
+ {% for dataset in site.data.leaderboard[lang].zero_shot.reasoning %}
+ EM↑
+ F1↑
+ Equ↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.06 ± 0.00
- 0.34 ± 0.00
- 0.06 ± 0.00
- 0.02 ± 0.00
- 0.24 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.24 ± 0.02
-
-
- URA-LLaMa 13B
- 0.01 ± 0.00
- 0.31 ± 0.00
- 0.02 ± 0.00
- 0.02 ± 0.00
- 0.24 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.14 ± 0.02
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.26 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.17 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.01
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.06 ± 0.00
- 0.00 ± 0.00
- 0.02 ± 0.00
- 0.19 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.16 ± 0.02
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.06 ± 0.01
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
-
-
- GPT-3.5
- 0.21 ± 0.00
- 0.59 ± 0.00
- 0.32 ± 0.00
- 0.09 ± 0.00
- 0.28 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.72 ± 0.02
-
-
- GPT-4
- 0.21 ± 0.00
- 0.59 ± 0.00
- 0.32 ± 0.00
- 0.09 ± 0.00
- 0.28 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.76 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.reasoning %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign Equ_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m]["Equ"] and dataset[1][m]["Equ"] > Equ_best %}
+ {% assign Equ_best = dataset[1][m]["Equ"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["Equ"] %}
+ {{ dataset[1][model]["Equ"] | round: 2 }} ± {{ dataset[1][model]["Equ_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/zero-shot/sentiment-analysis.md b/_pages/ind/zero-shot/sentiment-analysis.md
index 320d74d..e2ee956 100644
--- a/_pages/ind/zero-shot/sentiment-analysis.md
+++ b/_pages/ind/zero-shot/sentiment-analysis.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/ind/zero-shot/sentiment-analysis
---
# Zero-Shot Sentiment Analysis Leaderboard
+{% assign lang = 'ind' %}
- Models
- VLSP 2016
- UiT-VSFC
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
+ {% for dataset in site.data.leaderboard[lang].zero_shot.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.63 ± 0.02
- 0.63 ± 0.02
- 0.74 ± 0.01
- 0.15 ± 0.01
- 0.87 ± 0.03
- 0.64 ± 0.01
- 0.54 ± 0.01
- 0.85 ± 0.01
- 0.14 ± 0.00
- 0.98 ± 0.01
-
-
- URA-LLaMa 13B
- 0.52 ± 0.02
- 0.35 ± 0.01
- 0.60 ± 0.01
- 0.10 ± 0.01
- 0.64 ± 0.05
- 0.70 ± 0.01
- 0.40 ± 0.01
- 0.72 ± 0.01
- 0.23 ± 0.01
- 0.95 ± 0.01
-
-
- URA-LLaMa 7B
- 0.35 ± 0.02
- 0.24 ± 0.01
- 0.54 ± 0.01
- 0.24 ± 0.01
- 0.31 ± 0.05
- 0.27 ± 0.01
- 0.18 ± 0.00
- 0.52 ± 0.01
- 0.37 ± 0.01
- 0.03 ± 0.01
-
-
- LLaMa-2 13B
- 0.25 ± 0.01
- 0.25 ± 0.01
- 0.49 ± 0.01
- 0.39 ± 0.01
- 0.29 ± 0.05
- 0.29 ± 0.01
- 0.24 ± 0.01
- 0.52 ± 0.01
- 0.42 ± 0.01
- 0.30 ± 0.03
-
-
- LLaMa-2 7B
- 0.15 ± 0.01
- 0.15 ± 0.01
- 0.58 ± 0.01
- 0.73 ± 0.01
- 0.12 ± 0.03
- 0.04 ± 0.00
- 0.06 ± 0.01
- 0.49 ± 0.01
- 0.79 ± 0.00
- 0.01 ± 0.01
-
-
- Vietcuna 7B
- 0.11 ± 0.01
- 0.12 ± 0.01
- 0.49 ± 0.01
- 0.68 ± 0.01
- 0.11 ± 0.03
- 0.05 ± 0.00
- 0.06 ± 0.00
- 0.56 ± 0.01
- 0.73 ± 0.00
- 0.05 ± 0.01
-
-
- MixSUra 8x7B
- 0.45 ± -
- 0.30 ± -
- 0.62 ± -
- 0.50 ± -
- 0.49 ± -
- 0.55 ± -
- 0.40 ± -
- 0.66 ± -
- 0.41 ± -
- 0.60 ± -
-
-
- Gemini Pro
- 0.64 ± -
- 0.47 ± -
- -
- 0.31 ± -
- 0.53 ± -
- 0.76 ± -
- 0.49 ± -
- -
- 0.43 ± -
- 0.77 ± -
-
-
- GPT-3.5
- 0.62 ± 0.02
- 0.56 ± 0.01
- -
- 0.29 ± 0.02
- 0.62 ± 0.05
- 0.81 ± 0.31
- 0.68 ± 0.31
- -
- 0.48 ± 0.01
- 0.83 ± 0.02
-
-
- GPT-4
- 0.71 ± 0.01
- 0.68 ± 0.01
- -
- 0.37 ± 0.01
- 0.70 ± 0.04
- 0.80 ± 0.01
- 0.67 ± 0.01
- -
- 0.47 ± 0.01
- 0.85 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/ind/zero-shot/summarization.md b/_pages/ind/zero-shot/summarization.md
index 8f1c268..f81e374 100644
--- a/_pages/ind/zero-shot/summarization.md
+++ b/_pages/ind/zero-shot/summarization.md
@@ -3,185 +3,132 @@ layout: default
permalink: /leaderboard/ind/zero-shot/summarization
---
# Zero-Shot Summarization Leaderboard
+{% assign lang = 'ind' %}
-
-
-Models
-VietNews
-WikiLingua
-
-
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-
-
-
-
-URA-LLaMa 70B
-0.42 ± 0.17
-0.21 ± 0.12
-0.28 ± 0.00
--0.11 ± 0.00
-0.03 ± 0.19
-0.85 ± 0.00
-14.59 ± 0.05
-17.21 ± 0.33
-0.37 ± 0.00
-0.16 ± 0.00
-0.24 ± 0.00
--0.22 ± 0.00
-0.26 ± 0.16
-0.17 ± 0.00
-0.22 ± 0.00
-22.24 ± 0.97
-
-
-URA-LLaMa 13B
-0.38 ± 0.00
-0.18 ± 0.00
-0.25 ± 0.00
--0.09 ± 0.00
-0.01 ± 0.18
-0.71 ± 0.00
-6.01 ± 0.07
-24.27 ± 0.61
-0.22 ± 0.00
-0.08 ± 0.00
-0.14 ± 0.00
--0.16 ± 0.00
--0.13 ± 0.12
-0.42 ± 0.01
-3.06 ± 0.10
-49.58 ± 1.16
-
-
-URA-LLaMa 7B
-0.38 ± 0.00
-0.14 ± 0.00
-0.25 ± 0.00
--0.09 ± 0.00
-0.04 ± 0.12
-0.65 ± 0.00
-4.88 ± 0.03
-7.77 ± 0.05
-0.40 ± 0.00
-0.15 ± 0.00
-0.26 ± 0.00
--0.16 ± 0.00
-0.19 ± 0.07
-0.73 ± 0.00
-4.79 ± 0.07
-6.22 ± 0.07
-
-
-LLaMa-2 13B
-0.06 ± 0.00
-0.02 ± 0.00
-0.04 ± 0.00
--0.09 ± 0.00
--0.18 ± 0.04
-0.07 ± 0.00
-0.43 ± 0.01
-28.25 ± 0.24
-0.04 ± 0.00
-0.00 ± 0.00
-0.03 ± 0.00
--0.16 ± 0.00
--0.11 ± 0.08
-0.03 ± 0.00
-0.07 ± 0.01
-19.55 ± 0.51
-
-
-LLaMa-2 7B
-0.06 ± 0.00
-0.01 ± 0.00
-0.05 ± 0.00
--0.09 ± 0.00
--0.23 ± 0.04
-0.06 ± 0.00
-0.21 ± 0.00
-15.75 ± 0.20
-0.04 ± 0.00
-0.00 ± 0.00
-0.03 ± 0.00
--0.16 ± 0.00
--0.14 ± 0.07
-0.03 ± 0.00
-0.06 ± 0.00
-17.84 ± 0.50
-
-
-Vietcuna 7B
-0.28 ± 0.00
-0.06 ± 0.00
-0.18 ± 0.00
--0.09 ± 0.00
--0.09 ± 0.09
-0.31 ± 0.00
-0.80 ± 0.01
-171.63 ± 1.71
-0.24 ± 0.00
-0.06 ± 0.00
-0.15 ± 0.00
--0.16 ± 0.00
--0.18 ± 0.07
-0.51 ± 0.01
-1.16 ± 0.01
-238.67 ± 3.37
-
-
-GPT-3.5
-0.36 ± 0.00
-0.20 ± 0.00
-0.24 ± 0.00
--0.09 ± 0.00
-0.04 ± 0.13
-0.86 ± 0.00
-3.97 ± 0.02
-13.32 ± 0.65
-0.43 ± 0.00
-0.21 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.22 ± 0.03
-0.87 ± 0.00
-3.29 ± 0.03
-35.50 ± 0.82
-
-
-GPT-4
-0.41 ± 0.00
-0.21 ± 0.00
-0.26 ± 0.00
--0.08 ± 0.00
--0.04 ± 0.11
-0.84 ± 0.00
-3.45 ± 0.00
-15.43 ± 0.49
-0.44 ± 0.00
-0.21 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.24 ± 0.04
-0.82 ± 0.00
-2.37 ± 0.01
-6.61 ± 0.16
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/zero-shot/text-classification.md b/_pages/ind/zero-shot/text-classification.md
index ca8ebdb..0d0fb92 100644
--- a/_pages/ind/zero-shot/text-classification.md
+++ b/_pages/ind/zero-shot/text-classification.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/ind/zero-shot/text-classification
---
# Zero-Shot Text Classification Leaderboard
+{% assign lang = 'ind' %}
-
-
- Models
- UiT-VSMEC
- PhoATIS
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.40 ± 0.02
- 0.32 ± 0.02
- 0.68 ± 0.01
- 0.14 ± 0.02
- 0.60 ± 0.06
- 0.56 ± 0.02
- 0.48 ± 0.03
- 0.85 ± 0.00
- 0.25 ± 0.02
- 0.56 ± 0.06
-
-
- URA-LLaMa 13B
- 0.29 ± 0.02
- 0.25 ± 0.02
- 0.52 ± 0.01
- 0.09 ± 0.01
- 0.23 ± 0.05
- 0.10 ± 0.01
- 0.10 ± 0.01
- 0.72 ± 0.00
- 0.52 ± 0.01
- 0.14 ± 0.04
-
-
- URA-LLaMa 7B
- 0.13 ± 0.01
- 0.11 ± 0.01
- 0.50 ± 0.01
- 0.15 ± 0.01
- 0.21 ± 0.05
- 0.04 ± 0.01
- 0.04 ± 0.02
- 0.77 ± 0.00
- 0.30 ± 0.01
- 0.04 ± 0.02
-
-
- LLaMa-2 13B
- 0.11 ± 0.01
- 0.10 ± 0.01
- 0.49 ± 0.01
- 0.31 ± 0.01
- 0.09 ± 0.04
- 0.03 ± 0.01
- 0.02 ± 0.00
- 0.45 ± 0.01
- 0.28 ± 0.01
- 0.03 ± 0.02
-
-
- LLaMa-2 7B
- 0.07 ± 0.01
- 0.08 ± 0.01
- 0.52 ± 0.01
- 0.35 ± 0.01
- 0.07 ± 0.03
- 0.00 ± 0.06
- 0.00 ± 0.06
- 0.61 ± 0.01
- 0.32 ± 0.00
- 0.00 ± 0.00
-
-
- Vietcuna 7B
- 0.05 ± 0.01
- 0.02 ± 0.01
- 0.52 ± 0.01
- 0.95 ± 0.01
- 0.03 ± 0.02
- 0.05 ± 0.01
- 0.01 ± 0.00
- 0.66 ± 0.00
- 0.20 ± 0.01
- 0.01 ± 0.21
-
-
- GPT-3.5
- 0.43 ± 0.02
- 0.37 ± 0.02
- -
- 0.29 ± 0.02
- 0.43 ± 0.06
- 0.44 ± 0.02
- 0.38 ± 0.03
- -
- 0.38 ± 0.02
- 0.44 ± 0.05
-
-
- GPT-4
- 0.49 ± 0.02
- 0.46 ± 0.02
- -
- 0.35 ± 0.02
- 0.50 ± 0.06
- 0.89 ± 0.01
- 0.69 ± 0.02
- -
- 0.83 ± 0.01
- 0.89 ± 0.03
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/ind/zero-shot/toxicity-detection.md b/_pages/ind/zero-shot/toxicity-detection.md
index 00e7152..eb3e0a9 100644
--- a/_pages/ind/zero-shot/toxicity-detection.md
+++ b/_pages/ind/zero-shot/toxicity-detection.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/ind/zero-shot/toxicity-detection
---
# Zero-Shot Toxicity Detection Leaderboard
+{% assign lang = 'ind' %}
- Models
- UiT-ViCTSD
- UiT-ViHSD
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
+ {% for dataset in site.data.leaderboard[lang].zero_shot.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.61 ± 0.01
- 0.52 ± 0.01
- 0.77 ± 0.01
- 0.17 ± 0.01
- 0.97 ± 0.01
- 0.38 ± 0.01
- 0.34 ± 0.01
- 0.74 ± 0.01
- 0.25 ± 0.01
- 0.91 ± 0.01
-
-
- URA-LLaMa 13B
- 0.46 ± 0.01
- 0.28 ± 0.03
- 0.53 ± 0.02
- 0.22 ± 0.01
- 0.48 ± 0.03
- 0.33 ± 0.01
- 0.18 ± 0.00
- 0.60 ± 0.01
- 0.35 ± 0.01
- 0.54 ± 0.02
-
-
- URA-LLaMa 7B
- 0.25 ± 0.01
- 0.19 ± 0.01
- 0.53 ± 0.01
- 0.38 ± 0.01
- 0.13 ± 0.02
- 0.19 ± 0.00
- 0.13 ± 0.00
- 0.55 ± 0.01
- 0.46 ± 0.01
- 0.13 ± 0.01
-
-
- LLaMa-2 13B
- 0.16 ± 0.01
- 0.14 ± 0.00
- 0.40 ± 0.01
- 0.50 ± 0.01
- 0.24 ± 0.02
- 0.09 ± 0.00
- 0.13 ± 0.00
- 0.38 ± 0.01
- 0.63 ± 0.00
- 0.10 ± 0.01
-
-
- LLaMa-2 7B
- 0.13 ± 0.01
- 0.14 ± 0.01
- 0.45 ± 0.02
- 0.69 ± 0.01
- 0.09 ± 0.01
- 0.03 ± 0.00
- 0.05 ± 0.01
- 0.56 ± 0.01
- 0.75 ± 0.00
- 0.00 ± 0.00
-
-
- Vietcuna 7B
- 0.09 ± 0.00
- 0.07 ± 0.00
- 0.50 ± 0.00
- 0.41 ± 0.00
- 0.10 ± 0.03
- 0.07 ± 0.00
- 0.04 ± 0.00
- 0.50 ± 0.00
- 0.26 ± 0.00
- 0.07 ± 0.01
-
-
- GPT-3.5
- 0.75 ± 0.01
- 0.61 ± 0.02
- -
- 0.25 ± 0.01
- 0.80 ± 0.04
- 0.55 ± 0.01
- 0.42 ± 0.01
- -
- 0.22 ± 0.01
- 0.55 ± 0.02
-
-
- GPT-4
- 0.89 ± 0.01
- 0.69 ± 0.01
- -
- 0.39 ± 0.01
- 0.89 ± 0.03
- 0.75 ± 0.01
- 0.53 ± 0.01
- -
- 0.42 ± 0.01
- 0.75 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/bias-toxicity/question-answering.md b/_pages/kr/bias-toxicity/question-answering.md
index 40fbb60..9e224f5 100644
--- a/_pages/kr/bias-toxicity/question-answering.md
+++ b/_pages/kr/bias-toxicity/question-answering.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/kr/bias-toxicity/question-answering
---
# Bias-Toxicity Question Answering Leaderboard
+{% assign lang = 'kr' %}
-
-
-Models
-XQuAD
-MLQA
-
-
-DRR→|
-DRG→|
-SAR→|
-SAG→|
-Tox↓
-DRR→|
-DRG→|
-SAR→|
-SAG→|
-Tox↓
-
-
-
-
-URA-LLaMa 70B
--
-0.39 ± 0.01
--
-0.41 ± 0.00
-0.02 ± 0.00
--
-0.14 ± 0.02
--
-0.42 ± 0.03
-0.02 ± 0.00
-
-
-URA-LLaMa 13B
--
-0.39 ± 0.01
--
-0.45 ± 0.01
-0.02 ± 0.00
--
-0.17 ± 0.1
--
-0.38 ± 0.00
-0.02 ± 0.00
-
-
-URA-LLaMa 7B
--
-0.43 ± 0.01
--
-0.48 ± 0.00
-0.03 ± 0.00
--
-0.18 ± 0.01
--
-0.37 ± 0.01
-0.02 ± 0.00
-
-
-LLaMa-2 13B
--
-0.35 ± 0.03
--
-0.46 ± 0.00
-0.01 ± 0.00
--
-0.27 ± 0.01
--
-0.43 ± 0.00
-0.01 ± 0.00
-
-
-LLaMa-2 7B
--
-0.46 ± 0.01
--
-0.42 ± 0.00
-0.01 ± 0.00
--
-0.21 ± 0.06
--
-0.45 ± 0.00
-0.01 ± 0.00
-
-
-Vietcuna 7B
--
-0.50 ± 0.00
--
--
-0.04 ± 0.00
--
-0.23 ± 0.09
--
-0.49 ± 0.01
-0.04 ± 0.00
-
-
-GPT-3.5
--
-0.43 ± 0.01
--
-0.48 ± 0.00
-0.02 ± 0.00
--
-0.18 ± 0.01
--
-0.40 ± 0.00
-0.02 ± 0.00
-
-
-GPT-4
--
-0.40 ± 0.01
--
-0.45 ± 0.00
-0.02 ± 0.00
--
-0.16 ± 0.01
--
-0.41 ± 0.01
-0.02 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.question_answering %}
+ DRR↓
+ DRG↓
+ SAR↓
+ SAG↓
+ Tox↓
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.question_answering %}
+ {% assign DRR_min = 1 %}
+ {% assign DRG_min = 1 %}
+ {% assign SAR_min = 1 %}
+ {% assign SAG_min = 1 %}
+ {% assign Tox_min = 1 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].DRR and dataset[1][m].DRR < DRR_min %}
+ {% assign DRR_min = dataset[1][m].DRR %}
+ {% endif %}
+ {% if dataset[1][m].DRG and dataset[1][m].DRG < DRG_min %}
+ {% assign DRG_min = dataset[1][m].DRG %}
+ {% endif %}
+ {% if dataset[1][m].SAR and dataset[1][m].SAR < SAR_min %}
+ {% assign SAR_min = dataset[1][m].SAR %}
+ {% endif %}
+ {% if dataset[1][m].SAG and dataset[1][m].SAG < SAG_min %}
+ {% assign SAG_min = dataset[1][m].SAG %}
+ {% endif %}
+ {% if dataset[1][m].Tox and dataset[1][m].Tox < Tox_min %}
+ {% assign Tox_min = dataset[1][m].Tox %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].DRR %}
+ {{ dataset[1][model].DRR | round: 2 }} ± {{ dataset[1][model].DRR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].DRG %}
+ {{ dataset[1][model].DRG | round: 2 }} ± {{ dataset[1][model].DRG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAR %}
+ {{ dataset[1][model].SAR | round: 2 }} ± {{ dataset[1][model].SAR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAG %}
+ {{ dataset[1][model].SAG | round: 2 }} ± {{ dataset[1][model].SAG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Tox %}
+ {{ dataset[1][model].Tox | round: 2 }} ± {{ dataset[1][model].Tox_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/bias-toxicity/summarization.md b/_pages/kr/bias-toxicity/summarization.md
index 4a632c6..14c703a 100644
--- a/_pages/kr/bias-toxicity/summarization.md
+++ b/_pages/kr/bias-toxicity/summarization.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/kr/bias-toxicity/summarization
---
# Bias-Toxicity Summarization Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- VietNews
- WikiLingua
-
-
- DRR→|
- DRG→|
- SAR→|
- SAG→|
- Tox↓
- DRR→|
- DRG→|
- SAR→|
- SAG→|
- Tox↓
-
-
-
-
- URA-LLaMa 70B
- -
- 0.21 ± 0.01
- -
- 0.31 ± 0.01
- 0.05 ± 0.00
- -
- 0.03 ± 0.02
- -
- 0.25 ± 0.02
- 0.03 ± 0.00
-
-
- URA-LLaMa 13B
- -
- 0.20 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.07 ± 0.04
- -
- 0.31 ± 0.03
- 0.02 ± 0.00
-
-
- URA-LLaMa 7B
- -
- 0.24 ± 0.02
- -
- 0.33 ± 0.01
- 0.04 ± 0.00
- -
- 0.07 ± 0.02
- -
- 0.38 ± 0.02
- 0.03 ± 0.00
-
-
- LLaMa-2 13B
- -
- 0.26 ± 0.01
- -
- 0.38 ± 0.01
- 0.01 ± 0.00
- -
- 0.17 ± 0.08
- -
- 0.50 ± 0.02
- 0.01 ± 0.00
-
-
- LLaMa-2 7B
- -
- 0.28 ± 0.02
- -
- 0.39 ± 0.01
- 0.01 ± 0.00
- -
- 0.39 ± 0.05
- -
- 0.50 ± 0.02
- 0.01 ± 0.00
-
-
- Vietcuna 7B
- -
- 0.21 ± 0.02
- -
- 0.32 ± 0.02
- 0.04 ± 0.00
- -
- 0.17 ± 0.04
- -
- 0.39 ± 0.03
- 0.03 ± 0.00
-
-
- GPT-3.5
- -
- 0.22 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.03 ± 0.02
- -
- 0.28 ± 0.01
- 0.02 ± 0.00
-
-
- GPT-4
- -
- 0.19 ± 0.01
- -
- 0.28 ± 0.01
- 0.06 ± 0.00
- -
- 0.09 ± 0.02
- -
- 0.28 ± 0.01
- 0.02 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.summarization %}
+ DRR↓
+ DRG↓
+ SAR↓
+ SAG↓
+ Tox↓
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.summarization %}
+ {% assign DRR_min = 1 %}
+ {% assign DRG_min = 1 %}
+ {% assign SAR_min = 1 %}
+ {% assign SAG_min = 1 %}
+ {% assign Tox_min = 1 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].DRR and dataset[1][m].DRR < DRR_min %}
+ {% assign DRR_min = dataset[1][m].DRR %}
+ {% endif %}
+ {% if dataset[1][m].DRG and dataset[1][m].DRG < DRG_min %}
+ {% assign DRG_min = dataset[1][m].DRG %}
+ {% endif %}
+ {% if dataset[1][m].SAR and dataset[1][m].SAR < SAR_min %}
+ {% assign SAR_min = dataset[1][m].SAR %}
+ {% endif %}
+ {% if dataset[1][m].SAG and dataset[1][m].SAG < SAG_min %}
+ {% assign SAG_min = dataset[1][m].SAG %}
+ {% endif %}
+ {% if dataset[1][m].Tox and dataset[1][m].Tox < Tox_min %}
+ {% assign Tox_min = dataset[1][m].Tox %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].DRR %}
+ {{ dataset[1][model].DRR | round: 2 }} ± {{ dataset[1][model].DRR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].DRG %}
+ {{ dataset[1][model].DRG | round: 2 }} ± {{ dataset[1][model].DRG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAR %}
+ {{ dataset[1][model].SAR | round: 2 }} ± {{ dataset[1][model].SAR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAG %}
+ {{ dataset[1][model].SAG | round: 2 }} ± {{ dataset[1][model].SAG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Tox %}
+ {{ dataset[1][model].Tox | round: 2 }} ± {{ dataset[1][model].Tox_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/bias-toxicity/translation.md b/_pages/kr/bias-toxicity/translation.md
index 5cb9225..1ea89b6 100644
--- a/_pages/kr/bias-toxicity/translation.md
+++ b/_pages/kr/bias-toxicity/translation.md
@@ -3,264 +3,94 @@ layout: default
permalink: /leaderboard/kr/bias-toxicity/translation
---
# Bias-Toxicity Translation Leaderboard
+{% assign lang = 'kr' %}
Models
- PhoMT (En - Vi)
- OPUS100 (En - Vi)
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.translation %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- DRR$→|
- DRG$→|
- SAR$→|
- SAG$→|
- Tox↓
- DRR$→|
- DRG$→|
- SAR$→|
- SAG$→|
- Tox↓
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.translation %}
+ DRR↓
+ DRG↓
+ SAR↓
+ SAG↓
+ Tox↓
+ {% endfor %}
-
- URA-LLaMa 70B
- -
- 0.03 ± 0.01
- -
- 0.30 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- URA-LLaMa 13B
- -
- 0.09 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- URA-LLaMa 7B
- -
- 0.13 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.18 ± 0.03
- -
- 0.47 ± 0.01
- 0.07 ± 0.00
-
-
- LLaMa-2 13B
- -
- 0.08 ± 0.00
- -
- 0.33 ± 0.02
- 0.05 ± 0.00
- -
- 0.31 ± 0.02
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- LLaMa-2 7B
- -
- 0.17 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.21 ± 0.02
- -
- 0.45 ± 0.02
- 0.05 ± 0.00
-
-
- Vietcuna 7B
- -
- 0.18 ± 0.01
- -
- 0.36 ± 0.01
- 0.04 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- GPT-3.5
- -
- 0.11 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.03
- 0.07 ± 0.00
-
-
- GPT-4
- -
- 0.09 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.14 ± 0.03
- -
- 0.41 ± 0.01
- 0.07 ± 0.00
-
-
-
----
-layout: default
-permalink: /leaderboard/kr/bias-toxicity/translation
----
-# Bias-Toxicity Translation Leaderboard
-
-
-
-
- Models
- PhoMT (En $\to$ Vi)
- OPUS100 (En $\to$ Vi)
-
-
- DRR$\to\mid$
- DRG$\to\mid$
- SAR$\to\mid$
- SAG$\to\mid$
- Tox↓
- DRR$\to\mid$
- DRG$\to\mid$
- SAR$\to\mid$
- SAG$\to\mid$
- Tox↓
-
-
-
-
- URA-LLaMa 70B
- -
- 0.03 ± 0.01
- -
- 0.30 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- URA-LLaMa 13B
- -
- 0.09 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- URA-LLaMa 7B
- -
- 0.13 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.18 ± 0.03
- -
- 0.47 ± 0.01
- 0.07 ± 0.00
-
-
- LLaMa-2 13B
- -
- 0.08 ± 0.00
- -
- 0.33 ± 0.02
- 0.05 ± 0.00
- -
- 0.31 ± 0.02
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- LLaMa-2 7B
- -
- 0.17 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.21 ± 0.02
- -
- 0.45 ± 0.02
- 0.05 ± 0.00
-
-
- Vietcuna 7B
- -
- 0.18 ± 0.01
- -
- 0.36 ± 0.01
- 0.04 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- GPT-3.5
- -
- 0.11 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.03
- 0.07 ± 0.00
-
-
- GPT-4
- -
- 0.09 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.14 ± 0.03
- -
- 0.41 ± 0.01
- 0.07 ± 0.00
-
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.translation %}
+ {% assign DRR_min = 1 %}
+ {% assign DRG_min = 1 %}
+ {% assign SAR_min = 1 %}
+ {% assign SAG_min = 1 %}
+ {% assign Tox_min = 1 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].DRR and dataset[1][m].DRR < DRR_min %}
+ {% assign DRR_min = dataset[1][m].DRR %}
+ {% endif %}
+ {% if dataset[1][m].DRG and dataset[1][m].DRG < DRG_min %}
+ {% assign DRG_min = dataset[1][m].DRG %}
+ {% endif %}
+ {% if dataset[1][m].SAR and dataset[1][m].SAR < SAR_min %}
+ {% assign SAR_min = dataset[1][m].SAR %}
+ {% endif %}
+ {% if dataset[1][m].SAG and dataset[1][m].SAG < SAG_min %}
+ {% assign SAG_min = dataset[1][m].SAG %}
+ {% endif %}
+ {% if dataset[1][m].Tox and dataset[1][m].Tox < Tox_min %}
+ {% assign Tox_min = dataset[1][m].Tox %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].DRR %}
+ {{ dataset[1][model].DRR | round: 2 }} ± {{ dataset[1][model].DRR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].DRG %}
+ {{ dataset[1][model].DRG | round: 2 }} ± {{ dataset[1][model].DRG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAR %}
+ {{ dataset[1][model].SAR | round: 2 }} ± {{ dataset[1][model].SAR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAG %}
+ {{ dataset[1][model].SAG | round: 2 }} ± {{ dataset[1][model].SAG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Tox %}
+ {{ dataset[1][model].Tox | round: 2 }} ± {{ dataset[1][model].Tox_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/chain-of-thought/reasoning.md b/_pages/kr/chain-of-thought/reasoning.md
index 0b34d0f..31d87c6 100644
--- a/_pages/kr/chain-of-thought/reasoning.md
+++ b/_pages/kr/chain-of-thought/reasoning.md
@@ -3,73 +3,72 @@ layout: default
permalink: /leaderboard/kr/chain-of-thought/reasoning
---
# Chain-Of-Thought Reasoning Leaderboard
+{% assign lang = 'kr' %}
- Models
- Metrics
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].chain_of_thought.reasoning %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM ↑
- F1 ↑
- Equ. ↑
+ {% for dataset in site.data.leaderboard[lang].chain_of_thought.reasoning %}
+ EM↑
+ F1↑
+ Equ.↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.00 ± 0.00
- 0.12 ± 0.01
- 0.18 ± 0.02
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.23 ± 0.01
- 0.17 ± 0.01
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.23 ± 0.01
- 0.09 ± 0.01
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.12 ± 0.01
- 0.18 ± 0.02
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.10 ± 0.00
- 0.12 ± 0.02
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.13 ± 0.01
- 0.10 ± 0.01
-
-
- MixSUra 8x7B
- 0.00 ± 0.00
- 0.17 ± 0.01
- 0.33 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.32 ± 0.01
- 0.78 ± 0.02
-
-
- GPT-4
- 0.00 ± 0.00
- 0.32 ± 0.01
- 0.79 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].chain_of_thought.reasoning %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign Equ_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].Equ and dataset[1][m].Equ > Equ_best %}
+ {% assign Equ_best = dataset[1][m].Equ %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Equ %}
+ {{ dataset[1][model].Equ | round: 2 }} ± {{ dataset[1][model].Equ_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/fairness-aware/information-retrieval.md b/_pages/kr/fairness-aware/information-retrieval.md
index bc6feae..d3395a6 100644
--- a/_pages/kr/fairness-aware/information-retrieval.md
+++ b/_pages/kr/fairness-aware/information-retrieval.md
@@ -3,113 +3,84 @@ layout: default
permalink: /leaderboard/kr/fairness-aware/information-retrieval
---
# Fairness-Aware Information Retrieval Leaderboard
+{% assign lang = 'kr' %}
- Models
- mMARCO
- mRobust04
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 13B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 7B
- 0.10 ± 0.00
- 0.10 ± 0.00
- 0.14 ± 0.00
- 0.14 ± 0.00
- 0.01 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- LLaMa-2 13B
-
-
-
-
-
-
-
-
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.10 ± 0.00
- 0.07 ± 0.00
- 0.16 ± 0.00
- -
- -
- -
- -
-
-
- Vietcuna 7B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/fairness-aware/language-modeling.md b/_pages/kr/fairness-aware/language-modeling.md
index 2b6291d..6e96973 100644
--- a/_pages/kr/fairness-aware/language-modeling.md
+++ b/_pages/kr/fairness-aware/language-modeling.md
@@ -3,164 +3,108 @@ layout: default
permalink: /leaderboard/kr/fairness-aware/language-modeling
---
# Fairness-Aware Language Modeling Leaderboard
+{% assign lang = 'kr' %}
- Models
- MLQA-MLM
- VSEC
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.language_modeling %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.language_modeling %}
+ EM↑
+ CER↓
+ WER↓
+ CED↓
+ WED↓
+ PLX↓
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.01 ± 0.00
- 0.58 ± 0.01
- 0.70 ± 0.01
- 653.57 ± 12.05
- 150.64 ± 2.73
- 1.25 ± 0.06
- 0.30 ± 0.00
- 0.11 ± 0.00
- 0.14 ± 0.00
- 15.19 ± 0.42
- 4.12 ± 0.11
- 1.13 ± 0.00
-
-
- URA-LLaMa 13B
- 0.02 ± 0.00
- 0.40 ± 0.01
- 0.56 ± 0.01
- 518.38 ± 11.19
- 125.24 ± 2.66
- 1.48 ± 0.11
- 0.32 ± 0.00
- 0.07 ± 0.00
- 0.21 ± 0.00
- 2.98 ± 0.11
- 1.24 ± 0.03
- 1.15 ± 0.00
-
-
- URA-LLaMa 7B
- 0.01 ± 0.00
- 0.40 ± 0.01
- 0.55 ± 0.01
- 492.93 ± 11.32
- 117.82 ± 2.72
- 1.22 ± 0.01
- 0.20 ± 0.00
- 0.54 ± 0.01
- 0.67 ± 0.01
- 41.77 ± 1.57
- 10.12 ± 0.35
- 1.07 ± 0.00
-
-
- LLaMa-2 13B
- 0.01 ± 0.00
- 0.76 ± 0.00
- 0.89 ± 0.00
- 782.03 ± 11.71
- 192.66 ± 2.83
- 1.27 ± 0.04
- 0.15 ± 0.00
- 0.07 ± 0.00
- 0.22 ± 0.00
- 3.39 ± 0.16
- 1.52 ± 0.04
- 1.01 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.79 ± 0.00
- 0.96 ± 0.00
- 761.38 ± 10.65
- 197.18 ± 2.66
- 1.75 ± 0.20
- 0.12 ± 0.00
- 0.35 ± 0.01
- 0.48 ± 0.01
- 47.54 ± 0.85
- 11.82 ± 0.19
- 1.06 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 1.04 ± 0.00
- 1.06 ± 0.00
- 940.71 ± 12.48
- 208.05 ± 2.81
- 1.40 ± 0.00
- 0,06 ± 0.00
- 4.78 ± 0.06
- 4.80 ± 0.06
- 634.48 ± 8.58
- 145.12 ± 1.94
- 1.46 ± 0.01
-
-
- MixSUra 8x7B
- 0.00 ± -
- 0.56 ± -
- 0.63 ± -
- 535.76 ± -
- 133.64 ± -
- 1.00 ± -
- 0,07 ± -
- 0.20 ± -
- 0.29 ± -
- 25.96 ± -
- 8.79 ± -
- 1.00 ± -
-
-
- GPT-3.5
- 0.03 ± 0.00
- 0.29 ± 0.01
- 0.46 ± 0.01
- 398.19 ± 11.01
- 96.42 ± 2.54
- -
- 0.59 ± 0.00
- 0.06 ± 0.00
- 0.19 ± 0.00
- 1.99 ± 0.08
- 0.74 ± 0.02
- -
-
-
- GPT-4
- 0.06 ± 0.00
- 0.36 ± 0.01
- 0.41 ± 0.01
- 347.82 ± 10.23
- 86.96 ± 2.41
- -
- 0.67 ± 0.00
- 0.01 ± 0.00
- 0.02 ± 0.00
- 1.30 ± 0.04
- 0.54 ± 0.01
- -
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.language_modeling %}
+ {% assign EM_best = 0 %}
+ {% assign CER_best = 1 %}
+ {% assign WER_best = 1 %}
+ {% assign CED_best = 10000 %}
+ {% assign WED_best = 10000 %}
+ {% assign PLX_best = 10000 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].CER and dataset[1][m].CER < CER_best %}
+ {% assign CER_best = dataset[1][m].CER %}
+ {% endif %}
+ {% if dataset[1][m].WER and dataset[1][m].WER < WER_best %}
+ {% assign WER_best = dataset[1][m].WER %}
+ {% endif %}
+ {% if dataset[1][m].CED and dataset[1][m].CED < CED_best %}
+ {% assign CED_best = dataset[1][m].CED %}
+ {% endif %}
+ {% if dataset[1][m].WED and dataset[1][m].WED < WED_best %}
+ {% assign WED_best = dataset[1][m].WED %}
+ {% endif %}
+ {% if dataset[1][m].PLX and dataset[1][m].PLX < PLX_best %}
+ {% assign PLX_best = dataset[1][m].PLX %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CER %}
+ {{ dataset[1][model].CER | round: 2 }} ± {{ dataset[1][model].CER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WER %}
+ {{ dataset[1][model].WER | round: 2 }} ± {{ dataset[1][model].WER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CED %}
+ {{ dataset[1][model].CED | round: 2 }} ± {{ dataset[1][model].CED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WED %}
+ {{ dataset[1][model].WED | round: 2 }} ± {{ dataset[1][model].WED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].PLX %}
+ {{ dataset[1][model].PLX | round: 2 }} ± {{ dataset[1][model].PLX_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/fairness-aware/question-answering.md b/_pages/kr/fairness-aware/question-answering.md
index 9d96fd8..5c18ff5 100644
--- a/_pages/kr/fairness-aware/question-answering.md
+++ b/_pages/kr/fairness-aware/question-answering.md
@@ -3,77 +3,60 @@ layout: default
permalink: /leaderboard/kr/fairness-aware/question-answering
---
# Fairness-Aware Question Answering Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- XQuAD
- MLQA
-
-
- Exact Match↑
- F1↑
- Exact Match↑
- F1↑
-
-
-
-
- URA-LLaMa 70B
- 0.04 ± 0.00
- 0.27 ± 0.00
- 0.03 ± 0.00
- 0.25 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.14 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.15 ± 0.01
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.03 ± 0.00
- 0.00 ± 0.00
- 0.04 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.24 ± 0.00
- 0.00 ± 0.00
- 0.23 ± 0.00
-
-
- GPT-4
- 0.00 ± 0.00
- 0.26 ± 0.00
- 0.00 ± 0.00
- 0.24 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/fairness-aware/sentiment-analysis.md b/_pages/kr/fairness-aware/sentiment-analysis.md
index b13babc..e731b8d 100644
--- a/_pages/kr/fairness-aware/sentiment-analysis.md
+++ b/_pages/kr/fairness-aware/sentiment-analysis.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/kr/fairness-aware/sentiment-analysis
---
# Fairness-Aware Sentiment Analysis Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- VLSP 2016
- UiT-VSFC
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.65 ± 0.01
- 0.49 ± 0.01
- 0.58 ± 0.01
- 0.13 ± 0.01
- 0.77 ± 0.04
- 0.76 ± 0.01
- 0.48 ± 0.01
- 0.61 ± 0.01
- 0.17 ± 0.01
- 0.66 ± 0.03
-
-
- URA-LLaMa 13B
- 0.59 ± 0.01
- 0.57 ± 0.01
- 0.62 ± 0.01
- 0.07 ± 0.01
- 0.83 ± 0.04
- 0.75 ± 0.01
- 0.46 ± 0.08
- 0.83 ± 0.01
- 0.11 ± 0.01
- 0.88 ± 0.02
-
-
- URA-LLaMa 7B
- 0.74 ± 0.02
- 0.39 ± 0.06
- 0.83 ± 0.01
- 0.21 ± 0.02
- 0.98 ± 0.02
- 0.73 ± 0.01
- 0.73 ± 0.01
- 0.78 ± 0.01
- 0.13 ± 0.01
- 0.94 ± 0.01
-
-
- LLaMa-2 13B
- 0.51 ± 0.01
- 0.1 ± 0.06
- 0.56 ± 0.01
- 0.32 ± 0.02
- 0.79 ± 0.04
- 0.63 ± 0.01
- 0.41 ± 0.02
- 0.70 ± 0.01
- 0.13 ± 0.01
- 0.89 ± 0.02
-
-
- LLaMa-2 7B
- 0.45 ± 0.02
- 0.34 ± 0.01
- 0.53 ± 0.01
- 0.26 ± 0.02
- 0.50 ± 0.0
- 0.51 ± 0.01
- 0.55 ± 0.01
- 0.68 ± 0.01
- 0.22 ± 0.01
- 0.64 ± 0.03
-
-
- Vietcuna 7B
- 0.04 ± 0.01
- 0.04 ± 0.01
- 0.49 ± 0.01
- 0.71 ± 0.01
- 0.05 ± 0.02
- 0.03 ± 0.00
- 0.03 ± 0.00
- 0.55 ± 0.01
- 0.50 ± 0.00
- 0.01 ± 0.01
-
-
- MixSUra 8x7B
- 0.62 ± -
- 0.62 ± -
- 0.59 ± -
- 0.30 ± -
- 0.59 ± -
- 0.74 ± -
- 0.46 ± -
- 0.61 ± -
- 0.24 ± -
- 0.66 ± -
-
-
- Gemini Pro
- 0.67 ± -
- 0.50 ± -
- -
- 0.34 ± -
- 0.59 ± -
- 0.79 ± -
- 0.50 ± -
- -
- 0.46 ± -
- 0.82 ± -
-
-
- GPT-3.5
- 0.66 ± 0.01
- 0.60 ± 0.01
- -
- 0.33 ± 0.01
- 0.52 ± 0.05
- 0.86 ± 0.01
- 0.71 ± 0.01
- -
- 0.52 ± 0.01
- 0.86 ± 0.02
-
-
- GPT-4
- 0.75 ± 0.01
- 0.74 ± 0.01
- -
- 0.41 ± 0.00
- 0.73 ± 0.04
- 0.85 ± 0.01
- 0.71 ± 0.01
- -
- 0.52 ± 0.01
- 0.87 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/fairness-aware/text-classification.md b/_pages/kr/fairness-aware/text-classification.md
index cb180bb..e3d5a2a 100644
--- a/_pages/kr/fairness-aware/text-classification.md
+++ b/_pages/kr/fairness-aware/text-classification.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/kr/fairness-aware/text-classification
---
# Fairness-Aware Text Classification Leaderboard
+{% assign lang = 'kr' %}
- Models
- UiT-VSMEC
- PhoATIS
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.24 ± 0.02
- 0.14 ± 0.01
- 0.58 ± 0.01
- 0.26 ± 0.02
- 0.37 ± 0.06
- 0.15 ± 0.01
- 0.22 ± 0.03
- 0.31 ± 0.00
- 0.81 ± 0.01
- 0.13 ± 0.04
-
-
- URA-LLaMa 13B
- 0.31 ± 0.02
- 0.11 ± 0.01
- 0.58 ± 0.01
- 0.23 ± 0.02
- 0.57 ± 0.06
- 0.01 ± 0.01
- 0.05 ± 0.02
- 0.58 ± 0.00
- 0.84 ± 0.01
- 0.00 ± 0.01
-
-
- URA-LLaMa 7B
- 0.29 ± 0.02
- 0.11 ± 0.01
- 0.60 ± 0.01
- 0.12 ± 0.02
- 0.41 ± 0.06
- 0.00 ± 0.01
- 0.00 ± 0.00
- 0.55 ± 0.00
- 0.30 ± 0.01
- 0.01 ± 0.03
-
-
- LLaMa-2 13B
- 0.18 ± 0.02
- 0.08 ± 0.01
- 0.55 ± 0.01
- 0.45 ± 0.01
- 0.44 ± 0.06
- 0.02 ± 0.01
- 0.01 ± 0.02
- 0.57 ± 0.01
- 0.90 ± 0.01
- 0.01 ± 0.01
-
-
- LLaMa-2 7B
- 0.25 ± 0.02
- 0.11 ± 0.01
- 0.57 ± 0.01
- 0.22 ± 0.02
- 0.53 ± 0.06
- 0.02 ± 0.00
- 0.06 ± 0.01
- 0.57 ± 0.01
- 0.68 ± 0.01
- 0.01 ± 0.01
-
-
- Vietcuna 7B
- 0.15 ± 0.01
- 0.05 ± 0.01
- 0.46 ± 0.01
- 0.85 ± 0.01
- 0.16 ± 0.04
- 0.04 ± 0.01
- 0.01 ± 0.00
- 0.77 ± 0.01
- 0.21 ± 0.01
- 0.07 ± 0.03
-
-
- MixSUra 8x7B
- 0.40 ± -
- 0.36 ± -
- 0.72 ± -
- 0.53 ± -
- 0.79 ± -
- 0.81 ± -
- 0.58 ± -
- 0.96 ± -
- 0.14 ± -
- 0.91 ± -
-
-
- Gemini Pro
- 0.48 ± -
- 0.38 ± -
- -
- 0.34 ± -
- 0.43 ± -
- 0.79 ± -
- 0.67 ± -
- -
- 0.73 ± -
- 0.68 ± -
-
-
- GPT-3.5
- 0.44 ± 0.02
- 0.42 ± 0.02
- -
- 0.30 ± 0.02
- 0.36 ± 0.06
- 0.68 ± 0.02
- 0.66 ± 0.03
- -
- 0.62 ± 0.02
- 0.67 ± 0.05
-
-
- GPT-4
- 0.49 ± 0.02
- 0.47 ± 0.02
- -
- 0.35 ± 0.02
- 0.36 ± 0.06
- 0.83 ± 0.01
- 0.76 ± 0.03
- -
- 0.77 ± 0.01
- 0.87 ± 0.04
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/fairness-aware/toxicity-detection.md b/_pages/kr/fairness-aware/toxicity-detection.md
index 3285dca..187c4da 100644
--- a/_pages/kr/fairness-aware/toxicity-detection.md
+++ b/_pages/kr/fairness-aware/toxicity-detection.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/kr/fairness-aware/toxicity-detection
---
# Fairness-Aware Toxicity Detection Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- UiT-ViCTSD
- UiT-ViHSD
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.41 ± 0.02
- 0.26 ± 0.01
- 0.75 ± 0.01
- 0.53 ± 0.01
- 0.33 ± 0.05
- 0.15 ± 0.00
- 0.40 ± 0.00
- 0.64 ± 0.01
- 0.58 ± 0.00
- 0.24 ± 0.02
-
-
- URA-LLaMa 13B
- 0.43 ± 0.02
- 0.29 ± 0.07
- 0.66 ± 0.01
- 0.36 ± 0.02
- 0.42 ± 0.05
- 0.24 ± 0.01
- 0.15 ± 0.00
- 0.61 ± 0.01
- 0.43 ± 0.01
- 0.21 ± 0.02
-
-
- URA-LLaMa 7B
- 0.42 ± 0.02
- 0.39 ± 0.01
- 0.60 ± 0.01
- 0.30 ± 0.01
- 0.66 ± 0.05
- 0.16 ± 0.00
- 0.10 ± 0.00
- 0.67 ± 0.01
- 0.33 ± 0.00
- 0.28 ± 0.02
-
-
- LLaMa-2 13B
- 0.27 ± 0.01
- 0.18 ± 0.01
- 0.67 ± 0.01
- 0.53 ± 0.01
- 0.57 ± 0.05
- 0.16 ± 0.00
- 0.10 ± 0.00
- 0.62 ± 0.01
- 0.59 ± 0.00
- 0.42 ± 0.02
-
-
- LLaMa-2 7B
- 0.15 ± 0.01
- 0.11 ± 0.01
- 0.62 ± 0.01
- 0.67 ± 0.01
- 0.07 ± 0.03
- 0.01 ± 0.00
- 0.01 ± 0.00
- 0.56 ± 0.01
- 0.71 ± 0.00
- 0.01 ± 0.00
-
-
- Vietcuna 7B
- 0.08 ± 0.01
- 0.09 ± 0.01
- 0.50 ± 0.01
- 0.42 ± 0.01
- 0.06 ± 0.03
- 0.62 ± 0.01
- 0.21 ± 0.00
- 0.50 ± 0.00
- 0.29 ± 0.01
- 0.62 ± 0.02
-
-
- MixSUra 8x7B
- 0.69 ± -
- 0.38 ± -
- - ± -
- 0.29 ± -
- 0.78 ± -
- 0.56 ± -
- 0.31 ± -
- 0.68 ± -
- 0.32 ± -
- 0.92 ± -
-
-
- Gemini Pro
- 0.81 ± -
- 0.43 ± -
- - ± -
- 0.31 ± -
- 0.82 ± -
- 0.70 ± -
- 0.37 ± -
- - ± -
- 0.36 ± -
- 0.69 ± -
-
-
- GPT-3.5
- 0.60 ± 0.02
- 0.52 ± 0.02
- - ± -
- 0.11 ± 0.02
- 0.63 ± 0.05
- 0.61 ± 0.01
- 0.46 ± 0.01
- - ± -
- 0.29 ± 0.01
- 0.62 ± 0.02
-
-
- GPT-4
- 0.87 ± 0.01
- 0.69 ± 0.02
- - ± -
- 0.37 ± 0.01
- 0.86 ± 0.03
- 0.76 ± 0.01
- 0.56 ± 0.01
- - ± -
- 0.43 ± 0.01
- 0.76 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/few-shot/information-retrieval.md b/_pages/kr/few-shot/information-retrieval.md
index a297cfd..bfd814e 100644
--- a/_pages/kr/few-shot/information-retrieval.md
+++ b/_pages/kr/few-shot/information-retrieval.md
@@ -3,124 +3,84 @@ layout: default
permalink: /leaderboard/kr/few-shot/information-retrieval
---
# Few-Shot Information Retrieval Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- mMARCO
- mRobust04
-
-
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
-
-
-
-
- URA-LLaMa 70B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.06 ± 0.00
- 0.14 ± 0.00
- 0.04±0.00
- 0.04±0.00
- 0.03±0.00
- 0.04±0.00
-
-
- URA-LLaMa 13B
- 0.04 ± 0.00
- 0.10 ± 0.00
- 0.06 ± 0.00
- 0.14 ± 0.00
- 0.03±0.00
- 0.05±0.00
- 0.04±0.00
- 0.04±0.00
-
-
- URA-LLaMa 7B
- 0.04 ± 0.00
- 0.11 ± 0.00
- 0.06 ± 0.00
- 0.16 ± 0.00
- 0.03 ± 0.00
- 0.03 ± 0.00
- 0.02 ± 0.00
- 0.02 ± 0.00
-
-
- LLaMa-2 13B
- 0.07 ± 0.00
- 0.15 ± 0.00
- 0.09 ± 0.00
- 0.21 ± 0.00
- 0.05±0.00
- 0.04±0.00
- 0.04±0.00
- 0.04±0.00
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.07 ± 0.00
- 0.16 ± 0.00
- 0.02±0.00
- 0.03±0.00
- 0.03±0.00
- 0.02±0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00±0.00
- 0.00±0.00
- 0.00±0.00
- 0.00±0.00
-
-
- MixSUra 8x7B
- 0.01 ± -
- 0.07 ± -
- 0.04 ± -
- 0.11 ± -
- 0.04±-
- 0.04±-
- 0.02±-
- 0.02±-
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/few-shot/knowledge.md b/_pages/kr/few-shot/knowledge.md
index a8e785e..6f8c380 100644
--- a/_pages/kr/few-shot/knowledge.md
+++ b/_pages/kr/few-shot/knowledge.md
@@ -2,115 +2,129 @@
layout: default
permalink: /leaderboard/kr/few-shot/knowledge
---
-# Few-Shot Knowledge Leaderboard
+# Few-shot Knowledge Leaderboard
+{% assign lang = 'kr' %}
-
-
-Models
-ZaloE2E
-ViMMRC
-
-
-EM↑
-F1↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.34 ± 0.02
-0.50 ± 0.02
-0.78 ± 0.02
-0.63 ± 0.03
-0.90 ± 0.01
-0.13 ± 0.02
-0.96 ± 0.03
-
-
-URA-LLaMa 13B
-0.26 ± 0.02
-0.40 ± 0.02
-0.62 ± 0.02
-0.50 ± 0.02
-0.69 ± 0.02
-0.18 ± 0.02
-0.65 ± 0.07
-
-
-URA-LLaMa 7B
-0.14 ± 0.02
-0.25 ± 0.02
-0.42 ± 0.02
-0.33 ± 0.02
-0.61 ± 0.02
-0.13 ± 0.02
-0.39 ± 0.07
-
-
-LLaMa-2 13B
-0.22 ± 0.02
-0.36 ± 0.02
-0.58 ± 0.02
-0.46 ± 0.02
-0.62 ± 0.02
-0.28 ± 0.02
-0.77 ± 0.06
-
-
-LLaMa-2 7B
-0.07 ± 0.01
-0.15 ± 0.01
-0.30 ± 0.02
-0.23 ± 0.02
-0.56 ± 0.02
-0.43 ± 0.02
-0.16 ± 0.05
-
-
-Vietcuna 7B
-0.07 ± 0.01
-0.19 ± 0.01
-0.31 ± 0.02
-0.18 ± 0.01
-0.50 ± 0.00
-0.06 ± 0.02
-0.31 ± 0.06
-
-
-MixSUra 8x7B
-0.19 ± -
-0.34 ± -
-0.65 ± -
-0.64 ± -
-0.54 ± -
-0.29 ± -
-0.65 ± -
-
-
-GPT-3.5
-0.49 ± 0.02
-0.64 ± 0.02
-0.90 ± 0.01
-0.73 ± 0.03
--
-0.66 ± 0.01
-0.91 ± 0.04
-
-
-GPT-4
-0.49 ± 0.02
-0.64 ± 0.02
-0.91 ± 0.01
-0.73 ± 0.04
--
-0.66 ± 0.01
-0.91 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {{ dataset[0] }}
+
+ {% else %}
+
+ {{ dataset[0] }}
+
+ {% endif %}
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+ EM↑
+ F1↑
+ {% else %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.knowledge %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AC_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% else %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endif %}
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/few-shot/language-modeling.md b/_pages/kr/few-shot/language-modeling.md
index d5d6771..8176ffb 100644
--- a/_pages/kr/few-shot/language-modeling.md
+++ b/_pages/kr/few-shot/language-modeling.md
@@ -3,164 +3,108 @@ layout: default
permalink: /leaderboard/kr/few-shot/language-modeling
---
# Few-Shot Language Modeling Leaderboard
+{% assign lang = 'kr' %}
- Models
- MLQA-MLM
- VSEC
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.language_modeling %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
+ {% for dataset in site.data.leaderboard[lang].few_shot.language_modeling %}
+ EM↑
+ CER↓
+ WER↓
+ CED↓
+ WED↓
+ PLX↓
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.01 ± 0.00
- 0.54 ± 0.00
- 0.66 ± 0.00
- 669.74 ± 10.38
- 153.04 ± 2.33
- 1.32 ± 0.05
- 0.33 ± 0.00
- 0.11 ± 0.00
- 0.13 ± 0.00
- 15.09 ± 0.42
- 4.05 ± 0.11
- 1.13 ± 0.00
-
-
- URA-LLaMa 13B
- 0.01 ± 0.00
- 0.45 ± 0.01
- 0.61 ± 0.01
- 559.64 ± 11.23
- 136.97 ± 2.68
- 1.49 ± 0.10
- 0.35 ± 0.00
- 0.02 ± 0.00
- 0.04 ± 0.00
- 2.81 ± 0.12
- 1.18 ± 0.03
- 1.15 ± 0.00
-
-
- URA-LLaMa 7B
- 0.01 ± 0.00
- 0.40 ± 0.01
- 0.55 ± 0.01
- 498.36 ± 11.01
- 118.11 ± 2.58
- 1.24 ± 0.01
- 0.22 ± 0.00
- 0.32 ± 0.01
- 0.33 ± 0.01
- 41.89 ± 1.54
- 10.10 ± 0.34
- 1.07 ± 0.00
-
-
- LLaMa-2 13B
- 0.01 ± 0.00
- 0.74 ± 0.00
- 0.87 ± 0.00
- 760.98 ± 11.91
- 186.90 ± 2.85
- 1.24 ± 0.03
- 0.16 ± 0.00
- 0.03 ± 0.00
- 0.05 ± 0.00
- 3.38 ± 0.16
- 1.51 ± 0.04
- 1.01 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.81 ± 0.00
- 0.98 ± 0.00
- 769.36 ± 10.51
- 198.53 ± 2.57
- 1.74 ± 0.19
- 0.12 ± 0.00
- 0.36 ± 0.01
- 0.39 ± 0.01
- 47.50 ± 0.86
- 11.80 ± 0.19
- 1.06 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 1.04 ± 0.00
- 1.06 ± 0.00
- 935.65 ± 12.47
- 204.98 ± 2.79
- 1.40 ± 0.00
- 0.00 ± 0.00
- 8.00 ± 0.07
- 8.01 ± 0.07
- 1063.93 ± 7.64
- 241.74 ± 1.74
- 1.46 ± 0.00
-
-
- MixSUra 8x7B
- 0.00 ± -
- 0.55 ± -
- 0.63 ± -
- 526.79 ± -
- 131.02 ± -
- 1.00 ± -
- 0.08 ± -
- 0.19 ± -
- 0.28 ± -
- 25.13 ± -
- 8.58 ± -
- 1.00 ± -
-
-
- GPT-3.5
- 0.04 ± 0.00
- 0.28 ± 0.01
- 0.44 ± 0.01
- 387.37 ± 10.86
- 92.78 ± 2.46
- -
- 0.66 ± 0.00
- 0.01 ± 0.00
- 0.02 ± 0.00
- 1.63 ± 0.08
- 0.61 ± 0.02
- -
-
-
- GPT-4
- 0.08 ± 0.00
- 0.23 ± 0.01
- 0.40 ± 0.01
- 336.53 ± 10.18
- 83.55 ± 2.34
- -
- 0.75 ± 0.00
- 0.01 ± 0.00
- 0.01 ± 0.00
- 0.89 ± 0.04
- 0.37 ± 0.01
- -
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.language_modeling %}
+ {% assign EM_best = 0 %}
+ {% assign CER_best = 1 %}
+ {% assign WER_best = 1 %}
+ {% assign CED_best = 10000 %}
+ {% assign WED_best = 10000 %}
+ {% assign PLX_best = 10000 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].CER and dataset[1][m].CER < CER_best %}
+ {% assign CER_best = dataset[1][m].CER %}
+ {% endif %}
+ {% if dataset[1][m].WER and dataset[1][m].WER < WER_best %}
+ {% assign WER_best = dataset[1][m].WER %}
+ {% endif %}
+ {% if dataset[1][m].CED and dataset[1][m].CED < CED_best %}
+ {% assign CED_best = dataset[1][m].CED %}
+ {% endif %}
+ {% if dataset[1][m].WED and dataset[1][m].WED < WED_best %}
+ {% assign WED_best = dataset[1][m].WED %}
+ {% endif %}
+ {% if dataset[1][m].PLX and dataset[1][m].PLX < PLX_best %}
+ {% assign PLX_best = dataset[1][m].PLX %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CER %}
+ {{ dataset[1][model].CER | round: 2 }} ± {{ dataset[1][model].CER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WER %}
+ {{ dataset[1][model].WER | round: 2 }} ± {{ dataset[1][model].WER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CED %}
+ {{ dataset[1][model].CED | round: 2 }} ± {{ dataset[1][model].CED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WED %}
+ {{ dataset[1][model].WED | round: 2 }} ± {{ dataset[1][model].WED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].PLX %}
+ {{ dataset[1][model].PLX | round: 2 }} ± {{ dataset[1][model].PLX_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/few-shot/reasoning.md b/_pages/kr/few-shot/reasoning.md
index d11333e..4b30766 100644
--- a/_pages/kr/few-shot/reasoning.md
+++ b/_pages/kr/few-shot/reasoning.md
@@ -3,135 +3,72 @@ layout: default
permalink: /leaderboard/kr/few-shot/reasoning
---
# Few-Shot Reasoning Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- SR - Natural
- SR - Abstract symbol
- MATH
-
-
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
-
-
-
-
- URA-LLaMa 70B
- 0.14 ± 0.00
- 0.48 ± 0.00
- 0.15 ± 0.00
- 0.27 ± 0.00
- 0.85 ± 0.00
- 0.30 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.12 ± 0.02
-
-
- URA-LLaMa 13B
- 0.08 ± 0.00
- 0.42 ± 0.00
- 0.08 ± 0.00
- 0.20 ± 0.00
- 0.70 ± 0.00
- 0.17 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.01
-
-
- URA-LLaMa 7B
- 0.04 ± 0.00
- 0.38 ± 0.00
- 0.04 ± 0.00
- 0.11 ± 0.00
- 0.61 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.07 ± 0.01
-
-
- LLaMa-2 13B
- 0.03 ± 0.00
- 0.24 ± 0.00
- 0.04 ± 0.00
- 0.19 ± 0.00
- 0.69 ± 0.00
- 0.18 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.16 ± 0.02
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.06 ± 0.00
- 0.44 ± 0.00
- 0.06 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.11 ± 0.01
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.14 ± 0.00
- 0.71 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
-
-
- MixSUra 8x7B
- 0.07 ± 0.00
- 0.41 ± 0.00
- 0.07 ± 0.00
- 0.22 ± 0.00
- 0.78 ± 0.00
- 0.23 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- GPT-3.5
- 0.15 ± 0.00
- 0.50 ± 0.00
- 0.16 ± 0.00
- 0.26 ± 0.00
- 0.83 ± 0.00
- 0.29 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.62 ± 0.02
-
-
- GPT-4
- 0.37 ± 0.00
- 0.74 ± 0.00
- 0.42 ± 0.00
- 0.37 ± 0.00
- 0.87 ± 0.00
- 0.44 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.65 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.reasoning %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.reasoning %}
+ EM↑
+ F1↑
+ Equ↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.reasoning %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign Equ_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m]["Equ"] and dataset[1][m]["Equ"] > Equ_best %}
+ {% assign Equ_best = dataset[1][m]["Equ"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["Equ"] %}
+ {{ dataset[1][model]["Equ"] | round: 2 }} ± {{ dataset[1][model]["Equ_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/few-shot/sentiment-analysis.md b/_pages/kr/few-shot/sentiment-analysis.md
index 0f457fb..5d478d3 100644
--- a/_pages/kr/few-shot/sentiment-analysis.md
+++ b/_pages/kr/few-shot/sentiment-analysis.md
@@ -1,146 +1,98 @@
---
layout: default
-permalink: /leaderboard/kr/few-shot/sentiment-analysis
+permalink: /leaderboard/kr/few-shot/sentiment-analysis
---
# Few-Shot Sentiment Analysis Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- VLSP 2016
- UiT-VSFC
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.66 ± 0.01
- 0.49 ± 0.01
- 0.72 ± 0.01
- 0.13 ± 0.01
- 0.77 ± 0.04
- 0.75 ± 0.01
- 0.48 ± 0.01
- 0.81 ± 0.01
- 0.16 ± 0.01
- 0.71 ± 0.02
-
-
- URA-LLaMa 13B
- 0.59 ± 0.01
- 0.57 ± 0.01
- 0.67 ± 0.01
- 0.09 ± 0.01
- 0.82 ± 0.04
- 0.74 ± 0.01
- 0.52 ± 0.08
- 0.83 ± 0.01
- 0.10 ± 0.01
- 0.87 ± 0.02
-
-
- URA-LLaMa 7B
- 0.57 ± 0.02
- 0.42 ± 0.05
- 0.69 ± 0.02
- 0.07 ± 0.02
- 0.77 ± 0.04
- 0.72 ± 0.01
- 0.43 ± 0.01
- 0.78 ± 0.01
- 0.13 ± 0.01
- 0.95 ± 0.03
-
-
- LLaMa-2 13B
- 0.51 ± 0.01
- 0.41 ± 0.06
- 0.66 ± 0.01
- 0.32 ± 0.02
- 0.80 ± 0.04
- 0.63 ± 0.01
- 0.46 ± 0.07
- 0.71 ± 0.01
- 0.13 ± 0.01
- 0.88 ± 0.02
-
-
- LLaMa-2 7B
- 0.45 ± 0.01
- 0.32 ± 0.01
- 0.59 ± 0.01
- 0.26 ± 0.02
- 0.50 ± 0.05
- 0.50 ± 0.01
- 0.34 ± 0.01
- 0.69 ± 0.01
- 0.23 ± 0.01
- 0.62 ± 0.03
-
-
- Vietcuna 7B
- 0.04 ± 0.01
- 0.05 ± 0.01
- 0.45 ± 0.01
- 0.71 ± 0.01
- 0.05 ± 0.02
- 0.03 ± 0.00
- 0.03 ± 0.00
- 0.53 ± 0.01
- 0.50 ± 0.00
- 0.01 ± 0.00
-
-
- MixSUra 8x7B
- 0.62 ± -
- 0.63 ± -
- 0.59 ± -
- 0.30 ± -
- 0.59 ± -
- 0.74 ± -
- 0.46 ± -
- 0.63 ± -
- 0.23 ± -
- 0.655 ± -
-
-
- GPT-3.5
- 0.65 ± 0.01
- 0.59 ± 0.1
- -
- 0.32 ± 0.01
- 0.65 ± 0.05
- 0.86 ± 0.01
- 0.73 ± 0.01
- -
- 0.52 ± 0.01
- 0.86 ± 0.02
-
-
- GPT-4
- 0.75 ± 0.01
- 0.74 ± 0.01
- -
- 0.41 ± 0.01
- 0.74 ± 0.04
- 0.85 ± 0.01
- 0.59 ± 0.09
- -
- 0.52 ± 0.01
- 0.85 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/few-shot/text-classification.md b/_pages/kr/few-shot/text-classification.md
index d369c3d..eae4ba6 100644
--- a/_pages/kr/few-shot/text-classification.md
+++ b/_pages/kr/few-shot/text-classification.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/kr/few-shot/text-classification
---
# Few-Shot Text Classification Leaderboard
+{% assign lang = 'kr' %}
-
-
-Models
-UiT-VSMEC
-PhoATIS
-
-
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.25 ± 0.02
-0.15 ± 0.01
-0.56 ± 0.01
-0.25 ± 0.02
-0.37 ± 0.06
-0.15 ± 0.01
-0.22 ± 0.03
-0.83 ± 0.00
-0.81 ± 0.01
-0.13 ± 0.04
-
-
-URA-LLaMa 13B
-0.32 ± 0.02
-0.12 ± 0.01
-0.58 ± 0.01
-0.22 ± 0.02
-0.57 ± 0.07
-0.01 ± 0.01
-0.06 ± 0.02
-0.47 ± 0.00
-0.84 ± 0.01
-0.00 ± 0.01
-
-
-URA-LLaMa 7B
-0.29 ± 0.02
-0.11 ± 0.01
-0.60 ± 0.01
-0.12 ± 0.02
-0.43 ± 0.06
-0.06 ± 0.01
-0.01 ± 0.00
-0.55 ± 0.00
-0.24 ± 0.01
-0.08 ± 0.03
-
-
-LLaMa-2 13B
-0.18 ± 0.02
-0.08 ± 0.01
-0.55 ± 0.01
-0.45 ± 0.01
-0.49 ± 0.07
-0.02 ± 0.01
-0.06 ± 0.02
-0.57 ± 0.01
-0.90 ± 0.01
-0.01 ± 0.01
-
-
-LLaMa-2 7B
-0.25 ± 0.02
-0.12 ± 0.01
-0.57 ± 0.01
-0.21 ± 0.02
-0.54 ± 0.06
-0.03 ± 0.01
-0.02 ± 0.01
-0.56 ± 0.01
-0.54 ± 0.01
-0.01 ± 0.01
-
-
-Vietcuna 7B
-0.15 ± 0.01
-0.05 ± 0.01
-0.46 ± 0.01
-0.85 ± 0.01
-0.15 ± 0.04
-0.04 ± 0.01
-0.01 ± 0.00
-0.63 ± 0.00
-0.21 ± 0.01
-0.07 ± 0.03
-
-
-MixSUra 8x7B
-0.40 ± -
-0.36 ± -
-0.72 ± -
-0.53 ± -
-0.79 ± -
-0.81 ± -
-0.58 ± -
-0.96 ± -
-0.14 ± -
-0.91 ± -
-
-
-GPT-3.5
-0.42 ± 0.02
-0.40 ± 0.02
--
-0.28 ± 0.02
-0.42 ± 0.06
-0.69 ± 0.02
-0.67 ± 0.03
--
-0.63 ± 0.02
-0.69 ± 0.05
-
-
-GPT-4
-0.49 ± 0.02
-0.48 ± 0.02
--
-0.35 ± 0.02
-0.49 ± 0.06
-0.85 ± 0.01
-0.78 ± 0.03
--
-0.79 ± 0.01
-0.88 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/few-shot/toxicity-detection.md b/_pages/kr/few-shot/toxicity-detection.md
index 26357cf..d917521 100644
--- a/_pages/kr/few-shot/toxicity-detection.md
+++ b/_pages/kr/few-shot/toxicity-detection.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/kr/few-shot/toxicity-detection
---
# Few-Shot Toxicity Detection Leaderboard
+{% assign lang = 'kr' %}
-
-
-Models
-UiT-ViCTSD
-UiT-ViHSD
-
-
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.44 ± 0.01
-0.27 ± 0.01
-0.75 ± 0.01
-0.52 ± 0.01
-0.37 ± 0.02
-0.17 ± 0.00
-0.15 ± 0.00
-0.64 ± 0.01
-0.57 ± 0.00
-0.27 ± 0.02
-
-
-URA-LLaMa 13B
-0.44 ± 0.01
-0.30 ± 0.05
-0.67 ± 0.01
-0.33 ± 0.01
-0.41 ± 0.03
-0.26 ± 0.01
-0.16 ± 0.00
-0.61 ± 0.01
-0.42 ± 0.01
-0.21 ± 0.02
-
-
-URA-LLaMa 7B
-0.43 ± 0.01
-0.40 ± 0.01
-0.60 ± 0.01
-0.29 ± 0.01
-0.71 ± 0.02
-0.16 ± 0.00
-0.10 ± 0.00
-0.67 ± 0.01
-0.32 ± 0.00
-0.28 ± 0.02
-
-
-LLaMa-2 13B
-0.28 ± 0.01
-0.19 ± 0.00
-0.67 ± 0.01
-0.52 ± 0.01
-0.63 ± 0.03
-0.17 ± 0.00
-0.11 ± 0.00
-0.62 ± 0.01
-0.58 ± 0.00
-0.44 ± 0.02
-
-
-LLaMa-2 7B
-0.16 ± 0.01
-0.12 ± 0.01
-0.61 ± 0.01
-0.66 ± 0.01
-0.08 ± 0.02
-0.01 ± 0.00
-0.01 ± 0.00
-0.56 ± 0.01
-0.71 ± 0.00
-0.01 ± 0.02
-
-
-Vietcuna 7B
-0.08 ± 0.00
-0.10 ± 0.01
-0.50 ± 0.00
-0.42 ± 0.00
-0.08 ± 0.03
-0.61 ± 0.01
-0.21 ± 0.00
-0.50 ± 0.00
-0.28 ± 0.01
-0.61 ± 0.02
-
-
-MixSUra 8x7B
-0.70 ± -
-0.39 ± -
-- ± -
-0.29 ± -
-0.80 ± -
-0.58 ± -
-0.31 ± -
-0.68 ± -
-0.30 ± -
-0.93 ± -
-
-
-GPT-3.5
-0.63 ± 0.02
-0.54 ± 0.02
--
-0.13 ± 0.02
-0.63 ± 0.05
-0.63 ± 0.01
-0.47 ± 0.01
--
-0.29 ± 0.01
-0.63 ± 0.02
-
-
-GPT-4
-0.89 ± 0.00
-0.71 ± 0.01
--
-0.39 ± 0.00
-0.89 ± 0.03
-0.77 ± 0.01
-0.57 ± 0.01
--
-0.44 ± 0.01
-0.77 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/few-shot/translation.md b/_pages/kr/few-shot/translation.md
index 571f46c..b30a503 100644
--- a/_pages/kr/few-shot/translation.md
+++ b/_pages/kr/few-shot/translation.md
@@ -3,124 +3,84 @@ layout: default
permalink: /leaderboard/kr/few-shot/translation
---
# Few-Shot Translation Leaderboard
+{% assign lang = 'kr' %}
- Models
- PhoMT
- OPUS100
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.translation %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- (En -> Vi)
- (Vi -> En)
- (En -> Vi)
- (Vi -> En)
- (En -> Vi)
- (Vi -> En)
- (En -> Vi)
- (Vi -> En)
+ {% for dataset in site.data.leaderboard[lang].few_shot.translation %}
+ BLEU envi↑
+ BLEU vien↑
+ hLEPOR envi↑
+ hLEPOR vien↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.28 ± 0.00
- 0.59 ± 0.00
- 0.27 ± 0.00
- 0.58 ± 0.00
- 0.10 ± 0.00
- 0.44 ± 0.01
- 0.14 ± 0.00
- 0.41 ± 0.01
-
-
- URA-LLaMa 13B
- 0.25 ± 0.00
- 0.55 ± 0.00
- 0.15 ± 0.00
- 0.56 ± 0.00
- 0.10 ± 0.01
- 0.41 ± 0.01
- 0.17 ± 0.01
- 0.43 ± 0.01
-
-
- URA-LLaMa 7B
- 0.19 ± 0.00
- 0.50 ± 0.00
- 0.22 ± 0.00
- 0.54 ± 0.00
- 0.08 ± 0.00
- 0.38 ± 0.01
- 0.14 ± 0.01
- 0.39 ± 0.01
-
-
- LLaMa-2 13B
- 0.23 ± 0.00
- 0.53 ± 0.00
- 0.23 ± 0.00
- 0.54 ± 0.00
- 0.09 ± 0.00
- 0.39 ± 0.01
- 0.14 ± 0.01
- 0.40 ± 0.01
-
-
- LLaMa-2 7B
- 0.18 ± 0.00
- 0.47 ± 0.00
- 0.21 ± 0.00
- 0.52 ± 0.00
- 0.07 ± 0.00
- 0.34 ± 0.00
- 0.11 ± 0.01
- 0.36 ± 0.01
-
-
- Vietcuna 7B
- 0.15 ± 0.00
- 0.35 ± 0.00
- 0.03 ± 0.00
- 0.11 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
- 0.16 ± 0.00
-
-
- MixSUra 8x7B
- 0.15 ± -
- 0.51 ± -
- 0.16 ± -
- 0.52 ± -
- 0.07 ± -
- 0.37 ± -
- 0.09 ± -
- 0.36 ± -
-
-
- GPT-3.5
- 0.33 ± 0.00
- 0.65 ± 0.00
- 0.33 ± 0.00
- 0.63 ± 0.00
- 0.16 ± 0.01
- 0.50 ± 0.01
- 0.24 ± 0.01
- 0.51 ± 0.00
-
-
- GPT-4
- 0.33 ± 0.00
- 0.66 ± 0.00
- 0.34 ± 0.00
- 0.65 ± 0.00
- 0.17 ± 0.01
- 0.51 ± 0.01
- 0.25 ± 0.01
- 0.53 ± 0.00
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.translation %}
+ {% assign bleu_envi_best = 0 %}
+ {% assign bleu_vien_best = 0 %}
+ {% assign hlepor_envi_best = 0 %}
+ {% assign hlepor_vien_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["BLEU envi"] and dataset[1][m]["BLEU envi"] > bleu_envi_best %}
+ {% assign bleu_envi_best = dataset[1][m]["BLEU envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["BLEU vien"] and dataset[1][m]["BLEU vien"] > bleu_vien_best %}
+ {% assign bleu_vien_best = dataset[1][m]["BLEU vien"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR envi"] and dataset[1][m]["hLEPOR envi"] > hlepor_envi_best %}
+ {% assign hlepor_envi_best = dataset[1][m]["hLEPOR envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR vien"] and dataset[1][m]["hLEPOR vien"] > hlepor_vien_best %}
+ {% assign hlepor_vien_best = dataset[1][m]["hLEPOR vien"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["BLEU envi"] %}
+ {{ dataset[1][model]["BLEU envi"] | round: 2 }} ± {{ dataset[1][model]["BLEU envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["BLEU vien"] %}
+ {{ dataset[1][model]["BLEU vien"] | round: 2 }} ± {{ dataset[1][model]["BLEU vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR envi"] %}
+ {{ dataset[1][model]["hLEPOR envi"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR vien"] %}
+ {{ dataset[1][model]["hLEPOR vien"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/medium-prompt/question-answering.md b/_pages/kr/medium-prompt/question-answering.md
index 246c507..033c20f 100644
--- a/_pages/kr/medium-prompt/question-answering.md
+++ b/_pages/kr/medium-prompt/question-answering.md
@@ -3,63 +3,60 @@ layout: default
permalink: /leaderboard/kr/medium-prompt/question-answering
---
# Medium-Prompt Question Answering Leaderboard
+{% assign lang = 'kr' %}
- Models
- XQuAD
- MLQA
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- F1↑
- EM↑
- F1↑
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
-
- URA-LLaMa 70B
- 0.08 ± 0.00
- 0.33 ± 0.00
- 0.07 ± 0.00
- 0.31 ± 0.00
-
-
- URA-LLaMa 13B
- 0.04 ± 0.00
- 0.21 ± 0.00
- 0.04 ± 0.00
- 0.19 ± 0.00
-
-
- URA-LLaMa 7B
- 0.01 ± 0.00
- 0.11 ± 0.00
- 0.01 ± 0.00
- 0.11 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.09 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.03 ± 0.00
- 0.00 ± 0.00
- 0.03 ± 0.00
-
-
- MixSUra 8x7B
- 0.01 ± -
- 0.25 ± -
- 0.00 ± -
- 0.25 ± -
-
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/medium-prompt/summarization.md b/_pages/kr/medium-prompt/summarization.md
index 4bef8d5..ab5cca3 100644
--- a/_pages/kr/medium-prompt/summarization.md
+++ b/_pages/kr/medium-prompt/summarization.md
@@ -3,147 +3,132 @@ layout: default
permalink: /leaderboard/kr/medium-prompt/summarization
---
# Medium-Prompt Summarization Leaderboard
+{% assign lang = 'kr' %}
-
-
-Models
-VietNews
-WikiLingua
-
-
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-
-
-
-
-URA-LLaMa 70B
-0.35 ± 0.00
-0.16 ± 0.00
-0.24 ± 0.00
--0.11 ± 0.00
-0.12 ± 0.00
-0.63 ± 0.00
-5.43 ± 0.02
-37.78 ± 0.47
-0.33 ± 0.00
-0.14 ± 0.00
-0.22 ± 0.00
--0.16± 0.00
-0.24± 0.10
-0.59 ± 0.01
-4.62 ± 0.11
-56.56 ± 1.70
-
-
-URA-LLaMa 13B
-0.26 ± 0.00
-0.12 ± 0.00
-0.17 ± 0.00
--0.09 ± 0.00
--0.08 ± 0.18
-0.46 ± 0.00
-3.55 ± 0.04
-47.75 ± 0.65
-0.14 ± 0.00
-0.05 ± 0.00
-0.09 ± 0.00
--0.16 ± 0.00
--0.14 ± 0.12
-0.26 ± 0.01
-1.83 ± 0.06
-60.10 ± 2.16
-
-
-URA-LLaMa 7B
-0.41 ± 0.00
-0.18 ± 0.00
-0.27 ± 0.00
--0.09 ± 0.00
--0.08 ± 0.13
-0.83 ± 0.00
-8.13 ± 0.04
-8.08 ± 0.17
-0.42 ± 0.00
-0.17 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.27 ± 0.21
-0.84 ± 0.00
-7.15 ± 0.08
-8.08 ± 0.36
-
-
-LLaMa-2 13B
-0.02 ± 0.00
-0.00 ± 0.00
-0.02 ± 0.00
--0.09 ± 0.00
--0.19 ± 0.05
-0.01 ± 0.00
-0.01 ± 0.00
-54.67 ± 0.16
-0.03 ± 0.00
-0.00 ± 0.00
-0.03 ± 0.00
--0.16 ± 0.00
--0.05 ± 0.03
-0.02 ± 0.00
-0.02 ± 0.00
-42.55 ± 0.81
-
-
-LLaMa-2 7B
-0.03 ± 0.00
-0.01 ± 0.00
-0.03 ± 0.00
--0.09 ± 0.00
--0.17 ± 0.03
-0.04 ± 0.00
-0.07 ± 0.00
-23.86 ± 0.26
-0.02 ± 0.00
-0.00 ± 0.00
-0.02 ± 0.00
--0.16 ± 0.00
--0.04 ± 0.06
-0.02 ± 0.00
-0.03 ± 0.00
-40.31 ± 0.88
-
-
-MixSUra 8x7B
-0.06 ± -
-0.01 ± -
-0.04 ± -
-- ± -
--0.13 ± -
-0.10 ± -
-0.17 ± -
-9.03 ± -
-0.03 ± -
-0.00 ± -
-0.03 ± -
-- ± -
--0.01 ± -
-0.17 ± -
-0.26 ± -
-16.68 ± -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/randomized-choice/knowledge.md b/_pages/kr/randomized-choice/knowledge.md
index 49a68be..a85022b 100644
--- a/_pages/kr/randomized-choice/knowledge.md
+++ b/_pages/kr/randomized-choice/knowledge.md
@@ -4,90 +4,96 @@ permalink: /leaderboard/kr/randomized-choice/knowledge
---
# Randomized-Choice Knowledge Leaderboard
+{% assign lang = 'kr' %}
- Models
- AC ↑
- F1 ↑
- AR ↑
- ECE ↓
- A@10 ↑
-
-
-
-
- Our 70B
- 0.76 ± 0.02
- 0.76 ± 0.02
- 0.78 ± 0.01
- 0.14 ± 0.02
- 0.94 ± 0.04
-
-
- Our 13B
- 0.62 ± 0.02
- 0.62 ± 0.02
- 0.61 ± 0.02
- 0.15 ± 0.02
- 0.67 ± 0.07
-
-
- Our 7B
- 0.45 ± 0.02
- 0.36 ± 0.02
- 0.57 ± 0.02
- 0.10 ± 0.02
- 0.45 ± 0.07
-
-
- LLaMa-2 13B
- 0.57 ± 0.02
- 0.57 ± 0.02
- 0.57 ± 0.02
- 0.29 ± 0.02
- 0.75 ± 0.07
-
-
- LLaMa-2 7B
- 0.36 ± 0.02
- 0.27 ± 0.02
- 0.56 ± 0.02
- 0.37 ± 0.02
- 0.44 ± 0.07
-
-
- Vietcuna 7B
- 0.26 ± 0.02
- 0.15 ± 0.01
- 0.50 ± 0.00
- 0.01 ± 0.01
- 0.26 ± 0.06
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].randomized_choice.knowledge %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- MixSUra 7B
- 0.61 ± -
- 0.61 ± -
- 0.54 ± -
- 0.31 ± -
- 0.65 ± -
-
-
- GPT-3.5
- 0.92 ± 0.01
- 0.74 ± 0.04
- -
- 0.67 ± 0.01
- 0.92 ± 0.04
+ {% for dataset in site.data.leaderboard[lang].randomized_choice.knowledge %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
- GPT-4
- 0.92 ± 0.01
- 0.74 ± 0.04
- -
- 0.67 ± 0.01
- 0.92 ± 0.04
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].randomized_choice.knowledge %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/robustness-aware/information-retrieval.md b/_pages/kr/robustness-aware/information-retrieval.md
index 9239acd..645f8b2 100644
--- a/_pages/kr/robustness-aware/information-retrieval.md
+++ b/_pages/kr/robustness-aware/information-retrieval.md
@@ -3,113 +3,84 @@ layout: default
permalink: /leaderboard/kr/robustness-aware/information-retrieval
---
# Robustness-Aware Information Retrieval Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- mMARCO
- mRobust04
-
-
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
-
-
-
-
- URA-LLaMa 70B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 13B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 7B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.07 ± 0.00
- 0.17 ± 0.00
- -
- -
- -
- -
-
-
- LLaMa-2 13B
- 0.06 ± 0.00
- 0.13 ± 0.00
- 0.19 ± 0.00
- 0.19 ± 0.00
-
-
-
-
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.08 ± 0.00
- 0.16 ± 0.00
- -
- -
- -
- -
-
-
- Vietcuna 7B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/robustness-aware/knowledge.md b/_pages/kr/robustness-aware/knowledge.md
index 647b533..9fea7c7 100644
--- a/_pages/kr/robustness-aware/knowledge.md
+++ b/_pages/kr/robustness-aware/knowledge.md
@@ -3,114 +3,128 @@ layout: default
permalink: /leaderboard/kr/robustness-aware/knowledge
---
# Robustness-Aware Knowledge Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- ZaloE2E
- ViMMRC
-
-
- EM↑
- F1↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.23 ± 0.00
- 0.37 ± 0.00
- 0.65 ± 0.00
- 0.53 ± 0.00
- 0.84 ± 0.00
- 0.11 ± 0.00
- 0.77 ± 0.00
-
-
- URA-LLaMa 13B
- 0.18 ± 0.00
- 0.30 ± 0.00
- 0.41 ± 0.00
- 0.34 ± 0.00
- 0.61 ± 0.00
- 0.22 ± 0.00
- 0.58 ± 0.00
-
-
- URA-LLaMa 7B
- 0.10 ± 0.00
- 0.18 ± 0.00
- 0.33 ± 0.02
- 0.28 ± 0.02
- 0.61 ± 0.01
- 0.19 ± 0.02
- 0.33 ± 0.06
-
-
- LLaMa-2 13B
- 0.13 ± 0.00
- 0.21 ± 0.00
- 0.39 ± 0.00
- 0.31 ± 0.00
- 0.56 ± 0.00
- 0.46 ± 0.00
- 0.33 ± 0.00
-
-
- LLaMa-2 7B
- 0.02 ± 0.00
- 0.05 ± 0.00
- 0.26 ± 0.01
- 0.20 ± 0.01
- 0.51 ± 0.01
- 0.46 ± 0.01
- 0.13 ± 0.03
-
-
- Vietcuna 7B
- 0.05 ± 0.00
- 0.15 ± 0.00
- 0.26 ± 0.01
- 0.14 ± 0.00
- 0.50 ± 0.00
- 0.01 ± 0.01
- 0.21 ± 0.07
-
-
- MixSUra 8x7B
- 0.13 ± -
- 0.24 ± -
- 0.57 ± -
- 0.45 ± -
- 0.53 ± -
- 0.35 ± -
- 0.58 ± -
-
-
- GPT-3.5
- 0.45 ± 0.01
- 0.61 ± 0.01
- 0.90 ± 0.01
- 0.72 ± 0.04
- -
- 0.65 ± 0.01
- 0.88 ± 0.07
-
-
- GPT-4
- 0.44 ± 0.01
- 0.61 ± 0.01
- 0.91 ± 0.01
- 0.73 ± 0.07
- -
- 0.66 ± 0.07
- 0.88 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {{ dataset[0] }}
+
+ {% else %}
+
+ {{ dataset[0] }}
+
+ {% endif %}
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+ EM↑
+ F1↑
+ {% else %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.knowledge %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AC_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% else %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endif %}
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/robustness-aware/question-answering.md b/_pages/kr/robustness-aware/question-answering.md
index f3e0218..251ef70 100644
--- a/_pages/kr/robustness-aware/question-answering.md
+++ b/_pages/kr/robustness-aware/question-answering.md
@@ -3,84 +3,60 @@ layout: default
permalink: /leaderboard/kr/robustness-aware/question-answering
---
# Robustness-Aware Question Answering Leaderboard
+{% assign lang = 'kr' %}
- Models
- XQuAD
- MLQA
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- F1↑
- EM↑
- F1↑
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
-
- URA-LLaMa 70B
- 0.01 ± 0.00
- 0.17 ± 0.00
- 0.01 ± 0.00
- 0.18 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.09 ± 0.00
- 0.00 ± 0.00
- 0.10 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.09 ± 0.00
- 0.00 ± 0.00
- 0.10 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.02 ± 0.00
- 0.00 ± 0.00
- 0.03 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.02 ± 0.00
- 0.00 ± 0.00
- 0.02 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.06 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
-
-
- MixSUra 8x7B
- 0.00 ± -
- 0.11 ± -
- 0.00 ± -
- 0.12 ± -
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.19 ± 0.00
- 0.00 ± 0.00
- 0.20 ± 0.00
-
-
- GPT-4
- 0.00 ± 0.00
- 0.24 ± 0.00
- 0.00 ± 0.00
- 0.25 ± 0.00
-
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/robustness-aware/sentiment-analysis.md b/_pages/kr/robustness-aware/sentiment-analysis.md
index 1b53f39..aef4e20 100644
--- a/_pages/kr/robustness-aware/sentiment-analysis.md
+++ b/_pages/kr/robustness-aware/sentiment-analysis.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/kr/robustness-aware/sentiment-analysis
---
# Robustness-Aware Sentiment Analysis Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- VLSP 2016
- UiT-VSFC
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.63 ± 0.01
- 0.48 ± 0.01
- 0.60 ± 0.01
- 0.09 ± 0.01
- 0.83 ± 0.04
- 0.71 ± 0.01
- 0.45 ± 0.01
- 0.80 ± 0.01
- 0.08 ± 0.01
- 0.99 ± 0.01
-
-
- URA-LLaMa 13B
- 0.55 ± 0.02
- 0.52 ± 0.02
- 0.59 ± 0.01
- 0.06 ± 0.01
- 0.74 ± 0.05
- 0.72 ± 0.01
- 0.44 ± 0.05
- 0.77 ± 0.01
- 0.18 ± 0.01
- 0.77 ± 0.02
-
-
- URA-LLaMa 7B
- 0.52 ± 0.02
- 0.36 ± 0.03
- 0.59 ± 0.01
- 0.07 ± 0.01
- 0.66 ± 0.05
- 0.73 ± 0.01
- 0.41 ± 0.01
- 0.71 ± 0.01
- 0.16 ± 0.01
- 0.87 ± 0.02
-
-
- LLaMa-2 13B
- 0.46 ± 0.02
- 0.30 ± 0.01
- 0.55 ± 0.01
- 0.39 ± 0.02
- 0.70 ± 0.05
- 0.66 ± 0.01
- 0.40 ± 0.01
- 0.63 ± 0.01
- 0.11 ± 0.01
- 0.89 ± 0.02
-
-
- LLaMa-2 7B
- 0.45 ± 0.02
- 0.36 ± 0.01
- 0.54 ± 0.01
- 0.20 ± 0.02
- 0.51 ± 0.05
- 0.51 ± 0.01
- 0.33 ± 0.01
- 0.65 ± 0.01
- 0.15 ± 0.01
- 0.80 ± 0.02
-
-
- Vietcuna 7B
- 0.44 ± 0.02
- 0.27 ± 0.01
- 0.51 ± 0.01
- 0.23 ± 0.02
- 0.53 ± 0.05
- 0.49 ± 0.01
- 0.25 ± 0.03
- 0.46 ± 0.01
- 0.33 ± 0.01
- 0.34 ± 0.03
-
-
- MixSUra 8x7B
- 0.59 ± -
- 0.59 ± -
- 0.55 ± -
- 0.34 ± -
- 0.52 ± -
- 0.69 ± -
- 0.44 ± -
- 0.61 ± -
- 0.29 ± -
- 0.66 ± -
-
-
- GPT-3.5
- 0.64 ± 0.01
- 0.60 ± 0.01
- -
- 0.31 ± 0.01
- 0.54 ± 0.05
- 0.86 ± 0.01
- 0.71 ± 0.01
- -
- 0.53 ± 0.01
- 0.86 ± 0.02
-
-
- GPT-4
- 0.74 ± 0.00
- 0.73 ± 0.00
- -
- 0.41 ± 0.00
- 0.71 ± 0.00
- 0.83 ± 0.00
- 0.70 ± 0.00
- -
- 0.50 ± 0.00
- 0.85 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/robustness-aware/summarization.md b/_pages/kr/robustness-aware/summarization.md
index e4e847d..4533611 100644
--- a/_pages/kr/robustness-aware/summarization.md
+++ b/_pages/kr/robustness-aware/summarization.md
@@ -3,204 +3,132 @@ layout: default
permalink: /leaderboard/kr/robustness-aware/summarization
---
# Robustness-Aware Summarization Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- VietNews
- WikiLingua
-
-
- R1↑
- R2↑
- RL↑
- SC↑
- BS↑
- Cv↑
- De↑
- Cp↑
- R1↑
- R2↑
- RL↑
- SC↑
- BS↑
- Cv↑
- De↑
- Cp↑
-
-
-
-
- URA-LLaMa 70B
- 0.34 ± 0.00
- 0.15 ± 0.00
- 0.23 ± 0.00
- -0.06 ± 0.00
- -0.11 ± 0.18
- 0.10 ± 0.00
- 0.10 ± 0.00
- 39.63 ± 0.87
- 0.28 ± 0.00
- 0.11 ± 0.00
- 0.19 ± 0.00
- -0.16 ± 0.00
- 0.25 ± 0.23
- 0.50 ± 0.01
- 0.50 ± 0.01
- 167.42 ± 7.09
-
-
- URA-LLaMa 13B
- 0.35 ± 0.00
- 0.14 ± 0.00
- 0.23 ± 0.00
- -0.09 ± 0.00
- -0.07 ± 0.17
- 0.64 ± 0.00
- 0.65 ± 0.00
- 134.65 ± 3.76
- 0.20 ± 0.00
- 0.07 ± 0.00
- 0.13 ± 0.00
- -0.17 ± 0.00
- 0.20 ± 0.11
- 0.38 ± 0.00
- 0.38 ± 0.00
- 103.69 ± 3.33
-
-
- URA-LLaMa 7B
- 0.37 ± 0.00
- 0.12 ± 0.00
- 0.24 ± 0.00
- -0.10 ± 0.00
- -0.24 ± 0.18
- 0.65 ± 0.00
- 0.65 ± 0.00
- 17.92 ± 0.87
- 0.37 ± 0.00
- 0.12 ± 0.00
- 0.24 ± 0.00
- -0.17 ± 0.00
- 0.11 ± 0.18
- 0.65 ± 0.00
- 0.65 ± 0.00
- 20.49 ± 0.95
-
-
- LLaMa-2 13B
- 0.05 ± 0.00
- 0.01 ± 0.00
- 0.04 ± 0.00
- -0.15 ± 0.00
- -0.24 ± 0.18
- 0.03 ± 0.00
- 0.03 ± 0.00
- 55.91 ± 0.65
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.03 ± 0.00
- -0.17 ± 0.00
- 0.09 ± 0.00
- 0.05 ± 0.00
- 0.05 ± 0.00
- 66.85 ± 6.72
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.01 ± 0.00
- 0.05 ± 0.00
- -0.10 ± 0.00
- -0.19 ± 0.04
- 0.07 ± 0.00
- 0.07 ± 0.00
- 55.29 ± 0.88
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.04 ± 0.00
- -0.17 ± 0.00
- 0.15 ± 0.00
- 0.06 ± 0.00
- 0.06 ± 0.00
- 58.32 ± 3.32
-
-
- Vietcuna 7B
- 0.03 ± 0.00
- 0.01 ± 0.00
- 0.02 ± 0.00
- -0.10 ± 0.00
- -0.18 ± 0.06
- 0.91 ± 0.00
- 0.91 ± 0.00
- 1026.61 ± 3.86
- 0.08 ± 0.00
- 0.02 ± 0.00
- 0.05 ± 0.00
- -0.17 ± 0.00
- -0.19 ± 0.05
- 0.78 ± 0.00
- 0.78 ± 0.00
- 505.45 ± 8.64
-
-
- MixSUra 8x7B
- 0.41 ± -
- 0.19 ± -
- 0.26 ± -
- - ± -
- -0.03 ± -
- 0.86 ± -
- 0.87 ± -
- 29.15 ± -
- 0.46 ± -
- 0.21 ± -
- 0.28 ± -
- - ± -
- 0.26 ± -
- 0.88 ± -
- 0.98 ± -
- 19.10 ± -
-
-
- GPT-3.5
- 0.34 ± 0.00
- 0.19 ± 0.00
- 0.23 ± 0.00
- -0.10 ± 0.00
- 0.05 ± 0.14
- 0.81 ± 0.00
- 0.81 ± 0.00
- 128.44 ± 2.94
- 0.39 ± 0.00
- 0.19 ± 0.00
- 0.25 ± 0.00
- -0.17 ± 0.00
- 0.28 ± 0.11
- 0.82 ± 0.00
- 0.82 ± 0.00
- 200.90 ± 7.40
-
-
- GPT-4
- 0.39 ± 0.00
- 0.21 ± 0.00
- 0.26 ± 0.00
- -0.10 ± 0.09
- 0.04 ± 0.00
- 0.83 ± 0.00
- 0.83 ± 0.71
- 24.48 ± 0.00
- 0.45 ± 0.00
- 0.20 ± 0.00
- 0.27 ± 0.00
- -0.17 ± 0.00
- 0.28 ± 0.00
- 0.80 ± 0.03
- 0.81 ± 0.00
- 20.40 ± 1.59
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/robustness-aware/text-classification.md b/_pages/kr/robustness-aware/text-classification.md
index 3600278..1951d98 100644
--- a/_pages/kr/robustness-aware/text-classification.md
+++ b/_pages/kr/robustness-aware/text-classification.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/kr/robustness-aware/text-classification
---
# Robustness-Aware Text Classification Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- UiT-VSMEC
- PhoATIS
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.25 ± 0.00
- 0.16 ± 0.00
- 0.56 ± 0.02
- 0.20 ± 0.00
- 0.33 ± 0.00
- 0.16 ± 0.02
- 0.26 ± 0.03
- 0.79 ± 0.00
- 0.79 ± 0.02
- 0.08 ± 0.06
-
-
- URA-LLaMa 13B
- 0.30 ± 0.00
- 0.11 ± 0.00
- 0.51 ± 0.01
- 0.26 ± 0.00
- 0.44 ± 0.00
- 0.01 ± 0.01
- 0.05 ± 0.01
- 0.47 ± 0.01
- 0.84 ± 0.01
- 0.00 ± 0.04
-
-
- URA-LLaMa 7B
- 0.29 ± 0.00
- 0.10 ± 0.00
- 0.57 ± 0.01
- 0.17 ± 0.00
- 0.30 ± 0.00
- 0.02 ± 0.01
- 0.04 ± 0.00
- 0.55 ± 0.01
- 0.18 ± 0.01
- 0.01 ± 0.02
-
-
- LLaMa-2 13B
- 0.19 ± 0.00
- 0.07 ± 0.00
- 0.52 ± 0.01
- 0.47 ± 0.00
- 0.43 ± 0.00
- 0.02 ± 0.00
- 0.06 ± 0.00
- 0.57 ± 0.01
- 0.91 ± 0.00
- 0.01 ± 0.00
-
-
- LLaMa-2 7B
- 0.17 ± 0.00
- 0.10 ± 0.00
- 0.55 ± 0.00
- 0.33 ± 0.00
- 0.29 ± 0.00
- 0.01 ± 0.01
- 0.00 ± 0.00
- 0.56 ± 0.00
- 0.69 ± 0.01
- 0.02 ± 0.02
-
-
- Vietcuna 7B
- 0.09 ± 0.00
- 0.09 ± 0.00
- 0.51 ± 0.01
- 0.91 ± 0.00
- 0.09 ± 0.00
- 0.02 ± 0.01
- 0.01 ± 0.00
- 0.55 ± 0.01
- 0.23 ± 0.01
- 0.02 ± 0.01
-
-
- MixSUra 8x7B
- 0.35 ± -
- 0.27 ± -
- 0.70 ± -
- 0.58 ± -
- 0.70 ± -
- 0.80 ± -
- 55 ± -
- 0.94 ± -
- 0.15 ± -
- 0.88 ± -
-
-
- GPT-3.5
- 0.42 ± 0.00
- 0.41 ± 0.00
- -
- 0.28 ± 0.00
- 0.30 ± 0.00
- 0.68 ± 0.02
- 0.64 ± 0.03
- -
- 0.62 ± 0.02
- 0.70 ± 0.05
-
-
- GPT-4
- 0.48 ± 0.00
- 0.45 ± 0.00
- -
- 0.33 ± 0.00
- 0.40 ± 0.00
- 0.86 ± 0.01
- 0.80 ± 0.02
- -
- 0.80 ± 0.01
- 0.91 ± 0.03
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/robustness-aware/toxicity-detection.md b/_pages/kr/robustness-aware/toxicity-detection.md
index 275a770..1f72cd4 100644
--- a/_pages/kr/robustness-aware/toxicity-detection.md
+++ b/_pages/kr/robustness-aware/toxicity-detection.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/kr/robustness-aware/toxicity-detection
---
# Robustness-Aware Toxicity Detection Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- UiT-ViCTSD
- UiT-ViHSD
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.32 ± 0.00
- 0.21 ± 0.00
- 0.72 ± 0.01
- 0.62 ± 0.00
- 0.33 ± 0.00
- 0.14 ± 0.00
- 0.12 ± 0.00
- 0.64 ± 0.02
- 0.61 ± 0.00
- 0.23 ± 0.00
-
-
- URA-LLaMa 13B
- 0.27 ± 0.00
- 0.26 ± 0.00
- 0.56 ± 0.00
- 0.56 ± 0.00
- 0.12 ± 0.00
- 0.18 ± 0.00
- 0.11 ± 0.00
- 0.57 ± 0.01
- 0.45 ± 0.00
- 0.20 ± 0.00
-
-
- URA-LLaMa 7B
- 0.22 ± 0.00
- 0.21 ± 0.00
- 0.63 ± 0.00
- 0.39 ± 0.00
- 0.36 ± 0.00
- 0.12 ± 0.00
- 0.07 ± 0.00
- 0.62 ± 0.00
- 0.38 ± 0.00
- 0.19 ± 0.00
-
-
- LLaMa-2 13B
- 0.12 ± 0.00
- 0.11 ± 0.00
- 0.56 ± 0.01
- 0.66 ± 0.00
- 0.12 ± 0.00
- 0.10 ± 0.00
- 0.07 ± 0.00
- 0.59 ± 0.01
- 0.62 ± 0.00
- 0.24 ± 0.00
-
-
- LLaMa-2 7B
- 0.04 ± 0.00
- 0.04 ± 0.00
- 0.62 ± 0.00
- 0.86 ± 0.00
- 0.02 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.54 ± 0.00
- 0.79 ± 0.00
- 0.00 ± 0.00
-
-
- Vietcuna 7B
- 0.11 ± 0.00
- 0.11 ± 0.00
- 0.54 ± 0.00
- 0.39 ± 0.00
- 0.13 ± 0.00
- 0.09 ± 0.00
- 0.05 ± 0.00
- 0.5 ± 0.00
- 0.24 ± 0.00
- 0.08 ± 0.00
-
-
- MixSUra 8x7B
- 0.72 ± -
- 0.39 ± -
- - ± -
- 0.25 ± -
- 0.81 ± -
- 0.66 ± -
- 0.31 ± -
- 0.67 ± -
- 0.21 ± -
- 0.82 ± -
-
-
- GPT-3.5
- 0.51 ± 0.00
- 0.46 ± 0.00
- 0.5 ± 0.00
- 0.01 ± 0.00
- 0.54 ± 0.00
- 0.64 ± 0.00
- 0.47 ± 0.00
- - ± -
- 0.30 ± 0.00
- 0.63 ± 0.00
-
-
- GPT-4
- 0.88 ± 0.00
- 0.71 ± 0.00
- - ± -
- 0.38 ± 0.00
- 0.88 ± 0.00
- 0.78 ± 0.00
- 0.56 ± 0.00
- - ± -
- 0.44 ± 0.00
- 0.78 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/robustness-aware/translation.md b/_pages/kr/robustness-aware/translation.md
index a710c7a..e116582 100644
--- a/_pages/kr/robustness-aware/translation.md
+++ b/_pages/kr/robustness-aware/translation.md
@@ -3,124 +3,84 @@ layout: default
permalink: /leaderboard/kr/robustness-aware/translation
---
# Robustness-Aware Translation Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- PhoMT
- OPUS100
-
-
- (En → Vi)
- (Vi → En)
- (En → Vi)
- (Vi → En)
- (En → Vi)
- (Vi → En)
- (En → Vi)
- (Vi → En)
-
-
-
-
- URA-LLaMa 70B
- 0.25 ± 0.00
- 0.58 ± 0.00
- 0.11 ± 0.00
- 0.51 ± 0.00
- 0.05 ± 0.00
- 0.40 ± 0.01
- 0.06 ± 0.00
- 0.36 ± 0.00
-
-
- URA-LLaMa 13B
- 0.23 ± 0.00
- 0.55 ± 0.00
- 0.10 ± 0.00
- 0.50 ± 0.00
- 0.03 ± 0.00
- 0.38 ± 0.01
- 0.05 ± 0.00
- 0.38 ± 0.00
-
-
- URA-LLaMa 7B
- 0.15 ± 0.00
- 0.48 ± 0.00
- 0.06 ± 0.00
- 0.46 ± 0.00
- 0.02 ± 0.00
- 0.35 ± 0.00
- 0.03 ± 0.00
- 0.34 ± 0.01
-
-
- LLaMa-2 13B
- 0.20 ± 0.00
- 0.51 ± 0.00
- 0.07 ± 0.00
- 0.44 ± 0.00
- 0.03 ± 0.00
- 0.36 ± 0.01
- 0.04 ± 0.00
- 0.32 ± 0.00
-
-
- LLaMa-2 7B
- 0.13 ± 0.00
- 0.41 ± 0.00
- 0.05 ± 0.00
- 0.42 ± 0.00
- 0.02 ± 0.00
- 0.31 ± 0.00
- 0.03 ± 0.00
- 0.30 ± 0.00
-
-
- Vietcuna 7B
- 0.17 ± 0.00
- 0.43 ± 0.00
- 0.07 ± 0.01
- 0.41 ± 0.00
- 0.09 ± 0.01
- 0.38 ± 0.01
- 0.09 ± 0.01
- 0.33 ± 0.00
-
-
- MixSUra 8x7B
- 0.14 ± -
- 0.50 ± -
- 0.11 ± -
- 0.46 ± -
- 0.06 ± -
- 0.36 ± -
- 0.06 ± -
- 0.31 ± -
-
-
- GPT-3.5
- 0.31 ± 0.00
- 0.64 ± 0.00
- 0.17 ± 0.00
- 0.59 ± 0.00
- 0.15 ± 0.01
- 0.49 ± 0.01
- 0.21 ± 0.01
- 0.48 ± 0.00
-
-
- GPT-4
- 0.31 ± 0.00
- 0.65 ± 0.00
- 0.20 ± 0.00
- 0.62 ± 0.00
- 0.16 ± 0.01
- 0.50 ± 0.01
- 0.23 ± 0.01
- 0.51 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.translation %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.translation %}
+ BLEU envi↑
+ BLEU vien↑
+ hLEPOR envi↑
+ hLEPOR vien↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.translation %}
+ {% assign bleu_envi_best = 0 %}
+ {% assign bleu_vien_best = 0 %}
+ {% assign hlepor_envi_best = 0 %}
+ {% assign hlepor_vien_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["BLEU envi"] and dataset[1][m]["BLEU envi"] > bleu_envi_best %}
+ {% assign bleu_envi_best = dataset[1][m]["BLEU envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["BLEU vien"] and dataset[1][m]["BLEU vien"] > bleu_vien_best %}
+ {% assign bleu_vien_best = dataset[1][m]["BLEU vien"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR envi"] and dataset[1][m]["hLEPOR envi"] > hlepor_envi_best %}
+ {% assign hlepor_envi_best = dataset[1][m]["hLEPOR envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR vien"] and dataset[1][m]["hLEPOR vien"] > hlepor_vien_best %}
+ {% assign hlepor_vien_best = dataset[1][m]["hLEPOR vien"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["BLEU envi"] %}
+ {{ dataset[1][model]["BLEU envi"] | round: 2 }} ± {{ dataset[1][model]["BLEU envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["BLEU vien"] %}
+ {{ dataset[1][model]["BLEU vien"] | round: 2 }} ± {{ dataset[1][model]["BLEU vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR envi"] %}
+ {{ dataset[1][model]["hLEPOR envi"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR vien"] %}
+ {{ dataset[1][model]["hLEPOR vien"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/weaker-prompt/question-answering.md b/_pages/kr/weaker-prompt/question-answering.md
index 9e75486..f15a887 100644
--- a/_pages/kr/weaker-prompt/question-answering.md
+++ b/_pages/kr/weaker-prompt/question-answering.md
@@ -3,63 +3,60 @@ layout: default
permalink: /leaderboard/kr/weaker-prompt/question-answering
---
# Weak-Prompt Question Answering Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- XQuAD
- MLQA
-
-
- EM↑
- F1↑
- EM↑
- F1↑
-
-
-
-
- URA-LLaMa 70B
- 0.21 ± 0.01
- 0.47 ± 0.01
- 0.14 ± 0.01
- 0.41 ± 0.00
-
-
- URA-LLaMa 13B
- 0.22 ± 0.01
- 0.43 ± 0.01
- 0.17 ± 0.01
- 0.40 ± 0.01
-
-
- URA-LLaMa 7B
- 0.13 ± 0.00
- 0.32 ± 0.00
- 0.10 ± 0.00
- 0.32 ± 0.00
-
-
- LLaMa-2 13B
- 0.04 ± 0.00
- 0.28 ± 0.00
- 0.04 ± 0.00
- 0.28 ± 0.00
-
-
- LLaMa-2 7B
- 0.06 ± 0.00
- 0.24 ± 0.00
- 0.05 ± 0.00
- 0.24 ± 0.00
-
-
- MixSUra 8x7b
- 0.13 ±-
- 0.38 ± -
- 0.09 ± -
- 0.36 ± -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/weaker-prompt/summarization.md b/_pages/kr/weaker-prompt/summarization.md
index 76b649e..c079f68 100644
--- a/_pages/kr/weaker-prompt/summarization.md
+++ b/_pages/kr/weaker-prompt/summarization.md
@@ -3,147 +3,132 @@ layout: default
permalink: /leaderboard/kr/weaker-prompt/summarization
---
# Weak-Prompt Summarization Leaderboard
+{% assign lang = 'kr' %}
-
-
-Models
-VietNews
-WikiLingua
-
-
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-
-
-
-
-URA-LLaMa 70B
-0.49 ± 0.00
-0.23 ± 0.00
-0.31 ± 0.00
--0.08 ± 0.00
-0.05 ± 0.11
-0.89 ± 0.00
-8.90 ± 0.03
-18.48 ± 0.59
-0.47 ± 0.00
-0.20 ± 0.00
-0.29 ± 0.00
--0.16 ± 0.00
-0.19 ± 0.13
-0.86 ± 0.00
-6.83 ± 0.09
-25.30 ± 1.86
-
-
-URA-LLaMa 13B
-0.27 ± 0.00
-0.12 ± 0.00
-0.18 ± 0.00
--0.09 ± 0.00
-0.05 ± 0.11
-0.56 ± 0.00
-5.00 ± 0.04
-153.55 ± 0.99
-0.22 ± 0.00
-0.09 ± 0.00
-0.14 ± 0.00
--0.16 ± 0.00
-0.20 ± 0.007
-0.48 ± 0.00
-3.49 ± 0.04
-190.09 ± 4.92
-
-
-URA-LLaMa 7B
-0.45 ± 0.00
-0.21 ± 0.00
-0.29 ± 0.00
--0.08 ± 0.00
-0.03 ± 0.09
-0.91 ± 0.00
-9.43 ± 0.03
-6.42 ± 0.05
-0.42 ± 0.00
-0.18 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.07 ± 0.12
-0.89 ± 0.00
-7.58 ± 0.05
-7.14 ± 0.14
-
-
-LLaMa-2 13B
-0.45 ± 0.00
-0.22 ± 0.00
-0.29 ± 0.00
--0.09 ± 0.00
-0.00 ± 0.14
-0.92 ± 0.00
-9.49 ± 0.02
-8.46 ± 0.29
-0.47 ± 0.00
-0.22 ± 0.00
-0.29 ± 0.00
--0.16 ± 0.00
-0.34 ± 0.12
-0.92 ± 0.00
-9.39 ± 0.05
-17.94 ± 2.84
-
-
-LLaMa-2 7B
-0.36 ± 0.00
-0.17 ± 0.00
-0.23 ± 0.00
--0.09 ± 0.00
--0.15 ± 0.12
-0.69 ± 0.00
-6.35 ± 0.03
-7.59 ± 0.21
-0.45 ± 0.00
-0.20 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.36 ± 0.00
-0.83 ± 0.00
-7.71 ± 0.07
-12.39 ± 1.46
-
-
-MixSUra 8x7B
-0.44 ± -
-0.22 ± -
-0.29 ± -
-- ± -
-0.07 ± -
-0.97 ± -
-35.67 ± -
-9.43 ± -
-0.47 ± -
-0.22 ± -
-0.29 ± -
-- ± -
-0.19 ± -
-0.97 ± -
-28.97 ± -
-10.27 ± -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/zero-shot/information-retrieval.md b/_pages/kr/zero-shot/information-retrieval.md
index b43d715..1f21b31 100644
--- a/_pages/kr/zero-shot/information-retrieval.md
+++ b/_pages/kr/zero-shot/information-retrieval.md
@@ -3,113 +3,84 @@ layout: default
permalink: /leaderboard/kr/zero-shot/information-retrieval
---
# Zero-Shot Information Retrieval Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- mMARCO
- mRobust04
-
-
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
-
-
-
-
- URA-LLaMa 70B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- URA-LLaMa 13B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- URA-LLaMa 7B
- 0.06 ± 0.00
- 0.14 ± 0.00
- 0.09 ± 0.00
- 0.21 ± 0.00
- -
- -
- -
- -
-
-
- LLaMa-2 13B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- LLaMa-2 7B
- 0.06 ± 0.00
- 0.11 ± 0.00
- 0.08 ± 0.00
- 0.17 ± 0.00
- -
- -
- -
- -
-
-
- Vietcuna 7B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/zero-shot/knowledge.md b/_pages/kr/zero-shot/knowledge.md
index 10caf83..d54075f 100644
--- a/_pages/kr/zero-shot/knowledge.md
+++ b/_pages/kr/zero-shot/knowledge.md
@@ -2,105 +2,129 @@
layout: default
permalink: /leaderboard/kr/zero-shot/knowledge
---
-# Zero-Shot Knowledge Leaderboard
+# Zero-shot Knowledge Leaderboard
+{% assign lang = 'kr' %}
-
-
-Models
-ZaloE2E
-ViMMRC
-
-
-EM↑
-F1↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.28 ± 0.02
-0.44 ± 0.02
-0.80 ± 0.02
-0.80 ± 0.02
-0.85 ± 0.01
-0.10 ± 0.02
-0.96 ± 0.03
-
-
-URA-LLaMa 13B
-0.12 ± 0.01
-0.22 ± 0.01
-0.40 ± 0.02
-0.31 ± 0.02
-0.57 ± 0.02
-0.48 ± 0.02
-0.42 ± 0.08
-
-
-URA-LLaMa 7B
-0.09 ± 0.01
-0.20 ± 0.02
-0.30 ± 0.02
-0.10 ± 0.01
-0.56 ± 0.02
-0.27 ± 0.02
-0.56 ± 0.07
-
-
-LLaMa-2 13B
-0.06 ± 0.01
-0.10 ± 0.01
-0.52 ± 0.02
-0.41 ± 0.02
-0.64 ± 0.02
-0.33 ± 0.02
-0.73 ± 0.07
-
-
-LLaMa-2 7B
-0.03 ± 0.01
-0.07 ± 0.01
-0.37 ± 0.02
-0.25 ± 0.02
-0.51 ± 0.02
-0.35 ± 0.02
-0.29 ± 0.06
-
-
-Vietcuna 7B
-0.03 ± 0.01
-0.06 ± 0.01
-0.32 ± 0.02
-0.22 ± 0.02
-0.50 ± 0.00
-0.07 ± 0.02
-0.33 ± 0.07
-
-
-GPT-3.5
-0.37 ± 0.02
-0.56 ± 0.02
-0.90 ± 0.01
-0.72 ± 0.01
--
-0.65 ± 0.01
-0.90 ± 0.04
-
-
-GPT-4
-0.38 ± 0.02
-0.55 ± 0.02
-0.92 ± 0.01
-0.73 ± 0.06
--
-0.67 ± 0.01
-0.90 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {{ dataset[0] }}
+
+ {% else %}
+
+ {{ dataset[0] }}
+
+ {% endif %}
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+ EM↑
+ F1↑
+ {% else %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.knowledge %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AC_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% else %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endif %}
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/zero-shot/language-modeling.md b/_pages/kr/zero-shot/language-modeling.md
index 2c2a949..64e1df9 100644
--- a/_pages/kr/zero-shot/language-modeling.md
+++ b/_pages/kr/zero-shot/language-modeling.md
@@ -3,149 +3,108 @@ layout: default
permalink: /leaderboard/kr/zero-shot/language-modeling
---
# Zero-Shot Language Modeling Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- MLQA-MLM
- VSEC
-
-
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
-
-
-
-
- URA-LLaMa 70B
- 0.00 ± 0.00
- 0.50 ± 0.01
- 0.64 ± 0.01
- 519.09 ± 10.96
- 115.82 ± 2.45
- 1.08 ± 0.01
- 0.00 ± 0.00
- 0.88 ± 0.00
- 1.01 ± 0.00
- 113.51 ± 0.57
- 29.91 ± 0.15
- 1.09 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.67 ± 0.00
- 0.78 ± 0.00
- 697.85 ± 11.62
- 161.34 ± 2.64
- 1.16 ± 0.02
- 0.01 ± 0.00
- 0.42 ± 0.01
- 0.56 ± 0.01
- 54.88 ± 0.77
- 14.50 ± 0.19
- 1.26 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.73 ± 0.00
- 0.88 ± 0.01
- 684.00 ± 13.18
- 166.87 ± 3.18
- 1.25 ± 0.01
- 0.01 ± 0.00
- 3.33 ± 0.04
- 3.14 ± 0.03
- 420.34 ± 5.66
- 85.79 ± 0.96
- 1.33 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.90 ± 0.00
- 1.00 ± 0.00
- 881.97 ± 11.23
- 208.52 ± 2.52
- 1.10 ± 0.01
- 0.00 ± 0.00
- 1.32 ± 0.01
- 1.40 ± 0.01
- 160.06 ± 1.16
- 38.12 ± 0.23
- 1.11 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.95 ± 0.00
- 1.07 ± 0.01
- 860.42 ± 13.18
- 210.21 ± 3.18
- 1.25 ± 0.01
- 0.00 ± 0.00
- 1.54 ± 0.04
- 1.55 ± 0.03
- 171.28 ± 5.66
- 40.18 ± 0.96
- 1.14 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 1.00 ± 0.00
- 1.00 ± 0.00
- 951.53 ± 12.37
- 208.57 ± 2.73
- 1.48 ± 0.01
- 0.01 ± 0.00
- 1.11 ± 0.01
- 1.20 ± 0.01
- 139.90 ± 1.39
- 33.94 ± 0.33
- 1.61 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.34 ± 0.01
- 0.50 ± 0.01
- 422.30 ± 10.79
- 100.33 ± 2.44
- -
- 0.02 ± 0.00
- 0.16 ± 0.00
- 0.30 ± 0.00
- 12.63 ± 0.34
- 3.48 ± 0.09
- -
-
-
- GPT-4
- 0.04 ± 0.00
- 0.40 ± 0.01
- 0.45 ± 0.01
- 381.88 ± 10.26
- 93.34 ± 2.39
- -
- 0.60 ± 0.01
- 0.14 ± 0.00
- 0.26 ± 0.00
- 13.58 ± 0.45
- 3.67 ± 0.12
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.language_modeling %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.language_modeling %}
+ EM↑
+ CER↓
+ WER↓
+ CED↓
+ WED↓
+ PLX↓
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.language_modeling %}
+ {% assign EM_best = 0 %}
+ {% assign CER_best = 1 %}
+ {% assign WER_best = 1 %}
+ {% assign CED_best = 10000 %}
+ {% assign WED_best = 10000 %}
+ {% assign PLX_best = 10000 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].CER and dataset[1][m].CER < CER_best %}
+ {% assign CER_best = dataset[1][m].CER %}
+ {% endif %}
+ {% if dataset[1][m].WER and dataset[1][m].WER < WER_best %}
+ {% assign WER_best = dataset[1][m].WER %}
+ {% endif %}
+ {% if dataset[1][m].CED and dataset[1][m].CED < CED_best %}
+ {% assign CED_best = dataset[1][m].CED %}
+ {% endif %}
+ {% if dataset[1][m].WED and dataset[1][m].WED < WED_best %}
+ {% assign WED_best = dataset[1][m].WED %}
+ {% endif %}
+ {% if dataset[1][m].PLX and dataset[1][m].PLX < PLX_best %}
+ {% assign PLX_best = dataset[1][m].PLX %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CER %}
+ {{ dataset[1][model].CER | round: 2 }} ± {{ dataset[1][model].CER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WER %}
+ {{ dataset[1][model].WER | round: 2 }} ± {{ dataset[1][model].WER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CED %}
+ {{ dataset[1][model].CED | round: 2 }} ± {{ dataset[1][model].CED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WED %}
+ {{ dataset[1][model].WED | round: 2 }} ± {{ dataset[1][model].WED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].PLX %}
+ {{ dataset[1][model].PLX | round: 2 }} ± {{ dataset[1][model].PLX_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/zero-shot/question-answering.md b/_pages/kr/zero-shot/question-answering.md
index cfe29fc..e4eb8c2 100644
--- a/_pages/kr/zero-shot/question-answering.md
+++ b/_pages/kr/zero-shot/question-answering.md
@@ -3,77 +3,60 @@ layout: default
permalink: /leaderboard/kr/zero-shot/question-answering
---
# Zero-Shot Question Answering Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- XQuAD
- MLQA
-
-
- EM↑
- F1↑
- EM↑
- F1↑
-
-
-
-
- URA-LLaMa 70B
- 0.06 ± 0.00
- 0.30 ± 0.00
- 0.04 ± 0.00
- 0.28 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.14 ± 0.00
- 0.00 ± 0.00
- 0.15 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.14 ± 0.00
- 0.00 ± 0.00
- 0.16 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.02
- 0.05 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.05 ± 0.00
- 0.00 ± 0.00
- 0.06 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.24 ± 0.00
- 0.00 ± 0.00
- 0.25 ± 0.00
-
-
- GPT-4
- 0.00 ± 0.00
- 0.27 ± 0.00
- 0.00 ± 0.00
- 0.27 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/zero-shot/reasoning.md b/_pages/kr/zero-shot/reasoning.md
index 8dd405a..4eac335 100644
--- a/_pages/kr/zero-shot/reasoning.md
+++ b/_pages/kr/zero-shot/reasoning.md
@@ -3,123 +3,72 @@ layout: default
permalink: /leaderboard/kr/zero-shot/reasoning
---
# Zero-Shot Reasoning Leaderboard
+{% assign lang = 'kr' %}
- Models
- SR - Natural
- SR - Abstract symbol
- MATH
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.reasoning %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
+ {% for dataset in site.data.leaderboard[lang].zero_shot.reasoning %}
+ EM↑
+ F1↑
+ Equ↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.06 ± 0.00
- 0.34 ± 0.00
- 0.06 ± 0.00
- 0.02 ± 0.00
- 0.24 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.24 ± 0.02
-
-
- URA-LLaMa 13B
- 0.01 ± 0.00
- 0.31 ± 0.00
- 0.02 ± 0.00
- 0.02 ± 0.00
- 0.24 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.14 ± 0.02
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.26 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.17 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.01
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.06 ± 0.00
- 0.00 ± 0.00
- 0.02 ± 0.00
- 0.19 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.16 ± 0.02
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.06 ± 0.01
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
-
-
- GPT-3.5
- 0.21 ± 0.00
- 0.59 ± 0.00
- 0.32 ± 0.00
- 0.09 ± 0.00
- 0.28 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.72 ± 0.02
-
-
- GPT-4
- 0.21 ± 0.00
- 0.59 ± 0.00
- 0.32 ± 0.00
- 0.09 ± 0.00
- 0.28 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.76 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.reasoning %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign Equ_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m]["Equ"] and dataset[1][m]["Equ"] > Equ_best %}
+ {% assign Equ_best = dataset[1][m]["Equ"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["Equ"] %}
+ {{ dataset[1][model]["Equ"] | round: 2 }} ± {{ dataset[1][model]["Equ_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/zero-shot/sentiment-analysis.md b/_pages/kr/zero-shot/sentiment-analysis.md
index 99c521a..88dccd8 100644
--- a/_pages/kr/zero-shot/sentiment-analysis.md
+++ b/_pages/kr/zero-shot/sentiment-analysis.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/kr/zero-shot/sentiment-analysis
---
# Zero-Shot Sentiment Analysis Leaderboard
+{% assign lang = 'kr' %}
- Models
- VLSP 2016
- UiT-VSFC
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
+ {% for dataset in site.data.leaderboard[lang].zero_shot.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.63 ± 0.02
- 0.63 ± 0.02
- 0.74 ± 0.01
- 0.15 ± 0.01
- 0.87 ± 0.03
- 0.64 ± 0.01
- 0.54 ± 0.01
- 0.85 ± 0.01
- 0.14 ± 0.00
- 0.98 ± 0.01
-
-
- URA-LLaMa 13B
- 0.52 ± 0.02
- 0.35 ± 0.01
- 0.60 ± 0.01
- 0.10 ± 0.01
- 0.64 ± 0.05
- 0.70 ± 0.01
- 0.40 ± 0.01
- 0.72 ± 0.01
- 0.23 ± 0.01
- 0.95 ± 0.01
-
-
- URA-LLaMa 7B
- 0.35 ± 0.02
- 0.24 ± 0.01
- 0.54 ± 0.01
- 0.24 ± 0.01
- 0.31 ± 0.05
- 0.27 ± 0.01
- 0.18 ± 0.00
- 0.52 ± 0.01
- 0.37 ± 0.01
- 0.03 ± 0.01
-
-
- LLaMa-2 13B
- 0.25 ± 0.01
- 0.25 ± 0.01
- 0.49 ± 0.01
- 0.39 ± 0.01
- 0.29 ± 0.05
- 0.29 ± 0.01
- 0.24 ± 0.01
- 0.52 ± 0.01
- 0.42 ± 0.01
- 0.30 ± 0.03
-
-
- LLaMa-2 7B
- 0.15 ± 0.01
- 0.15 ± 0.01
- 0.58 ± 0.01
- 0.73 ± 0.01
- 0.12 ± 0.03
- 0.04 ± 0.00
- 0.06 ± 0.01
- 0.49 ± 0.01
- 0.79 ± 0.00
- 0.01 ± 0.01
-
-
- Vietcuna 7B
- 0.11 ± 0.01
- 0.12 ± 0.01
- 0.49 ± 0.01
- 0.68 ± 0.01
- 0.11 ± 0.03
- 0.05 ± 0.00
- 0.06 ± 0.00
- 0.56 ± 0.01
- 0.73 ± 0.00
- 0.05 ± 0.01
-
-
- MixSUra 8x7B
- 0.45 ± -
- 0.30 ± -
- 0.62 ± -
- 0.50 ± -
- 0.49 ± -
- 0.55 ± -
- 0.40 ± -
- 0.66 ± -
- 0.41 ± -
- 0.60 ± -
-
-
- Gemini Pro
- 0.64 ± -
- 0.47 ± -
- -
- 0.31 ± -
- 0.53 ± -
- 0.76 ± -
- 0.49 ± -
- -
- 0.43 ± -
- 0.77 ± -
-
-
- GPT-3.5
- 0.62 ± 0.02
- 0.56 ± 0.01
- -
- 0.29 ± 0.02
- 0.62 ± 0.05
- 0.81 ± 0.31
- 0.68 ± 0.31
- -
- 0.48 ± 0.01
- 0.83 ± 0.02
-
-
- GPT-4
- 0.71 ± 0.01
- 0.68 ± 0.01
- -
- 0.37 ± 0.01
- 0.70 ± 0.04
- 0.80 ± 0.01
- 0.67 ± 0.01
- -
- 0.47 ± 0.01
- 0.85 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/kr/zero-shot/summarization.md b/_pages/kr/zero-shot/summarization.md
index d878b05..25b89a1 100644
--- a/_pages/kr/zero-shot/summarization.md
+++ b/_pages/kr/zero-shot/summarization.md
@@ -3,185 +3,132 @@ layout: default
permalink: /leaderboard/kr/zero-shot/summarization
---
# Zero-Shot Summarization Leaderboard
+{% assign lang = 'kr' %}
-
-
-Models
-VietNews
-WikiLingua
-
-
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-
-
-
-
-URA-LLaMa 70B
-0.42 ± 0.17
-0.21 ± 0.12
-0.28 ± 0.00
--0.11 ± 0.00
-0.03 ± 0.19
-0.85 ± 0.00
-14.59 ± 0.05
-17.21 ± 0.33
-0.37 ± 0.00
-0.16 ± 0.00
-0.24 ± 0.00
--0.22 ± 0.00
-0.26 ± 0.16
-0.17 ± 0.00
-0.22 ± 0.00
-22.24 ± 0.97
-
-
-URA-LLaMa 13B
-0.38 ± 0.00
-0.18 ± 0.00
-0.25 ± 0.00
--0.09 ± 0.00
-0.01 ± 0.18
-0.71 ± 0.00
-6.01 ± 0.07
-24.27 ± 0.61
-0.22 ± 0.00
-0.08 ± 0.00
-0.14 ± 0.00
--0.16 ± 0.00
--0.13 ± 0.12
-0.42 ± 0.01
-3.06 ± 0.10
-49.58 ± 1.16
-
-
-URA-LLaMa 7B
-0.38 ± 0.00
-0.14 ± 0.00
-0.25 ± 0.00
--0.09 ± 0.00
-0.04 ± 0.12
-0.65 ± 0.00
-4.88 ± 0.03
-7.77 ± 0.05
-0.40 ± 0.00
-0.15 ± 0.00
-0.26 ± 0.00
--0.16 ± 0.00
-0.19 ± 0.07
-0.73 ± 0.00
-4.79 ± 0.07
-6.22 ± 0.07
-
-
-LLaMa-2 13B
-0.06 ± 0.00
-0.02 ± 0.00
-0.04 ± 0.00
--0.09 ± 0.00
--0.18 ± 0.04
-0.07 ± 0.00
-0.43 ± 0.01
-28.25 ± 0.24
-0.04 ± 0.00
-0.00 ± 0.00
-0.03 ± 0.00
--0.16 ± 0.00
--0.11 ± 0.08
-0.03 ± 0.00
-0.07 ± 0.01
-19.55 ± 0.51
-
-
-LLaMa-2 7B
-0.06 ± 0.00
-0.01 ± 0.00
-0.05 ± 0.00
--0.09 ± 0.00
--0.23 ± 0.04
-0.06 ± 0.00
-0.21 ± 0.00
-15.75 ± 0.20
-0.04 ± 0.00
-0.00 ± 0.00
-0.03 ± 0.00
--0.16 ± 0.00
--0.14 ± 0.07
-0.03 ± 0.00
-0.06 ± 0.00
-17.84 ± 0.50
-
-
-Vietcuna 7B
-0.28 ± 0.00
-0.06 ± 0.00
-0.18 ± 0.00
--0.09 ± 0.00
--0.09 ± 0.09
-0.31 ± 0.00
-0.80 ± 0.01
-171.63 ± 1.71
-0.24 ± 0.00
-0.06 ± 0.00
-0.15 ± 0.00
--0.16 ± 0.00
--0.18 ± 0.07
-0.51 ± 0.01
-1.16 ± 0.01
-238.67 ± 3.37
-
-
-GPT-3.5
-0.36 ± 0.00
-0.20 ± 0.00
-0.24 ± 0.00
--0.09 ± 0.00
-0.04 ± 0.13
-0.86 ± 0.00
-3.97 ± 0.02
-13.32 ± 0.65
-0.43 ± 0.00
-0.21 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.22 ± 0.03
-0.87 ± 0.00
-3.29 ± 0.03
-35.50 ± 0.82
-
-
-GPT-4
-0.41 ± 0.00
-0.21 ± 0.00
-0.26 ± 0.00
--0.08 ± 0.00
--0.04 ± 0.11
-0.84 ± 0.00
-3.45 ± 0.00
-15.43 ± 0.49
-0.44 ± 0.00
-0.21 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.24 ± 0.04
-0.82 ± 0.00
-2.37 ± 0.01
-6.61 ± 0.16
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/zero-shot/text-classification.md b/_pages/kr/zero-shot/text-classification.md
index 9f55058..c297565 100644
--- a/_pages/kr/zero-shot/text-classification.md
+++ b/_pages/kr/zero-shot/text-classification.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/kr/zero-shot/text-classification
---
# Zero-Shot Text Classification Leaderboard
+{% assign lang = 'kr' %}
-
-
- Models
- UiT-VSMEC
- PhoATIS
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.40 ± 0.02
- 0.32 ± 0.02
- 0.68 ± 0.01
- 0.14 ± 0.02
- 0.60 ± 0.06
- 0.56 ± 0.02
- 0.48 ± 0.03
- 0.85 ± 0.00
- 0.25 ± 0.02
- 0.56 ± 0.06
-
-
- URA-LLaMa 13B
- 0.29 ± 0.02
- 0.25 ± 0.02
- 0.52 ± 0.01
- 0.09 ± 0.01
- 0.23 ± 0.05
- 0.10 ± 0.01
- 0.10 ± 0.01
- 0.72 ± 0.00
- 0.52 ± 0.01
- 0.14 ± 0.04
-
-
- URA-LLaMa 7B
- 0.13 ± 0.01
- 0.11 ± 0.01
- 0.50 ± 0.01
- 0.15 ± 0.01
- 0.21 ± 0.05
- 0.04 ± 0.01
- 0.04 ± 0.02
- 0.77 ± 0.00
- 0.30 ± 0.01
- 0.04 ± 0.02
-
-
- LLaMa-2 13B
- 0.11 ± 0.01
- 0.10 ± 0.01
- 0.49 ± 0.01
- 0.31 ± 0.01
- 0.09 ± 0.04
- 0.03 ± 0.01
- 0.02 ± 0.00
- 0.45 ± 0.01
- 0.28 ± 0.01
- 0.03 ± 0.02
-
-
- LLaMa-2 7B
- 0.07 ± 0.01
- 0.08 ± 0.01
- 0.52 ± 0.01
- 0.35 ± 0.01
- 0.07 ± 0.03
- 0.00 ± 0.06
- 0.00 ± 0.06
- 0.61 ± 0.01
- 0.32 ± 0.00
- 0.00 ± 0.00
-
-
- Vietcuna 7B
- 0.05 ± 0.01
- 0.02 ± 0.01
- 0.52 ± 0.01
- 0.95 ± 0.01
- 0.03 ± 0.02
- 0.05 ± 0.01
- 0.01 ± 0.00
- 0.66 ± 0.00
- 0.20 ± 0.01
- 0.01 ± 0.21
-
-
- GPT-3.5
- 0.43 ± 0.02
- 0.37 ± 0.02
- -
- 0.29 ± 0.02
- 0.43 ± 0.06
- 0.44 ± 0.02
- 0.38 ± 0.03
- -
- 0.38 ± 0.02
- 0.44 ± 0.05
-
-
- GPT-4
- 0.49 ± 0.02
- 0.46 ± 0.02
- -
- 0.35 ± 0.02
- 0.50 ± 0.06
- 0.89 ± 0.01
- 0.69 ± 0.02
- -
- 0.83 ± 0.01
- 0.89 ± 0.03
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/kr/zero-shot/toxicity-detection.md b/_pages/kr/zero-shot/toxicity-detection.md
index 184d581..52336ea 100644
--- a/_pages/kr/zero-shot/toxicity-detection.md
+++ b/_pages/kr/zero-shot/toxicity-detection.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/kr/zero-shot/toxicity-detection
---
# Zero-Shot Toxicity Detection Leaderboard
+{% assign lang = 'kr' %}
- Models
- UiT-ViCTSD
- UiT-ViHSD
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
+ {% for dataset in site.data.leaderboard[lang].zero_shot.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.61 ± 0.01
- 0.52 ± 0.01
- 0.77 ± 0.01
- 0.17 ± 0.01
- 0.97 ± 0.01
- 0.38 ± 0.01
- 0.34 ± 0.01
- 0.74 ± 0.01
- 0.25 ± 0.01
- 0.91 ± 0.01
-
-
- URA-LLaMa 13B
- 0.46 ± 0.01
- 0.28 ± 0.03
- 0.53 ± 0.02
- 0.22 ± 0.01
- 0.48 ± 0.03
- 0.33 ± 0.01
- 0.18 ± 0.00
- 0.60 ± 0.01
- 0.35 ± 0.01
- 0.54 ± 0.02
-
-
- URA-LLaMa 7B
- 0.25 ± 0.01
- 0.19 ± 0.01
- 0.53 ± 0.01
- 0.38 ± 0.01
- 0.13 ± 0.02
- 0.19 ± 0.00
- 0.13 ± 0.00
- 0.55 ± 0.01
- 0.46 ± 0.01
- 0.13 ± 0.01
-
-
- LLaMa-2 13B
- 0.16 ± 0.01
- 0.14 ± 0.00
- 0.40 ± 0.01
- 0.50 ± 0.01
- 0.24 ± 0.02
- 0.09 ± 0.00
- 0.13 ± 0.00
- 0.38 ± 0.01
- 0.63 ± 0.00
- 0.10 ± 0.01
-
-
- LLaMa-2 7B
- 0.13 ± 0.01
- 0.14 ± 0.01
- 0.45 ± 0.02
- 0.69 ± 0.01
- 0.09 ± 0.01
- 0.03 ± 0.00
- 0.05 ± 0.01
- 0.56 ± 0.01
- 0.75 ± 0.00
- 0.00 ± 0.00
-
-
- Vietcuna 7B
- 0.09 ± 0.00
- 0.07 ± 0.00
- 0.50 ± 0.00
- 0.41 ± 0.00
- 0.10 ± 0.03
- 0.07 ± 0.00
- 0.04 ± 0.00
- 0.50 ± 0.00
- 0.26 ± 0.00
- 0.07 ± 0.01
-
-
- GPT-3.5
- 0.75 ± 0.01
- 0.61 ± 0.02
- -
- 0.25 ± 0.01
- 0.80 ± 0.04
- 0.55 ± 0.01
- 0.42 ± 0.01
- -
- 0.22 ± 0.01
- 0.55 ± 0.02
-
-
- GPT-4
- 0.89 ± 0.01
- 0.69 ± 0.01
- -
- 0.39 ± 0.01
- 0.89 ± 0.03
- 0.75 ± 0.01
- 0.53 ± 0.01
- -
- 0.42 ± 0.01
- 0.75 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/leaderboard.md b/_pages/leaderboard.md
index 7d2cc16..0d417e0 100644
--- a/_pages/leaderboard.md
+++ b/_pages/leaderboard.md
@@ -152,6 +152,7 @@ Below are our detail evaluation results, please choose the task and scenario to
Zero-shot
Few-shot
Weaker Prompt
+ Medium Prompt
Fairness Aware
Robustness Aware
Chain-of-Thought
diff --git a/_pages/vi/bias-toxicity/question-answering.md b/_pages/vi/bias-toxicity/question-answering.md
index f932c3f..f06e7c4 100644
--- a/_pages/vi/bias-toxicity/question-answering.md
+++ b/_pages/vi/bias-toxicity/question-answering.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/vi/bias-toxicity/question-answering
---
# Bias-Toxicity Question Answering Leaderboard
+{% assign lang = 'vi' %}
-
-
-Models
-XQuAD
-MLQA
-
-
-DRR→|
-DRG→|
-SAR→|
-SAG→|
-Tox↓
-DRR→|
-DRG→|
-SAR→|
-SAG→|
-Tox↓
-
-
-
-
-URA-LLaMa 70B
--
-0.39 ± 0.01
--
-0.41 ± 0.00
-0.02 ± 0.00
--
-0.14 ± 0.02
--
-0.42 ± 0.03
-0.02 ± 0.00
-
-
-URA-LLaMa 13B
--
-0.39 ± 0.01
--
-0.45 ± 0.01
-0.02 ± 0.00
--
-0.17 ± 0.1
--
-0.38 ± 0.00
-0.02 ± 0.00
-
-
-URA-LLaMa 7B
--
-0.43 ± 0.01
--
-0.48 ± 0.00
-0.03 ± 0.00
--
-0.18 ± 0.01
--
-0.37 ± 0.01
-0.02 ± 0.00
-
-
-LLaMa-2 13B
--
-0.35 ± 0.03
--
-0.46 ± 0.00
-0.01 ± 0.00
--
-0.27 ± 0.01
--
-0.43 ± 0.00
-0.01 ± 0.00
-
-
-LLaMa-2 7B
--
-0.46 ± 0.01
--
-0.42 ± 0.00
-0.01 ± 0.00
--
-0.21 ± 0.06
--
-0.45 ± 0.00
-0.01 ± 0.00
-
-
-Vietcuna 7B
--
-0.50 ± 0.00
--
--
-0.04 ± 0.00
--
-0.23 ± 0.09
--
-0.49 ± 0.01
-0.04 ± 0.00
-
-
-GPT-3.5
--
-0.43 ± 0.01
--
-0.48 ± 0.00
-0.02 ± 0.00
--
-0.18 ± 0.01
--
-0.40 ± 0.00
-0.02 ± 0.00
-
-
-GPT-4
--
-0.40 ± 0.01
--
-0.45 ± 0.00
-0.02 ± 0.00
--
-0.16 ± 0.01
--
-0.41 ± 0.01
-0.02 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.question_answering %}
+ DRR↓
+ DRG↓
+ SAR↓
+ SAG↓
+ Tox↓
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.question_answering %}
+ {% assign DRR_min = 1 %}
+ {% assign DRG_min = 1 %}
+ {% assign SAR_min = 1 %}
+ {% assign SAG_min = 1 %}
+ {% assign Tox_min = 1 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].DRR and dataset[1][m].DRR < DRR_min %}
+ {% assign DRR_min = dataset[1][m].DRR %}
+ {% endif %}
+ {% if dataset[1][m].DRG and dataset[1][m].DRG < DRG_min %}
+ {% assign DRG_min = dataset[1][m].DRG %}
+ {% endif %}
+ {% if dataset[1][m].SAR and dataset[1][m].SAR < SAR_min %}
+ {% assign SAR_min = dataset[1][m].SAR %}
+ {% endif %}
+ {% if dataset[1][m].SAG and dataset[1][m].SAG < SAG_min %}
+ {% assign SAG_min = dataset[1][m].SAG %}
+ {% endif %}
+ {% if dataset[1][m].Tox and dataset[1][m].Tox < Tox_min %}
+ {% assign Tox_min = dataset[1][m].Tox %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].DRR %}
+ {{ dataset[1][model].DRR | round: 2 }} ± {{ dataset[1][model].DRR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].DRG %}
+ {{ dataset[1][model].DRG | round: 2 }} ± {{ dataset[1][model].DRG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAR %}
+ {{ dataset[1][model].SAR | round: 2 }} ± {{ dataset[1][model].SAR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAG %}
+ {{ dataset[1][model].SAG | round: 2 }} ± {{ dataset[1][model].SAG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Tox %}
+ {{ dataset[1][model].Tox | round: 2 }} ± {{ dataset[1][model].Tox_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/bias-toxicity/summarization.md b/_pages/vi/bias-toxicity/summarization.md
index a185baf..f93dbf3 100644
--- a/_pages/vi/bias-toxicity/summarization.md
+++ b/_pages/vi/bias-toxicity/summarization.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/vi/bias-toxicity/summarization
---
# Bias-Toxicity Summarization Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- VietNews
- WikiLingua
-
-
- DRR→|
- DRG→|
- SAR→|
- SAG→|
- Tox↓
- DRR→|
- DRG→|
- SAR→|
- SAG→|
- Tox↓
-
-
-
-
- URA-LLaMa 70B
- -
- 0.21 ± 0.01
- -
- 0.31 ± 0.01
- 0.05 ± 0.00
- -
- 0.03 ± 0.02
- -
- 0.25 ± 0.02
- 0.03 ± 0.00
-
-
- URA-LLaMa 13B
- -
- 0.20 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.07 ± 0.04
- -
- 0.31 ± 0.03
- 0.02 ± 0.00
-
-
- URA-LLaMa 7B
- -
- 0.24 ± 0.02
- -
- 0.33 ± 0.01
- 0.04 ± 0.00
- -
- 0.07 ± 0.02
- -
- 0.38 ± 0.02
- 0.03 ± 0.00
-
-
- LLaMa-2 13B
- -
- 0.26 ± 0.01
- -
- 0.38 ± 0.01
- 0.01 ± 0.00
- -
- 0.17 ± 0.08
- -
- 0.50 ± 0.02
- 0.01 ± 0.00
-
-
- LLaMa-2 7B
- -
- 0.28 ± 0.02
- -
- 0.39 ± 0.01
- 0.01 ± 0.00
- -
- 0.39 ± 0.05
- -
- 0.50 ± 0.02
- 0.01 ± 0.00
-
-
- Vietcuna 7B
- -
- 0.21 ± 0.02
- -
- 0.32 ± 0.02
- 0.04 ± 0.00
- -
- 0.17 ± 0.04
- -
- 0.39 ± 0.03
- 0.03 ± 0.00
-
-
- GPT-3.5
- -
- 0.22 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.03 ± 0.02
- -
- 0.28 ± 0.01
- 0.02 ± 0.00
-
-
- GPT-4
- -
- 0.19 ± 0.01
- -
- 0.28 ± 0.01
- 0.06 ± 0.00
- -
- 0.09 ± 0.02
- -
- 0.28 ± 0.01
- 0.02 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.summarization %}
+ DRR↓
+ DRG↓
+ SAR↓
+ SAG↓
+ Tox↓
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.summarization %}
+ {% assign DRR_min = 1 %}
+ {% assign DRG_min = 1 %}
+ {% assign SAR_min = 1 %}
+ {% assign SAG_min = 1 %}
+ {% assign Tox_min = 1 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].DRR and dataset[1][m].DRR < DRR_min %}
+ {% assign DRR_min = dataset[1][m].DRR %}
+ {% endif %}
+ {% if dataset[1][m].DRG and dataset[1][m].DRG < DRG_min %}
+ {% assign DRG_min = dataset[1][m].DRG %}
+ {% endif %}
+ {% if dataset[1][m].SAR and dataset[1][m].SAR < SAR_min %}
+ {% assign SAR_min = dataset[1][m].SAR %}
+ {% endif %}
+ {% if dataset[1][m].SAG and dataset[1][m].SAG < SAG_min %}
+ {% assign SAG_min = dataset[1][m].SAG %}
+ {% endif %}
+ {% if dataset[1][m].Tox and dataset[1][m].Tox < Tox_min %}
+ {% assign Tox_min = dataset[1][m].Tox %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].DRR %}
+ {{ dataset[1][model].DRR | round: 2 }} ± {{ dataset[1][model].DRR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].DRG %}
+ {{ dataset[1][model].DRG | round: 2 }} ± {{ dataset[1][model].DRG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAR %}
+ {{ dataset[1][model].SAR | round: 2 }} ± {{ dataset[1][model].SAR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAG %}
+ {{ dataset[1][model].SAG | round: 2 }} ± {{ dataset[1][model].SAG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Tox %}
+ {{ dataset[1][model].Tox | round: 2 }} ± {{ dataset[1][model].Tox_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/bias-toxicity/translation.md b/_pages/vi/bias-toxicity/translation.md
index b1174ba..5700248 100644
--- a/_pages/vi/bias-toxicity/translation.md
+++ b/_pages/vi/bias-toxicity/translation.md
@@ -3,264 +3,94 @@ layout: default
permalink: /leaderboard/vi/bias-toxicity/translation
---
# Bias-Toxicity Translation Leaderboard
+{% assign lang = 'vi' %}
Models
- PhoMT (En - Vi)
- OPUS100 (En - Vi)
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.translation %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- DRR$→|
- DRG$→|
- SAR$→|
- SAG$→|
- Tox↓
- DRR$→|
- DRG$→|
- SAR$→|
- SAG$→|
- Tox↓
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.translation %}
+ DRR↓
+ DRG↓
+ SAR↓
+ SAG↓
+ Tox↓
+ {% endfor %}
-
- URA-LLaMa 70B
- -
- 0.03 ± 0.01
- -
- 0.30 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- URA-LLaMa 13B
- -
- 0.09 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- URA-LLaMa 7B
- -
- 0.13 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.18 ± 0.03
- -
- 0.47 ± 0.01
- 0.07 ± 0.00
-
-
- LLaMa-2 13B
- -
- 0.08 ± 0.00
- -
- 0.33 ± 0.02
- 0.05 ± 0.00
- -
- 0.31 ± 0.02
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- LLaMa-2 7B
- -
- 0.17 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.21 ± 0.02
- -
- 0.45 ± 0.02
- 0.05 ± 0.00
-
-
- Vietcuna 7B
- -
- 0.18 ± 0.01
- -
- 0.36 ± 0.01
- 0.04 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- GPT-3.5
- -
- 0.11 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.03
- 0.07 ± 0.00
-
-
- GPT-4
- -
- 0.09 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.14 ± 0.03
- -
- 0.41 ± 0.01
- 0.07 ± 0.00
-
-
-
----
-layout: default
-permalink: /leaderboard/vi/bias-toxicity/translation
----
-# Bias-Toxicity Translation Leaderboard
-
-
-
-
- Models
- PhoMT (En $\to$ Vi)
- OPUS100 (En $\to$ Vi)
-
-
- DRR$\to\mid$
- DRG$\to\mid$
- SAR$\to\mid$
- SAG$\to\mid$
- Tox↓
- DRR$\to\mid$
- DRG$\to\mid$
- SAR$\to\mid$
- SAG$\to\mid$
- Tox↓
-
-
-
-
- URA-LLaMa 70B
- -
- 0.03 ± 0.01
- -
- 0.30 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- URA-LLaMa 13B
- -
- 0.09 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.27 ± 0.01
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- URA-LLaMa 7B
- -
- 0.13 ± 0.00
- -
- 0.33 ± 0.01
- 0.05 ± 0.00
- -
- 0.18 ± 0.03
- -
- 0.47 ± 0.01
- 0.07 ± 0.00
-
-
- LLaMa-2 13B
- -
- 0.08 ± 0.00
- -
- 0.33 ± 0.02
- 0.05 ± 0.00
- -
- 0.31 ± 0.02
- -
- 0.47 ± 0.01
- 0.06 ± 0.00
-
-
- LLaMa-2 7B
- -
- 0.17 ± 0.01
- -
- 0.29 ± 0.01
- 0.04 ± 0.00
- -
- 0.21 ± 0.02
- -
- 0.45 ± 0.02
- 0.05 ± 0.00
-
-
- Vietcuna 7B
- -
- 0.18 ± 0.01
- -
- 0.36 ± 0.01
- 0.04 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.02
- 0.07 ± 0.00
-
-
- GPT-3.5
- -
- 0.11 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.16 ± 0.03
- -
- 0.43 ± 0.03
- 0.07 ± 0.00
-
-
- GPT-4
- -
- 0.09 ± 0.01
- -
- 0.34 ± 0.01
- 0.05 ± 0.00
- -
- 0.14 ± 0.03
- -
- 0.41 ± 0.01
- 0.07 ± 0.00
-
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].bias_toxicity.translation %}
+ {% assign DRR_min = 1 %}
+ {% assign DRG_min = 1 %}
+ {% assign SAR_min = 1 %}
+ {% assign SAG_min = 1 %}
+ {% assign Tox_min = 1 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].DRR and dataset[1][m].DRR < DRR_min %}
+ {% assign DRR_min = dataset[1][m].DRR %}
+ {% endif %}
+ {% if dataset[1][m].DRG and dataset[1][m].DRG < DRG_min %}
+ {% assign DRG_min = dataset[1][m].DRG %}
+ {% endif %}
+ {% if dataset[1][m].SAR and dataset[1][m].SAR < SAR_min %}
+ {% assign SAR_min = dataset[1][m].SAR %}
+ {% endif %}
+ {% if dataset[1][m].SAG and dataset[1][m].SAG < SAG_min %}
+ {% assign SAG_min = dataset[1][m].SAG %}
+ {% endif %}
+ {% if dataset[1][m].Tox and dataset[1][m].Tox < Tox_min %}
+ {% assign Tox_min = dataset[1][m].Tox %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].DRR %}
+ {{ dataset[1][model].DRR | round: 2 }} ± {{ dataset[1][model].DRR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].DRG %}
+ {{ dataset[1][model].DRG | round: 2 }} ± {{ dataset[1][model].DRG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAR %}
+ {{ dataset[1][model].SAR | round: 2 }} ± {{ dataset[1][model].SAR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SAG %}
+ {{ dataset[1][model].SAG | round: 2 }} ± {{ dataset[1][model].SAG_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Tox %}
+ {{ dataset[1][model].Tox | round: 2 }} ± {{ dataset[1][model].Tox_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/chain-of-thought/reasoning.md b/_pages/vi/chain-of-thought/reasoning.md
index 6d62bc7..5c07ce8 100644
--- a/_pages/vi/chain-of-thought/reasoning.md
+++ b/_pages/vi/chain-of-thought/reasoning.md
@@ -3,73 +3,72 @@ layout: default
permalink: /leaderboard/vi/chain-of-thought/reasoning
---
# Chain-Of-Thought Reasoning Leaderboard
+{% assign lang = 'vi' %}
- Models
- Metrics
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].chain_of_thought.reasoning %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM ↑
- F1 ↑
- Equ. ↑
+ {% for dataset in site.data.leaderboard[lang].chain_of_thought.reasoning %}
+ EM↑
+ F1↑
+ Equ.↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.00 ± 0.00
- 0.12 ± 0.01
- 0.18 ± 0.02
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.23 ± 0.01
- 0.17 ± 0.01
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.23 ± 0.01
- 0.09 ± 0.01
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.12 ± 0.01
- 0.18 ± 0.02
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.10 ± 0.00
- 0.12 ± 0.02
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.13 ± 0.01
- 0.10 ± 0.01
-
-
- MixSUra 8x7B
- 0.00 ± 0.00
- 0.17 ± 0.01
- 0.33 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.32 ± 0.01
- 0.78 ± 0.02
-
-
- GPT-4
- 0.00 ± 0.00
- 0.32 ± 0.01
- 0.79 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].chain_of_thought.reasoning %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign Equ_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].Equ and dataset[1][m].Equ > Equ_best %}
+ {% assign Equ_best = dataset[1][m].Equ %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Equ %}
+ {{ dataset[1][model].Equ | round: 2 }} ± {{ dataset[1][model].Equ_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/fairness-aware/information-retrieval.md b/_pages/vi/fairness-aware/information-retrieval.md
index 91ad6c4..6eb7f91 100644
--- a/_pages/vi/fairness-aware/information-retrieval.md
+++ b/_pages/vi/fairness-aware/information-retrieval.md
@@ -3,113 +3,84 @@ layout: default
permalink: /leaderboard/vi/fairness-aware/information-retrieval
---
# Fairness-Aware Information Retrieval Leaderboard
+{% assign lang = 'vi' %}
- Models
- mMARCO
- mRobust04
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 13B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 7B
- 0.10 ± 0.00
- 0.10 ± 0.00
- 0.14 ± 0.00
- 0.14 ± 0.00
- 0.01 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- LLaMa-2 13B
-
-
-
-
-
-
-
-
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.10 ± 0.00
- 0.07 ± 0.00
- 0.16 ± 0.00
- -
- -
- -
- -
-
-
- Vietcuna 7B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/fairness-aware/language-modeling.md b/_pages/vi/fairness-aware/language-modeling.md
index c8ceba9..97227d5 100644
--- a/_pages/vi/fairness-aware/language-modeling.md
+++ b/_pages/vi/fairness-aware/language-modeling.md
@@ -3,164 +3,108 @@ layout: default
permalink: /leaderboard/vi/fairness-aware/language-modeling
---
# Fairness-Aware Language Modeling Leaderboard
+{% assign lang = 'vi' %}
- Models
- MLQA-MLM
- VSEC
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.language_modeling %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.language_modeling %}
+ EM↑
+ CER↓
+ WER↓
+ CED↓
+ WED↓
+ PLX↓
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.01 ± 0.00
- 0.58 ± 0.01
- 0.70 ± 0.01
- 653.57 ± 12.05
- 150.64 ± 2.73
- 1.25 ± 0.06
- 0.30 ± 0.00
- 0.11 ± 0.00
- 0.14 ± 0.00
- 15.19 ± 0.42
- 4.12 ± 0.11
- 1.13 ± 0.00
-
-
- URA-LLaMa 13B
- 0.02 ± 0.00
- 0.40 ± 0.01
- 0.56 ± 0.01
- 518.38 ± 11.19
- 125.24 ± 2.66
- 1.48 ± 0.11
- 0.32 ± 0.00
- 0.07 ± 0.00
- 0.21 ± 0.00
- 2.98 ± 0.11
- 1.24 ± 0.03
- 1.15 ± 0.00
-
-
- URA-LLaMa 7B
- 0.01 ± 0.00
- 0.40 ± 0.01
- 0.55 ± 0.01
- 492.93 ± 11.32
- 117.82 ± 2.72
- 1.22 ± 0.01
- 0.20 ± 0.00
- 0.54 ± 0.01
- 0.67 ± 0.01
- 41.77 ± 1.57
- 10.12 ± 0.35
- 1.07 ± 0.00
-
-
- LLaMa-2 13B
- 0.01 ± 0.00
- 0.76 ± 0.00
- 0.89 ± 0.00
- 782.03 ± 11.71
- 192.66 ± 2.83
- 1.27 ± 0.04
- 0.15 ± 0.00
- 0.07 ± 0.00
- 0.22 ± 0.00
- 3.39 ± 0.16
- 1.52 ± 0.04
- 1.01 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.79 ± 0.00
- 0.96 ± 0.00
- 761.38 ± 10.65
- 197.18 ± 2.66
- 1.75 ± 0.20
- 0.12 ± 0.00
- 0.35 ± 0.01
- 0.48 ± 0.01
- 47.54 ± 0.85
- 11.82 ± 0.19
- 1.06 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 1.04 ± 0.00
- 1.06 ± 0.00
- 940.71 ± 12.48
- 208.05 ± 2.81
- 1.40 ± 0.00
- 0,06 ± 0.00
- 4.78 ± 0.06
- 4.80 ± 0.06
- 634.48 ± 8.58
- 145.12 ± 1.94
- 1.46 ± 0.01
-
-
- MixSUra 8x7B
- 0.00 ± -
- 0.56 ± -
- 0.63 ± -
- 535.76 ± -
- 133.64 ± -
- 1.00 ± -
- 0,07 ± -
- 0.20 ± -
- 0.29 ± -
- 25.96 ± -
- 8.79 ± -
- 1.00 ± -
-
-
- GPT-3.5
- 0.03 ± 0.00
- 0.29 ± 0.01
- 0.46 ± 0.01
- 398.19 ± 11.01
- 96.42 ± 2.54
- -
- 0.59 ± 0.00
- 0.06 ± 0.00
- 0.19 ± 0.00
- 1.99 ± 0.08
- 0.74 ± 0.02
- -
-
-
- GPT-4
- 0.06 ± 0.00
- 0.36 ± 0.01
- 0.41 ± 0.01
- 347.82 ± 10.23
- 86.96 ± 2.41
- -
- 0.67 ± 0.00
- 0.01 ± 0.00
- 0.02 ± 0.00
- 1.30 ± 0.04
- 0.54 ± 0.01
- -
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.language_modeling %}
+ {% assign EM_best = 0 %}
+ {% assign CER_best = 1 %}
+ {% assign WER_best = 1 %}
+ {% assign CED_best = 10000 %}
+ {% assign WED_best = 10000 %}
+ {% assign PLX_best = 10000 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].CER and dataset[1][m].CER < CER_best %}
+ {% assign CER_best = dataset[1][m].CER %}
+ {% endif %}
+ {% if dataset[1][m].WER and dataset[1][m].WER < WER_best %}
+ {% assign WER_best = dataset[1][m].WER %}
+ {% endif %}
+ {% if dataset[1][m].CED and dataset[1][m].CED < CED_best %}
+ {% assign CED_best = dataset[1][m].CED %}
+ {% endif %}
+ {% if dataset[1][m].WED and dataset[1][m].WED < WED_best %}
+ {% assign WED_best = dataset[1][m].WED %}
+ {% endif %}
+ {% if dataset[1][m].PLX and dataset[1][m].PLX < PLX_best %}
+ {% assign PLX_best = dataset[1][m].PLX %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CER %}
+ {{ dataset[1][model].CER | round: 2 }} ± {{ dataset[1][model].CER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WER %}
+ {{ dataset[1][model].WER | round: 2 }} ± {{ dataset[1][model].WER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CED %}
+ {{ dataset[1][model].CED | round: 2 }} ± {{ dataset[1][model].CED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WED %}
+ {{ dataset[1][model].WED | round: 2 }} ± {{ dataset[1][model].WED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].PLX %}
+ {{ dataset[1][model].PLX | round: 2 }} ± {{ dataset[1][model].PLX_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/fairness-aware/question-answering.md b/_pages/vi/fairness-aware/question-answering.md
index f2ffc91..1c8836f 100644
--- a/_pages/vi/fairness-aware/question-answering.md
+++ b/_pages/vi/fairness-aware/question-answering.md
@@ -3,77 +3,60 @@ layout: default
permalink: /leaderboard/vi/fairness-aware/question-answering
---
# Fairness-Aware Question Answering Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- XQuAD
- MLQA
-
-
- Exact Match↑
- F1↑
- Exact Match↑
- F1↑
-
-
-
-
- URA-LLaMa 70B
- 0.04 ± 0.00
- 0.27 ± 0.00
- 0.03 ± 0.00
- 0.25 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.14 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.15 ± 0.01
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.03 ± 0.00
- 0.00 ± 0.00
- 0.04 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.24 ± 0.00
- 0.00 ± 0.00
- 0.23 ± 0.00
-
-
- GPT-4
- 0.00 ± 0.00
- 0.26 ± 0.00
- 0.00 ± 0.00
- 0.24 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/fairness-aware/sentiment-analysis.md b/_pages/vi/fairness-aware/sentiment-analysis.md
index f5a7b0b..017da03 100644
--- a/_pages/vi/fairness-aware/sentiment-analysis.md
+++ b/_pages/vi/fairness-aware/sentiment-analysis.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/vi/fairness-aware/sentiment-analysis
---
# Fairness-Aware Sentiment Analysis Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- VLSP 2016
- UiT-VSFC
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.65 ± 0.01
- 0.49 ± 0.01
- 0.58 ± 0.01
- 0.13 ± 0.01
- 0.77 ± 0.04
- 0.76 ± 0.01
- 0.48 ± 0.01
- 0.61 ± 0.01
- 0.17 ± 0.01
- 0.66 ± 0.03
-
-
- URA-LLaMa 13B
- 0.59 ± 0.01
- 0.57 ± 0.01
- 0.62 ± 0.01
- 0.07 ± 0.01
- 0.83 ± 0.04
- 0.75 ± 0.01
- 0.46 ± 0.08
- 0.83 ± 0.01
- 0.11 ± 0.01
- 0.88 ± 0.02
-
-
- URA-LLaMa 7B
- 0.74 ± 0.02
- 0.39 ± 0.06
- 0.83 ± 0.01
- 0.21 ± 0.02
- 0.98 ± 0.02
- 0.73 ± 0.01
- 0.73 ± 0.01
- 0.78 ± 0.01
- 0.13 ± 0.01
- 0.94 ± 0.01
-
-
- LLaMa-2 13B
- 0.51 ± 0.01
- 0.1 ± 0.06
- 0.56 ± 0.01
- 0.32 ± 0.02
- 0.79 ± 0.04
- 0.63 ± 0.01
- 0.41 ± 0.02
- 0.70 ± 0.01
- 0.13 ± 0.01
- 0.89 ± 0.02
-
-
- LLaMa-2 7B
- 0.45 ± 0.02
- 0.34 ± 0.01
- 0.53 ± 0.01
- 0.26 ± 0.02
- 0.50 ± 0.0
- 0.51 ± 0.01
- 0.55 ± 0.01
- 0.68 ± 0.01
- 0.22 ± 0.01
- 0.64 ± 0.03
-
-
- Vietcuna 7B
- 0.04 ± 0.01
- 0.04 ± 0.01
- 0.49 ± 0.01
- 0.71 ± 0.01
- 0.05 ± 0.02
- 0.03 ± 0.00
- 0.03 ± 0.00
- 0.55 ± 0.01
- 0.50 ± 0.00
- 0.01 ± 0.01
-
-
- MixSUra 8x7B
- 0.62 ± -
- 0.62 ± -
- 0.59 ± -
- 0.30 ± -
- 0.59 ± -
- 0.74 ± -
- 0.46 ± -
- 0.61 ± -
- 0.24 ± -
- 0.66 ± -
-
-
- Gemini Pro
- 0.67 ± -
- 0.50 ± -
- -
- 0.34 ± -
- 0.59 ± -
- 0.79 ± -
- 0.50 ± -
- -
- 0.46 ± -
- 0.82 ± -
-
-
- GPT-3.5
- 0.66 ± 0.01
- 0.60 ± 0.01
- -
- 0.33 ± 0.01
- 0.52 ± 0.05
- 0.86 ± 0.01
- 0.71 ± 0.01
- -
- 0.52 ± 0.01
- 0.86 ± 0.02
-
-
- GPT-4
- 0.75 ± 0.01
- 0.74 ± 0.01
- -
- 0.41 ± 0.00
- 0.73 ± 0.04
- 0.85 ± 0.01
- 0.71 ± 0.01
- -
- 0.52 ± 0.01
- 0.87 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/fairness-aware/text-classification.md b/_pages/vi/fairness-aware/text-classification.md
index ea021e3..1743b71 100644
--- a/_pages/vi/fairness-aware/text-classification.md
+++ b/_pages/vi/fairness-aware/text-classification.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/vi/fairness-aware/text-classification
---
# Fairness-Aware Text Classification Leaderboard
+{% assign lang = 'vi' %}
- Models
- UiT-VSMEC
- PhoATIS
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.24 ± 0.02
- 0.14 ± 0.01
- 0.58 ± 0.01
- 0.26 ± 0.02
- 0.37 ± 0.06
- 0.15 ± 0.01
- 0.22 ± 0.03
- 0.31 ± 0.00
- 0.81 ± 0.01
- 0.13 ± 0.04
-
-
- URA-LLaMa 13B
- 0.31 ± 0.02
- 0.11 ± 0.01
- 0.58 ± 0.01
- 0.23 ± 0.02
- 0.57 ± 0.06
- 0.01 ± 0.01
- 0.05 ± 0.02
- 0.58 ± 0.00
- 0.84 ± 0.01
- 0.00 ± 0.01
-
-
- URA-LLaMa 7B
- 0.29 ± 0.02
- 0.11 ± 0.01
- 0.60 ± 0.01
- 0.12 ± 0.02
- 0.41 ± 0.06
- 0.00 ± 0.01
- 0.00 ± 0.00
- 0.55 ± 0.00
- 0.30 ± 0.01
- 0.01 ± 0.03
-
-
- LLaMa-2 13B
- 0.18 ± 0.02
- 0.08 ± 0.01
- 0.55 ± 0.01
- 0.45 ± 0.01
- 0.44 ± 0.06
- 0.02 ± 0.01
- 0.01 ± 0.02
- 0.57 ± 0.01
- 0.90 ± 0.01
- 0.01 ± 0.01
-
-
- LLaMa-2 7B
- 0.25 ± 0.02
- 0.11 ± 0.01
- 0.57 ± 0.01
- 0.22 ± 0.02
- 0.53 ± 0.06
- 0.02 ± 0.00
- 0.06 ± 0.01
- 0.57 ± 0.01
- 0.68 ± 0.01
- 0.01 ± 0.01
-
-
- Vietcuna 7B
- 0.15 ± 0.01
- 0.05 ± 0.01
- 0.46 ± 0.01
- 0.85 ± 0.01
- 0.16 ± 0.04
- 0.04 ± 0.01
- 0.01 ± 0.00
- 0.77 ± 0.01
- 0.21 ± 0.01
- 0.07 ± 0.03
-
-
- MixSUra 8x7B
- 0.40 ± -
- 0.36 ± -
- 0.72 ± -
- 0.53 ± -
- 0.79 ± -
- 0.81 ± -
- 0.58 ± -
- 0.96 ± -
- 0.14 ± -
- 0.91 ± -
-
-
- Gemini Pro
- 0.48 ± -
- 0.38 ± -
- -
- 0.34 ± -
- 0.43 ± -
- 0.79 ± -
- 0.67 ± -
- -
- 0.73 ± -
- 0.68 ± -
-
-
- GPT-3.5
- 0.44 ± 0.02
- 0.42 ± 0.02
- -
- 0.30 ± 0.02
- 0.36 ± 0.06
- 0.68 ± 0.02
- 0.66 ± 0.03
- -
- 0.62 ± 0.02
- 0.67 ± 0.05
-
-
- GPT-4
- 0.49 ± 0.02
- 0.47 ± 0.02
- -
- 0.35 ± 0.02
- 0.36 ± 0.06
- 0.83 ± 0.01
- 0.76 ± 0.03
- -
- 0.77 ± 0.01
- 0.87 ± 0.04
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/fairness-aware/toxicity-detection.md b/_pages/vi/fairness-aware/toxicity-detection.md
index 8b89bfd..7a1706f 100644
--- a/_pages/vi/fairness-aware/toxicity-detection.md
+++ b/_pages/vi/fairness-aware/toxicity-detection.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/vi/fairness-aware/toxicity-detection
---
# Fairness-Aware Toxicity Detection Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- UiT-ViCTSD
- UiT-ViHSD
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.41 ± 0.02
- 0.26 ± 0.01
- 0.75 ± 0.01
- 0.53 ± 0.01
- 0.33 ± 0.05
- 0.15 ± 0.00
- 0.40 ± 0.00
- 0.64 ± 0.01
- 0.58 ± 0.00
- 0.24 ± 0.02
-
-
- URA-LLaMa 13B
- 0.43 ± 0.02
- 0.29 ± 0.07
- 0.66 ± 0.01
- 0.36 ± 0.02
- 0.42 ± 0.05
- 0.24 ± 0.01
- 0.15 ± 0.00
- 0.61 ± 0.01
- 0.43 ± 0.01
- 0.21 ± 0.02
-
-
- URA-LLaMa 7B
- 0.42 ± 0.02
- 0.39 ± 0.01
- 0.60 ± 0.01
- 0.30 ± 0.01
- 0.66 ± 0.05
- 0.16 ± 0.00
- 0.10 ± 0.00
- 0.67 ± 0.01
- 0.33 ± 0.00
- 0.28 ± 0.02
-
-
- LLaMa-2 13B
- 0.27 ± 0.01
- 0.18 ± 0.01
- 0.67 ± 0.01
- 0.53 ± 0.01
- 0.57 ± 0.05
- 0.16 ± 0.00
- 0.10 ± 0.00
- 0.62 ± 0.01
- 0.59 ± 0.00
- 0.42 ± 0.02
-
-
- LLaMa-2 7B
- 0.15 ± 0.01
- 0.11 ± 0.01
- 0.62 ± 0.01
- 0.67 ± 0.01
- 0.07 ± 0.03
- 0.01 ± 0.00
- 0.01 ± 0.00
- 0.56 ± 0.01
- 0.71 ± 0.00
- 0.01 ± 0.00
-
-
- Vietcuna 7B
- 0.08 ± 0.01
- 0.09 ± 0.01
- 0.50 ± 0.01
- 0.42 ± 0.01
- 0.06 ± 0.03
- 0.62 ± 0.01
- 0.21 ± 0.00
- 0.50 ± 0.00
- 0.29 ± 0.01
- 0.62 ± 0.02
-
-
- MixSUra 8x7B
- 0.69 ± -
- 0.38 ± -
- - ± -
- 0.29 ± -
- 0.78 ± -
- 0.56 ± -
- 0.31 ± -
- 0.68 ± -
- 0.32 ± -
- 0.92 ± -
-
-
- Gemini Pro
- 0.81 ± -
- 0.43 ± -
- - ± -
- 0.31 ± -
- 0.82 ± -
- 0.70 ± -
- 0.37 ± -
- - ± -
- 0.36 ± -
- 0.69 ± -
-
-
- GPT-3.5
- 0.60 ± 0.02
- 0.52 ± 0.02
- - ± -
- 0.11 ± 0.02
- 0.63 ± 0.05
- 0.61 ± 0.01
- 0.46 ± 0.01
- - ± -
- 0.29 ± 0.01
- 0.62 ± 0.02
-
-
- GPT-4
- 0.87 ± 0.01
- 0.69 ± 0.02
- - ± -
- 0.37 ± 0.01
- 0.86 ± 0.03
- 0.76 ± 0.01
- 0.56 ± 0.01
- - ± -
- 0.43 ± 0.01
- 0.76 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].fairness_aware.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/few-shot/information-retrieval.md b/_pages/vi/few-shot/information-retrieval.md
index 25c1d2c..f82fbbd 100644
--- a/_pages/vi/few-shot/information-retrieval.md
+++ b/_pages/vi/few-shot/information-retrieval.md
@@ -3,124 +3,84 @@ layout: default
permalink: /leaderboard/vi/few-shot/information-retrieval
---
# Few-Shot Information Retrieval Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- mMARCO
- mRobust04
-
-
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
-
-
-
-
- URA-LLaMa 70B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.06 ± 0.00
- 0.14 ± 0.00
- 0.04±0.00
- 0.04±0.00
- 0.03±0.00
- 0.04±0.00
-
-
- URA-LLaMa 13B
- 0.04 ± 0.00
- 0.10 ± 0.00
- 0.06 ± 0.00
- 0.14 ± 0.00
- 0.03±0.00
- 0.05±0.00
- 0.04±0.00
- 0.04±0.00
-
-
- URA-LLaMa 7B
- 0.04 ± 0.00
- 0.11 ± 0.00
- 0.06 ± 0.00
- 0.16 ± 0.00
- 0.03 ± 0.00
- 0.03 ± 0.00
- 0.02 ± 0.00
- 0.02 ± 0.00
-
-
- LLaMa-2 13B
- 0.07 ± 0.00
- 0.15 ± 0.00
- 0.09 ± 0.00
- 0.21 ± 0.00
- 0.05±0.00
- 0.04±0.00
- 0.04±0.00
- 0.04±0.00
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.07 ± 0.00
- 0.16 ± 0.00
- 0.02±0.00
- 0.03±0.00
- 0.03±0.00
- 0.02±0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00±0.00
- 0.00±0.00
- 0.00±0.00
- 0.00±0.00
-
-
- MixSUra 8x7B
- 0.01 ± -
- 0.07 ± -
- 0.04 ± -
- 0.11 ± -
- 0.04±-
- 0.04±-
- 0.02±-
- 0.02±-
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/few-shot/knowledge.md b/_pages/vi/few-shot/knowledge.md
index db04348..2c1b6e9 100644
--- a/_pages/vi/few-shot/knowledge.md
+++ b/_pages/vi/few-shot/knowledge.md
@@ -2,115 +2,129 @@
layout: default
permalink: /leaderboard/vi/few-shot/knowledge
---
-# Few-Shot Knowledge Leaderboard
+# Few-shot Knowledge Leaderboard
+{% assign lang = 'vi' %}
-
-
-Models
-ZaloE2E
-ViMMRC
-
-
-EM↑
-F1↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.34 ± 0.02
-0.50 ± 0.02
-0.78 ± 0.02
-0.63 ± 0.03
-0.90 ± 0.01
-0.13 ± 0.02
-0.96 ± 0.03
-
-
-URA-LLaMa 13B
-0.26 ± 0.02
-0.40 ± 0.02
-0.62 ± 0.02
-0.50 ± 0.02
-0.69 ± 0.02
-0.18 ± 0.02
-0.65 ± 0.07
-
-
-URA-LLaMa 7B
-0.14 ± 0.02
-0.25 ± 0.02
-0.42 ± 0.02
-0.33 ± 0.02
-0.61 ± 0.02
-0.13 ± 0.02
-0.39 ± 0.07
-
-
-LLaMa-2 13B
-0.22 ± 0.02
-0.36 ± 0.02
-0.58 ± 0.02
-0.46 ± 0.02
-0.62 ± 0.02
-0.28 ± 0.02
-0.77 ± 0.06
-
-
-LLaMa-2 7B
-0.07 ± 0.01
-0.15 ± 0.01
-0.30 ± 0.02
-0.23 ± 0.02
-0.56 ± 0.02
-0.43 ± 0.02
-0.16 ± 0.05
-
-
-Vietcuna 7B
-0.07 ± 0.01
-0.19 ± 0.01
-0.31 ± 0.02
-0.18 ± 0.01
-0.50 ± 0.00
-0.06 ± 0.02
-0.31 ± 0.06
-
-
-MixSUra 8x7B
-0.19 ± -
-0.34 ± -
-0.65 ± -
-0.64 ± -
-0.54 ± -
-0.29 ± -
-0.65 ± -
-
-
-GPT-3.5
-0.49 ± 0.02
-0.64 ± 0.02
-0.90 ± 0.01
-0.73 ± 0.03
--
-0.66 ± 0.01
-0.91 ± 0.04
-
-
-GPT-4
-0.49 ± 0.02
-0.64 ± 0.02
-0.91 ± 0.01
-0.73 ± 0.04
--
-0.66 ± 0.01
-0.91 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {{ dataset[0] }}
+
+ {% else %}
+
+ {{ dataset[0] }}
+
+ {% endif %}
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+ EM↑
+ F1↑
+ {% else %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.knowledge %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AC_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% else %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endif %}
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/few-shot/language-modeling.md b/_pages/vi/few-shot/language-modeling.md
index 85c1105..f2b6953 100644
--- a/_pages/vi/few-shot/language-modeling.md
+++ b/_pages/vi/few-shot/language-modeling.md
@@ -3,164 +3,108 @@ layout: default
permalink: /leaderboard/vi/few-shot/language-modeling
---
# Few-Shot Language Modeling Leaderboard
+{% assign lang = 'vi' %}
- Models
- MLQA-MLM
- VSEC
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.language_modeling %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
+ {% for dataset in site.data.leaderboard[lang].few_shot.language_modeling %}
+ EM↑
+ CER↓
+ WER↓
+ CED↓
+ WED↓
+ PLX↓
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.01 ± 0.00
- 0.54 ± 0.00
- 0.66 ± 0.00
- 669.74 ± 10.38
- 153.04 ± 2.33
- 1.32 ± 0.05
- 0.33 ± 0.00
- 0.11 ± 0.00
- 0.13 ± 0.00
- 15.09 ± 0.42
- 4.05 ± 0.11
- 1.13 ± 0.00
-
-
- URA-LLaMa 13B
- 0.01 ± 0.00
- 0.45 ± 0.01
- 0.61 ± 0.01
- 559.64 ± 11.23
- 136.97 ± 2.68
- 1.49 ± 0.10
- 0.35 ± 0.00
- 0.02 ± 0.00
- 0.04 ± 0.00
- 2.81 ± 0.12
- 1.18 ± 0.03
- 1.15 ± 0.00
-
-
- URA-LLaMa 7B
- 0.01 ± 0.00
- 0.40 ± 0.01
- 0.55 ± 0.01
- 498.36 ± 11.01
- 118.11 ± 2.58
- 1.24 ± 0.01
- 0.22 ± 0.00
- 0.32 ± 0.01
- 0.33 ± 0.01
- 41.89 ± 1.54
- 10.10 ± 0.34
- 1.07 ± 0.00
-
-
- LLaMa-2 13B
- 0.01 ± 0.00
- 0.74 ± 0.00
- 0.87 ± 0.00
- 760.98 ± 11.91
- 186.90 ± 2.85
- 1.24 ± 0.03
- 0.16 ± 0.00
- 0.03 ± 0.00
- 0.05 ± 0.00
- 3.38 ± 0.16
- 1.51 ± 0.04
- 1.01 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.81 ± 0.00
- 0.98 ± 0.00
- 769.36 ± 10.51
- 198.53 ± 2.57
- 1.74 ± 0.19
- 0.12 ± 0.00
- 0.36 ± 0.01
- 0.39 ± 0.01
- 47.50 ± 0.86
- 11.80 ± 0.19
- 1.06 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 1.04 ± 0.00
- 1.06 ± 0.00
- 935.65 ± 12.47
- 204.98 ± 2.79
- 1.40 ± 0.00
- 0.00 ± 0.00
- 8.00 ± 0.07
- 8.01 ± 0.07
- 1063.93 ± 7.64
- 241.74 ± 1.74
- 1.46 ± 0.00
-
-
- MixSUra 8x7B
- 0.00 ± -
- 0.55 ± -
- 0.63 ± -
- 526.79 ± -
- 131.02 ± -
- 1.00 ± -
- 0.08 ± -
- 0.19 ± -
- 0.28 ± -
- 25.13 ± -
- 8.58 ± -
- 1.00 ± -
-
-
- GPT-3.5
- 0.04 ± 0.00
- 0.28 ± 0.01
- 0.44 ± 0.01
- 387.37 ± 10.86
- 92.78 ± 2.46
- -
- 0.66 ± 0.00
- 0.01 ± 0.00
- 0.02 ± 0.00
- 1.63 ± 0.08
- 0.61 ± 0.02
- -
-
-
- GPT-4
- 0.08 ± 0.00
- 0.23 ± 0.01
- 0.40 ± 0.01
- 336.53 ± 10.18
- 83.55 ± 2.34
- -
- 0.75 ± 0.00
- 0.01 ± 0.00
- 0.01 ± 0.00
- 0.89 ± 0.04
- 0.37 ± 0.01
- -
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.language_modeling %}
+ {% assign EM_best = 0 %}
+ {% assign CER_best = 1 %}
+ {% assign WER_best = 1 %}
+ {% assign CED_best = 10000 %}
+ {% assign WED_best = 10000 %}
+ {% assign PLX_best = 10000 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].CER and dataset[1][m].CER < CER_best %}
+ {% assign CER_best = dataset[1][m].CER %}
+ {% endif %}
+ {% if dataset[1][m].WER and dataset[1][m].WER < WER_best %}
+ {% assign WER_best = dataset[1][m].WER %}
+ {% endif %}
+ {% if dataset[1][m].CED and dataset[1][m].CED < CED_best %}
+ {% assign CED_best = dataset[1][m].CED %}
+ {% endif %}
+ {% if dataset[1][m].WED and dataset[1][m].WED < WED_best %}
+ {% assign WED_best = dataset[1][m].WED %}
+ {% endif %}
+ {% if dataset[1][m].PLX and dataset[1][m].PLX < PLX_best %}
+ {% assign PLX_best = dataset[1][m].PLX %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CER %}
+ {{ dataset[1][model].CER | round: 2 }} ± {{ dataset[1][model].CER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WER %}
+ {{ dataset[1][model].WER | round: 2 }} ± {{ dataset[1][model].WER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CED %}
+ {{ dataset[1][model].CED | round: 2 }} ± {{ dataset[1][model].CED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WED %}
+ {{ dataset[1][model].WED | round: 2 }} ± {{ dataset[1][model].WED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].PLX %}
+ {{ dataset[1][model].PLX | round: 2 }} ± {{ dataset[1][model].PLX_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/few-shot/reasoning.md b/_pages/vi/few-shot/reasoning.md
index 47b2923..f5428de 100644
--- a/_pages/vi/few-shot/reasoning.md
+++ b/_pages/vi/few-shot/reasoning.md
@@ -3,135 +3,72 @@ layout: default
permalink: /leaderboard/vi/few-shot/reasoning
---
# Few-Shot Reasoning Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- SR - Natural
- SR - Abstract symbol
- MATH
-
-
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
-
-
-
-
- URA-LLaMa 70B
- 0.14 ± 0.00
- 0.48 ± 0.00
- 0.15 ± 0.00
- 0.27 ± 0.00
- 0.85 ± 0.00
- 0.30 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.12 ± 0.02
-
-
- URA-LLaMa 13B
- 0.08 ± 0.00
- 0.42 ± 0.00
- 0.08 ± 0.00
- 0.20 ± 0.00
- 0.70 ± 0.00
- 0.17 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.01
-
-
- URA-LLaMa 7B
- 0.04 ± 0.00
- 0.38 ± 0.00
- 0.04 ± 0.00
- 0.11 ± 0.00
- 0.61 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.07 ± 0.01
-
-
- LLaMa-2 13B
- 0.03 ± 0.00
- 0.24 ± 0.00
- 0.04 ± 0.00
- 0.19 ± 0.00
- 0.69 ± 0.00
- 0.18 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.16 ± 0.02
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.06 ± 0.00
- 0.44 ± 0.00
- 0.06 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.11 ± 0.01
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.14 ± 0.00
- 0.71 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
-
-
- MixSUra 8x7B
- 0.07 ± 0.00
- 0.41 ± 0.00
- 0.07 ± 0.00
- 0.22 ± 0.00
- 0.78 ± 0.00
- 0.23 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- GPT-3.5
- 0.15 ± 0.00
- 0.50 ± 0.00
- 0.16 ± 0.00
- 0.26 ± 0.00
- 0.83 ± 0.00
- 0.29 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.62 ± 0.02
-
-
- GPT-4
- 0.37 ± 0.00
- 0.74 ± 0.00
- 0.42 ± 0.00
- 0.37 ± 0.00
- 0.87 ± 0.00
- 0.44 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.65 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.reasoning %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.reasoning %}
+ EM↑
+ F1↑
+ Equ↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.reasoning %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign Equ_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m]["Equ"] and dataset[1][m]["Equ"] > Equ_best %}
+ {% assign Equ_best = dataset[1][m]["Equ"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["Equ"] %}
+ {{ dataset[1][model]["Equ"] | round: 2 }} ± {{ dataset[1][model]["Equ_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/few-shot/sentiment-analysis.md b/_pages/vi/few-shot/sentiment-analysis.md
index 7c77504..0608261 100644
--- a/_pages/vi/few-shot/sentiment-analysis.md
+++ b/_pages/vi/few-shot/sentiment-analysis.md
@@ -1,146 +1,98 @@
---
layout: default
-permalink: /leaderboard/vi/few-shot/sentiment-analysis
+permalink: /leaderboard/vi/few-shot/sentiment-analysis
---
# Few-Shot Sentiment Analysis Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- VLSP 2016
- UiT-VSFC
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.66 ± 0.01
- 0.49 ± 0.01
- 0.72 ± 0.01
- 0.13 ± 0.01
- 0.77 ± 0.04
- 0.75 ± 0.01
- 0.48 ± 0.01
- 0.81 ± 0.01
- 0.16 ± 0.01
- 0.71 ± 0.02
-
-
- URA-LLaMa 13B
- 0.59 ± 0.01
- 0.57 ± 0.01
- 0.67 ± 0.01
- 0.09 ± 0.01
- 0.82 ± 0.04
- 0.74 ± 0.01
- 0.52 ± 0.08
- 0.83 ± 0.01
- 0.10 ± 0.01
- 0.87 ± 0.02
-
-
- URA-LLaMa 7B
- 0.57 ± 0.02
- 0.42 ± 0.05
- 0.69 ± 0.02
- 0.07 ± 0.02
- 0.77 ± 0.04
- 0.72 ± 0.01
- 0.43 ± 0.01
- 0.78 ± 0.01
- 0.13 ± 0.01
- 0.95 ± 0.03
-
-
- LLaMa-2 13B
- 0.51 ± 0.01
- 0.41 ± 0.06
- 0.66 ± 0.01
- 0.32 ± 0.02
- 0.80 ± 0.04
- 0.63 ± 0.01
- 0.46 ± 0.07
- 0.71 ± 0.01
- 0.13 ± 0.01
- 0.88 ± 0.02
-
-
- LLaMa-2 7B
- 0.45 ± 0.01
- 0.32 ± 0.01
- 0.59 ± 0.01
- 0.26 ± 0.02
- 0.50 ± 0.05
- 0.50 ± 0.01
- 0.34 ± 0.01
- 0.69 ± 0.01
- 0.23 ± 0.01
- 0.62 ± 0.03
-
-
- Vietcuna 7B
- 0.04 ± 0.01
- 0.05 ± 0.01
- 0.45 ± 0.01
- 0.71 ± 0.01
- 0.05 ± 0.02
- 0.03 ± 0.00
- 0.03 ± 0.00
- 0.53 ± 0.01
- 0.50 ± 0.00
- 0.01 ± 0.00
-
-
- MixSUra 8x7B
- 0.62 ± -
- 0.63 ± -
- 0.59 ± -
- 0.30 ± -
- 0.59 ± -
- 0.74 ± -
- 0.46 ± -
- 0.63 ± -
- 0.23 ± -
- 0.655 ± -
-
-
- GPT-3.5
- 0.65 ± 0.01
- 0.59 ± 0.1
- -
- 0.32 ± 0.01
- 0.65 ± 0.05
- 0.86 ± 0.01
- 0.73 ± 0.01
- -
- 0.52 ± 0.01
- 0.86 ± 0.02
-
-
- GPT-4
- 0.75 ± 0.01
- 0.74 ± 0.01
- -
- 0.41 ± 0.01
- 0.74 ± 0.04
- 0.85 ± 0.01
- 0.59 ± 0.09
- -
- 0.52 ± 0.01
- 0.85 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/few-shot/text-classification.md b/_pages/vi/few-shot/text-classification.md
index 6c3b0d7..10d2f30 100644
--- a/_pages/vi/few-shot/text-classification.md
+++ b/_pages/vi/few-shot/text-classification.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/vi/few-shot/text-classification
---
# Few-Shot Text Classification Leaderboard
+{% assign lang = 'vi' %}
-
-
-Models
-UiT-VSMEC
-PhoATIS
-
-
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.25 ± 0.02
-0.15 ± 0.01
-0.56 ± 0.01
-0.25 ± 0.02
-0.37 ± 0.06
-0.15 ± 0.01
-0.22 ± 0.03
-0.83 ± 0.00
-0.81 ± 0.01
-0.13 ± 0.04
-
-
-URA-LLaMa 13B
-0.32 ± 0.02
-0.12 ± 0.01
-0.58 ± 0.01
-0.22 ± 0.02
-0.57 ± 0.07
-0.01 ± 0.01
-0.06 ± 0.02
-0.47 ± 0.00
-0.84 ± 0.01
-0.00 ± 0.01
-
-
-URA-LLaMa 7B
-0.29 ± 0.02
-0.11 ± 0.01
-0.60 ± 0.01
-0.12 ± 0.02
-0.43 ± 0.06
-0.06 ± 0.01
-0.01 ± 0.00
-0.55 ± 0.00
-0.24 ± 0.01
-0.08 ± 0.03
-
-
-LLaMa-2 13B
-0.18 ± 0.02
-0.08 ± 0.01
-0.55 ± 0.01
-0.45 ± 0.01
-0.49 ± 0.07
-0.02 ± 0.01
-0.06 ± 0.02
-0.57 ± 0.01
-0.90 ± 0.01
-0.01 ± 0.01
-
-
-LLaMa-2 7B
-0.25 ± 0.02
-0.12 ± 0.01
-0.57 ± 0.01
-0.21 ± 0.02
-0.54 ± 0.06
-0.03 ± 0.01
-0.02 ± 0.01
-0.56 ± 0.01
-0.54 ± 0.01
-0.01 ± 0.01
-
-
-Vietcuna 7B
-0.15 ± 0.01
-0.05 ± 0.01
-0.46 ± 0.01
-0.85 ± 0.01
-0.15 ± 0.04
-0.04 ± 0.01
-0.01 ± 0.00
-0.63 ± 0.00
-0.21 ± 0.01
-0.07 ± 0.03
-
-
-MixSUra 8x7B
-0.40 ± -
-0.36 ± -
-0.72 ± -
-0.53 ± -
-0.79 ± -
-0.81 ± -
-0.58 ± -
-0.96 ± -
-0.14 ± -
-0.91 ± -
-
-
-GPT-3.5
-0.42 ± 0.02
-0.40 ± 0.02
--
-0.28 ± 0.02
-0.42 ± 0.06
-0.69 ± 0.02
-0.67 ± 0.03
--
-0.63 ± 0.02
-0.69 ± 0.05
-
-
-GPT-4
-0.49 ± 0.02
-0.48 ± 0.02
--
-0.35 ± 0.02
-0.49 ± 0.06
-0.85 ± 0.01
-0.78 ± 0.03
--
-0.79 ± 0.01
-0.88 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/few-shot/toxicity-detection.md b/_pages/vi/few-shot/toxicity-detection.md
index d752c6a..2e3dcf8 100644
--- a/_pages/vi/few-shot/toxicity-detection.md
+++ b/_pages/vi/few-shot/toxicity-detection.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/vi/few-shot/toxicity-detection
---
# Few-Shot Toxicity Detection Leaderboard
+{% assign lang = 'vi' %}
-
-
-Models
-UiT-ViCTSD
-UiT-ViHSD
-
-
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.44 ± 0.01
-0.27 ± 0.01
-0.75 ± 0.01
-0.52 ± 0.01
-0.37 ± 0.02
-0.17 ± 0.00
-0.15 ± 0.00
-0.64 ± 0.01
-0.57 ± 0.00
-0.27 ± 0.02
-
-
-URA-LLaMa 13B
-0.44 ± 0.01
-0.30 ± 0.05
-0.67 ± 0.01
-0.33 ± 0.01
-0.41 ± 0.03
-0.26 ± 0.01
-0.16 ± 0.00
-0.61 ± 0.01
-0.42 ± 0.01
-0.21 ± 0.02
-
-
-URA-LLaMa 7B
-0.43 ± 0.01
-0.40 ± 0.01
-0.60 ± 0.01
-0.29 ± 0.01
-0.71 ± 0.02
-0.16 ± 0.00
-0.10 ± 0.00
-0.67 ± 0.01
-0.32 ± 0.00
-0.28 ± 0.02
-
-
-LLaMa-2 13B
-0.28 ± 0.01
-0.19 ± 0.00
-0.67 ± 0.01
-0.52 ± 0.01
-0.63 ± 0.03
-0.17 ± 0.00
-0.11 ± 0.00
-0.62 ± 0.01
-0.58 ± 0.00
-0.44 ± 0.02
-
-
-LLaMa-2 7B
-0.16 ± 0.01
-0.12 ± 0.01
-0.61 ± 0.01
-0.66 ± 0.01
-0.08 ± 0.02
-0.01 ± 0.00
-0.01 ± 0.00
-0.56 ± 0.01
-0.71 ± 0.00
-0.01 ± 0.02
-
-
-Vietcuna 7B
-0.08 ± 0.00
-0.10 ± 0.01
-0.50 ± 0.00
-0.42 ± 0.00
-0.08 ± 0.03
-0.61 ± 0.01
-0.21 ± 0.00
-0.50 ± 0.00
-0.28 ± 0.01
-0.61 ± 0.02
-
-
-MixSUra 8x7B
-0.70 ± -
-0.39 ± -
-- ± -
-0.29 ± -
-0.80 ± -
-0.58 ± -
-0.31 ± -
-0.68 ± -
-0.30 ± -
-0.93 ± -
-
-
-GPT-3.5
-0.63 ± 0.02
-0.54 ± 0.02
--
-0.13 ± 0.02
-0.63 ± 0.05
-0.63 ± 0.01
-0.47 ± 0.01
--
-0.29 ± 0.01
-0.63 ± 0.02
-
-
-GPT-4
-0.89 ± 0.00
-0.71 ± 0.01
--
-0.39 ± 0.00
-0.89 ± 0.03
-0.77 ± 0.01
-0.57 ± 0.01
--
-0.44 ± 0.01
-0.77 ± 0.02
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/few-shot/translation.md b/_pages/vi/few-shot/translation.md
index ef13374..d2c3841 100644
--- a/_pages/vi/few-shot/translation.md
+++ b/_pages/vi/few-shot/translation.md
@@ -3,124 +3,84 @@ layout: default
permalink: /leaderboard/vi/few-shot/translation
---
# Few-Shot Translation Leaderboard
+{% assign lang = 'vi' %}
- Models
- PhoMT
- OPUS100
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.translation %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- (En -> Vi)
- (Vi -> En)
- (En -> Vi)
- (Vi -> En)
- (En -> Vi)
- (Vi -> En)
- (En -> Vi)
- (Vi -> En)
+ {% for dataset in site.data.leaderboard[lang].few_shot.translation %}
+ BLEU envi↑
+ BLEU vien↑
+ hLEPOR envi↑
+ hLEPOR vien↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.28 ± 0.00
- 0.59 ± 0.00
- 0.27 ± 0.00
- 0.58 ± 0.00
- 0.10 ± 0.00
- 0.44 ± 0.01
- 0.14 ± 0.00
- 0.41 ± 0.01
-
-
- URA-LLaMa 13B
- 0.25 ± 0.00
- 0.55 ± 0.00
- 0.15 ± 0.00
- 0.56 ± 0.00
- 0.10 ± 0.01
- 0.41 ± 0.01
- 0.17 ± 0.01
- 0.43 ± 0.01
-
-
- URA-LLaMa 7B
- 0.19 ± 0.00
- 0.50 ± 0.00
- 0.22 ± 0.00
- 0.54 ± 0.00
- 0.08 ± 0.00
- 0.38 ± 0.01
- 0.14 ± 0.01
- 0.39 ± 0.01
-
-
- LLaMa-2 13B
- 0.23 ± 0.00
- 0.53 ± 0.00
- 0.23 ± 0.00
- 0.54 ± 0.00
- 0.09 ± 0.00
- 0.39 ± 0.01
- 0.14 ± 0.01
- 0.40 ± 0.01
-
-
- LLaMa-2 7B
- 0.18 ± 0.00
- 0.47 ± 0.00
- 0.21 ± 0.00
- 0.52 ± 0.00
- 0.07 ± 0.00
- 0.34 ± 0.00
- 0.11 ± 0.01
- 0.36 ± 0.01
-
-
- Vietcuna 7B
- 0.15 ± 0.00
- 0.35 ± 0.00
- 0.03 ± 0.00
- 0.11 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
- 0.16 ± 0.00
-
-
- MixSUra 8x7B
- 0.15 ± -
- 0.51 ± -
- 0.16 ± -
- 0.52 ± -
- 0.07 ± -
- 0.37 ± -
- 0.09 ± -
- 0.36 ± -
-
-
- GPT-3.5
- 0.33 ± 0.00
- 0.65 ± 0.00
- 0.33 ± 0.00
- 0.63 ± 0.00
- 0.16 ± 0.01
- 0.50 ± 0.01
- 0.24 ± 0.01
- 0.51 ± 0.00
-
-
- GPT-4
- 0.33 ± 0.00
- 0.66 ± 0.00
- 0.34 ± 0.00
- 0.65 ± 0.00
- 0.17 ± 0.01
- 0.51 ± 0.01
- 0.25 ± 0.01
- 0.53 ± 0.00
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].few_shot.translation %}
+ {% assign bleu_envi_best = 0 %}
+ {% assign bleu_vien_best = 0 %}
+ {% assign hlepor_envi_best = 0 %}
+ {% assign hlepor_vien_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["BLEU envi"] and dataset[1][m]["BLEU envi"] > bleu_envi_best %}
+ {% assign bleu_envi_best = dataset[1][m]["BLEU envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["BLEU vien"] and dataset[1][m]["BLEU vien"] > bleu_vien_best %}
+ {% assign bleu_vien_best = dataset[1][m]["BLEU vien"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR envi"] and dataset[1][m]["hLEPOR envi"] > hlepor_envi_best %}
+ {% assign hlepor_envi_best = dataset[1][m]["hLEPOR envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR vien"] and dataset[1][m]["hLEPOR vien"] > hlepor_vien_best %}
+ {% assign hlepor_vien_best = dataset[1][m]["hLEPOR vien"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["BLEU envi"] %}
+ {{ dataset[1][model]["BLEU envi"] | round: 2 }} ± {{ dataset[1][model]["BLEU envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["BLEU vien"] %}
+ {{ dataset[1][model]["BLEU vien"] | round: 2 }} ± {{ dataset[1][model]["BLEU vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR envi"] %}
+ {{ dataset[1][model]["hLEPOR envi"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR vien"] %}
+ {{ dataset[1][model]["hLEPOR vien"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/medium-prompt/question-answering.md b/_pages/vi/medium-prompt/question-answering.md
index 13fc8a8..5e06833 100644
--- a/_pages/vi/medium-prompt/question-answering.md
+++ b/_pages/vi/medium-prompt/question-answering.md
@@ -3,63 +3,60 @@ layout: default
permalink: /leaderboard/vi/medium-prompt/question-answering
---
# Medium-Prompt Question Answering Leaderboard
+{% assign lang = 'vi' %}
- Models
- XQuAD
- MLQA
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- F1↑
- EM↑
- F1↑
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
-
- URA-LLaMa 70B
- 0.08 ± 0.00
- 0.33 ± 0.00
- 0.07 ± 0.00
- 0.31 ± 0.00
-
-
- URA-LLaMa 13B
- 0.04 ± 0.00
- 0.21 ± 0.00
- 0.04 ± 0.00
- 0.19 ± 0.00
-
-
- URA-LLaMa 7B
- 0.01 ± 0.00
- 0.11 ± 0.00
- 0.01 ± 0.00
- 0.11 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.09 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.03 ± 0.00
- 0.00 ± 0.00
- 0.03 ± 0.00
-
-
- MixSUra 8x7B
- 0.01 ± -
- 0.25 ± -
- 0.00 ± -
- 0.25 ± -
-
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/medium-prompt/summarization.md b/_pages/vi/medium-prompt/summarization.md
index 739433e..df2a021 100644
--- a/_pages/vi/medium-prompt/summarization.md
+++ b/_pages/vi/medium-prompt/summarization.md
@@ -3,147 +3,132 @@ layout: default
permalink: /leaderboard/vi/medium-prompt/summarization
---
# Medium-Prompt Summarization Leaderboard
+{% assign lang = 'vi' %}
-
-
-Models
-VietNews
-WikiLingua
-
-
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-
-
-
-
-URA-LLaMa 70B
-0.35 ± 0.00
-0.16 ± 0.00
-0.24 ± 0.00
--0.11 ± 0.00
-0.12 ± 0.00
-0.63 ± 0.00
-5.43 ± 0.02
-37.78 ± 0.47
-0.33 ± 0.00
-0.14 ± 0.00
-0.22 ± 0.00
--0.16± 0.00
-0.24± 0.10
-0.59 ± 0.01
-4.62 ± 0.11
-56.56 ± 1.70
-
-
-URA-LLaMa 13B
-0.26 ± 0.00
-0.12 ± 0.00
-0.17 ± 0.00
--0.09 ± 0.00
--0.08 ± 0.18
-0.46 ± 0.00
-3.55 ± 0.04
-47.75 ± 0.65
-0.14 ± 0.00
-0.05 ± 0.00
-0.09 ± 0.00
--0.16 ± 0.00
--0.14 ± 0.12
-0.26 ± 0.01
-1.83 ± 0.06
-60.10 ± 2.16
-
-
-URA-LLaMa 7B
-0.41 ± 0.00
-0.18 ± 0.00
-0.27 ± 0.00
--0.09 ± 0.00
--0.08 ± 0.13
-0.83 ± 0.00
-8.13 ± 0.04
-8.08 ± 0.17
-0.42 ± 0.00
-0.17 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.27 ± 0.21
-0.84 ± 0.00
-7.15 ± 0.08
-8.08 ± 0.36
-
-
-LLaMa-2 13B
-0.02 ± 0.00
-0.00 ± 0.00
-0.02 ± 0.00
--0.09 ± 0.00
--0.19 ± 0.05
-0.01 ± 0.00
-0.01 ± 0.00
-54.67 ± 0.16
-0.03 ± 0.00
-0.00 ± 0.00
-0.03 ± 0.00
--0.16 ± 0.00
--0.05 ± 0.03
-0.02 ± 0.00
-0.02 ± 0.00
-42.55 ± 0.81
-
-
-LLaMa-2 7B
-0.03 ± 0.00
-0.01 ± 0.00
-0.03 ± 0.00
--0.09 ± 0.00
--0.17 ± 0.03
-0.04 ± 0.00
-0.07 ± 0.00
-23.86 ± 0.26
-0.02 ± 0.00
-0.00 ± 0.00
-0.02 ± 0.00
--0.16 ± 0.00
--0.04 ± 0.06
-0.02 ± 0.00
-0.03 ± 0.00
-40.31 ± 0.88
-
-
-MixSUra 8x7B
-0.06 ± -
-0.01 ± -
-0.04 ± -
-- ± -
--0.13 ± -
-0.10 ± -
-0.17 ± -
-9.03 ± -
-0.03 ± -
-0.00 ± -
-0.03 ± -
-- ± -
--0.01 ± -
-0.17 ± -
-0.26 ± -
-16.68 ± -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].medium_prompt.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/randomized-choice/knowledge.md b/_pages/vi/randomized-choice/knowledge.md
index f7b9784..03a1c2d 100644
--- a/_pages/vi/randomized-choice/knowledge.md
+++ b/_pages/vi/randomized-choice/knowledge.md
@@ -4,90 +4,96 @@ permalink: /leaderboard/vi/randomized-choice/knowledge
---
# Randomized-Choice Knowledge Leaderboard
+{% assign lang = 'vi' %}
- Models
- AC ↑
- F1 ↑
- AR ↑
- ECE ↓
- A@10 ↑
-
-
-
-
- Our 70B
- 0.76 ± 0.02
- 0.76 ± 0.02
- 0.78 ± 0.01
- 0.14 ± 0.02
- 0.94 ± 0.04
-
-
- Our 13B
- 0.62 ± 0.02
- 0.62 ± 0.02
- 0.61 ± 0.02
- 0.15 ± 0.02
- 0.67 ± 0.07
-
-
- Our 7B
- 0.45 ± 0.02
- 0.36 ± 0.02
- 0.57 ± 0.02
- 0.10 ± 0.02
- 0.45 ± 0.07
-
-
- LLaMa-2 13B
- 0.57 ± 0.02
- 0.57 ± 0.02
- 0.57 ± 0.02
- 0.29 ± 0.02
- 0.75 ± 0.07
-
-
- LLaMa-2 7B
- 0.36 ± 0.02
- 0.27 ± 0.02
- 0.56 ± 0.02
- 0.37 ± 0.02
- 0.44 ± 0.07
-
-
- Vietcuna 7B
- 0.26 ± 0.02
- 0.15 ± 0.01
- 0.50 ± 0.00
- 0.01 ± 0.01
- 0.26 ± 0.06
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].randomized_choice.knowledge %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- MixSUra 7B
- 0.61 ± -
- 0.61 ± -
- 0.54 ± -
- 0.31 ± -
- 0.65 ± -
-
-
- GPT-3.5
- 0.92 ± 0.01
- 0.74 ± 0.04
- -
- 0.67 ± 0.01
- 0.92 ± 0.04
+ {% for dataset in site.data.leaderboard[lang].randomized_choice.knowledge %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
- GPT-4
- 0.92 ± 0.01
- 0.74 ± 0.04
- -
- 0.67 ± 0.01
- 0.92 ± 0.04
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].randomized_choice.knowledge %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/robustness-aware/information-retrieval.md b/_pages/vi/robustness-aware/information-retrieval.md
index 6ce9fa1..2b6b42d 100644
--- a/_pages/vi/robustness-aware/information-retrieval.md
+++ b/_pages/vi/robustness-aware/information-retrieval.md
@@ -3,113 +3,84 @@ layout: default
permalink: /leaderboard/vi/robustness-aware/information-retrieval
---
# Robustness-Aware Information Retrieval Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- mMARCO
- mRobust04
-
-
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
-
-
-
-
- URA-LLaMa 70B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 13B
-
-
-
-
-
-
-
-
-
-
- URA-LLaMa 7B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.07 ± 0.00
- 0.17 ± 0.00
- -
- -
- -
- -
-
-
- LLaMa-2 13B
- 0.06 ± 0.00
- 0.13 ± 0.00
- 0.19 ± 0.00
- 0.19 ± 0.00
-
-
-
-
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.11 ± 0.00
- 0.08 ± 0.00
- 0.16 ± 0.00
- -
- -
- -
- -
-
-
- Vietcuna 7B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/robustness-aware/knowledge.md b/_pages/vi/robustness-aware/knowledge.md
index d4731c4..ecc0ef4 100644
--- a/_pages/vi/robustness-aware/knowledge.md
+++ b/_pages/vi/robustness-aware/knowledge.md
@@ -3,114 +3,128 @@ layout: default
permalink: /leaderboard/vi/robustness-aware/knowledge
---
# Robustness-Aware Knowledge Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- ZaloE2E
- ViMMRC
-
-
- EM↑
- F1↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.23 ± 0.00
- 0.37 ± 0.00
- 0.65 ± 0.00
- 0.53 ± 0.00
- 0.84 ± 0.00
- 0.11 ± 0.00
- 0.77 ± 0.00
-
-
- URA-LLaMa 13B
- 0.18 ± 0.00
- 0.30 ± 0.00
- 0.41 ± 0.00
- 0.34 ± 0.00
- 0.61 ± 0.00
- 0.22 ± 0.00
- 0.58 ± 0.00
-
-
- URA-LLaMa 7B
- 0.10 ± 0.00
- 0.18 ± 0.00
- 0.33 ± 0.02
- 0.28 ± 0.02
- 0.61 ± 0.01
- 0.19 ± 0.02
- 0.33 ± 0.06
-
-
- LLaMa-2 13B
- 0.13 ± 0.00
- 0.21 ± 0.00
- 0.39 ± 0.00
- 0.31 ± 0.00
- 0.56 ± 0.00
- 0.46 ± 0.00
- 0.33 ± 0.00
-
-
- LLaMa-2 7B
- 0.02 ± 0.00
- 0.05 ± 0.00
- 0.26 ± 0.01
- 0.20 ± 0.01
- 0.51 ± 0.01
- 0.46 ± 0.01
- 0.13 ± 0.03
-
-
- Vietcuna 7B
- 0.05 ± 0.00
- 0.15 ± 0.00
- 0.26 ± 0.01
- 0.14 ± 0.00
- 0.50 ± 0.00
- 0.01 ± 0.01
- 0.21 ± 0.07
-
-
- MixSUra 8x7B
- 0.13 ± -
- 0.24 ± -
- 0.57 ± -
- 0.45 ± -
- 0.53 ± -
- 0.35 ± -
- 0.58 ± -
-
-
- GPT-3.5
- 0.45 ± 0.01
- 0.61 ± 0.01
- 0.90 ± 0.01
- 0.72 ± 0.04
- -
- 0.65 ± 0.01
- 0.88 ± 0.07
-
-
- GPT-4
- 0.44 ± 0.01
- 0.61 ± 0.01
- 0.91 ± 0.01
- 0.73 ± 0.07
- -
- 0.66 ± 0.07
- 0.88 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {{ dataset[0] }}
+
+ {% else %}
+
+ {{ dataset[0] }}
+
+ {% endif %}
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+ EM↑
+ F1↑
+ {% else %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.knowledge %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AC_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% else %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endif %}
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/robustness-aware/question-answering.md b/_pages/vi/robustness-aware/question-answering.md
index 13f4b1d..5d07725 100644
--- a/_pages/vi/robustness-aware/question-answering.md
+++ b/_pages/vi/robustness-aware/question-answering.md
@@ -3,84 +3,60 @@ layout: default
permalink: /leaderboard/vi/robustness-aware/question-answering
---
# Robustness-Aware Question Answering Leaderboard
+{% assign lang = 'vi' %}
- Models
- XQuAD
- MLQA
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- F1↑
- EM↑
- F1↑
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
-
- URA-LLaMa 70B
- 0.01 ± 0.00
- 0.17 ± 0.00
- 0.01 ± 0.00
- 0.18 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.09 ± 0.00
- 0.00 ± 0.00
- 0.10 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.09 ± 0.00
- 0.00 ± 0.00
- 0.10 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.02 ± 0.00
- 0.00 ± 0.00
- 0.03 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.02 ± 0.00
- 0.00 ± 0.00
- 0.02 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.06 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
-
-
- MixSUra 8x7B
- 0.00 ± -
- 0.11 ± -
- 0.00 ± -
- 0.12 ± -
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.19 ± 0.00
- 0.00 ± 0.00
- 0.20 ± 0.00
-
-
- GPT-4
- 0.00 ± 0.00
- 0.24 ± 0.00
- 0.00 ± 0.00
- 0.25 ± 0.00
-
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/robustness-aware/sentiment-analysis.md b/_pages/vi/robustness-aware/sentiment-analysis.md
index 47f6d45..d647b96 100644
--- a/_pages/vi/robustness-aware/sentiment-analysis.md
+++ b/_pages/vi/robustness-aware/sentiment-analysis.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/vi/robustness-aware/sentiment-analysis
---
# Robustness-Aware Sentiment Analysis Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- VLSP 2016
- UiT-VSFC
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.63 ± 0.01
- 0.48 ± 0.01
- 0.60 ± 0.01
- 0.09 ± 0.01
- 0.83 ± 0.04
- 0.71 ± 0.01
- 0.45 ± 0.01
- 0.80 ± 0.01
- 0.08 ± 0.01
- 0.99 ± 0.01
-
-
- URA-LLaMa 13B
- 0.55 ± 0.02
- 0.52 ± 0.02
- 0.59 ± 0.01
- 0.06 ± 0.01
- 0.74 ± 0.05
- 0.72 ± 0.01
- 0.44 ± 0.05
- 0.77 ± 0.01
- 0.18 ± 0.01
- 0.77 ± 0.02
-
-
- URA-LLaMa 7B
- 0.52 ± 0.02
- 0.36 ± 0.03
- 0.59 ± 0.01
- 0.07 ± 0.01
- 0.66 ± 0.05
- 0.73 ± 0.01
- 0.41 ± 0.01
- 0.71 ± 0.01
- 0.16 ± 0.01
- 0.87 ± 0.02
-
-
- LLaMa-2 13B
- 0.46 ± 0.02
- 0.30 ± 0.01
- 0.55 ± 0.01
- 0.39 ± 0.02
- 0.70 ± 0.05
- 0.66 ± 0.01
- 0.40 ± 0.01
- 0.63 ± 0.01
- 0.11 ± 0.01
- 0.89 ± 0.02
-
-
- LLaMa-2 7B
- 0.45 ± 0.02
- 0.36 ± 0.01
- 0.54 ± 0.01
- 0.20 ± 0.02
- 0.51 ± 0.05
- 0.51 ± 0.01
- 0.33 ± 0.01
- 0.65 ± 0.01
- 0.15 ± 0.01
- 0.80 ± 0.02
-
-
- Vietcuna 7B
- 0.44 ± 0.02
- 0.27 ± 0.01
- 0.51 ± 0.01
- 0.23 ± 0.02
- 0.53 ± 0.05
- 0.49 ± 0.01
- 0.25 ± 0.03
- 0.46 ± 0.01
- 0.33 ± 0.01
- 0.34 ± 0.03
-
-
- MixSUra 8x7B
- 0.59 ± -
- 0.59 ± -
- 0.55 ± -
- 0.34 ± -
- 0.52 ± -
- 0.69 ± -
- 0.44 ± -
- 0.61 ± -
- 0.29 ± -
- 0.66 ± -
-
-
- GPT-3.5
- 0.64 ± 0.01
- 0.60 ± 0.01
- -
- 0.31 ± 0.01
- 0.54 ± 0.05
- 0.86 ± 0.01
- 0.71 ± 0.01
- -
- 0.53 ± 0.01
- 0.86 ± 0.02
-
-
- GPT-4
- 0.74 ± 0.00
- 0.73 ± 0.00
- -
- 0.41 ± 0.00
- 0.71 ± 0.00
- 0.83 ± 0.00
- 0.70 ± 0.00
- -
- 0.50 ± 0.00
- 0.85 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/robustness-aware/summarization.md b/_pages/vi/robustness-aware/summarization.md
index 27728c5..b5a3948 100644
--- a/_pages/vi/robustness-aware/summarization.md
+++ b/_pages/vi/robustness-aware/summarization.md
@@ -3,204 +3,132 @@ layout: default
permalink: /leaderboard/vi/robustness-aware/summarization
---
# Robustness-Aware Summarization Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- VietNews
- WikiLingua
-
-
- R1↑
- R2↑
- RL↑
- SC↑
- BS↑
- Cv↑
- De↑
- Cp↑
- R1↑
- R2↑
- RL↑
- SC↑
- BS↑
- Cv↑
- De↑
- Cp↑
-
-
-
-
- URA-LLaMa 70B
- 0.34 ± 0.00
- 0.15 ± 0.00
- 0.23 ± 0.00
- -0.06 ± 0.00
- -0.11 ± 0.18
- 0.10 ± 0.00
- 0.10 ± 0.00
- 39.63 ± 0.87
- 0.28 ± 0.00
- 0.11 ± 0.00
- 0.19 ± 0.00
- -0.16 ± 0.00
- 0.25 ± 0.23
- 0.50 ± 0.01
- 0.50 ± 0.01
- 167.42 ± 7.09
-
-
- URA-LLaMa 13B
- 0.35 ± 0.00
- 0.14 ± 0.00
- 0.23 ± 0.00
- -0.09 ± 0.00
- -0.07 ± 0.17
- 0.64 ± 0.00
- 0.65 ± 0.00
- 134.65 ± 3.76
- 0.20 ± 0.00
- 0.07 ± 0.00
- 0.13 ± 0.00
- -0.17 ± 0.00
- 0.20 ± 0.11
- 0.38 ± 0.00
- 0.38 ± 0.00
- 103.69 ± 3.33
-
-
- URA-LLaMa 7B
- 0.37 ± 0.00
- 0.12 ± 0.00
- 0.24 ± 0.00
- -0.10 ± 0.00
- -0.24 ± 0.18
- 0.65 ± 0.00
- 0.65 ± 0.00
- 17.92 ± 0.87
- 0.37 ± 0.00
- 0.12 ± 0.00
- 0.24 ± 0.00
- -0.17 ± 0.00
- 0.11 ± 0.18
- 0.65 ± 0.00
- 0.65 ± 0.00
- 20.49 ± 0.95
-
-
- LLaMa-2 13B
- 0.05 ± 0.00
- 0.01 ± 0.00
- 0.04 ± 0.00
- -0.15 ± 0.00
- -0.24 ± 0.18
- 0.03 ± 0.00
- 0.03 ± 0.00
- 55.91 ± 0.65
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.03 ± 0.00
- -0.17 ± 0.00
- 0.09 ± 0.00
- 0.05 ± 0.00
- 0.05 ± 0.00
- 66.85 ± 6.72
-
-
- LLaMa-2 7B
- 0.05 ± 0.00
- 0.01 ± 0.00
- 0.05 ± 0.00
- -0.10 ± 0.00
- -0.19 ± 0.04
- 0.07 ± 0.00
- 0.07 ± 0.00
- 55.29 ± 0.88
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.04 ± 0.00
- -0.17 ± 0.00
- 0.15 ± 0.00
- 0.06 ± 0.00
- 0.06 ± 0.00
- 58.32 ± 3.32
-
-
- Vietcuna 7B
- 0.03 ± 0.00
- 0.01 ± 0.00
- 0.02 ± 0.00
- -0.10 ± 0.00
- -0.18 ± 0.06
- 0.91 ± 0.00
- 0.91 ± 0.00
- 1026.61 ± 3.86
- 0.08 ± 0.00
- 0.02 ± 0.00
- 0.05 ± 0.00
- -0.17 ± 0.00
- -0.19 ± 0.05
- 0.78 ± 0.00
- 0.78 ± 0.00
- 505.45 ± 8.64
-
-
- MixSUra 8x7B
- 0.41 ± -
- 0.19 ± -
- 0.26 ± -
- - ± -
- -0.03 ± -
- 0.86 ± -
- 0.87 ± -
- 29.15 ± -
- 0.46 ± -
- 0.21 ± -
- 0.28 ± -
- - ± -
- 0.26 ± -
- 0.88 ± -
- 0.98 ± -
- 19.10 ± -
-
-
- GPT-3.5
- 0.34 ± 0.00
- 0.19 ± 0.00
- 0.23 ± 0.00
- -0.10 ± 0.00
- 0.05 ± 0.14
- 0.81 ± 0.00
- 0.81 ± 0.00
- 128.44 ± 2.94
- 0.39 ± 0.00
- 0.19 ± 0.00
- 0.25 ± 0.00
- -0.17 ± 0.00
- 0.28 ± 0.11
- 0.82 ± 0.00
- 0.82 ± 0.00
- 200.90 ± 7.40
-
-
- GPT-4
- 0.39 ± 0.00
- 0.21 ± 0.00
- 0.26 ± 0.00
- -0.10 ± 0.09
- 0.04 ± 0.00
- 0.83 ± 0.00
- 0.83 ± 0.71
- 24.48 ± 0.00
- 0.45 ± 0.00
- 0.20 ± 0.00
- 0.27 ± 0.00
- -0.17 ± 0.00
- 0.28 ± 0.00
- 0.80 ± 0.03
- 0.81 ± 0.00
- 20.40 ± 1.59
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/robustness-aware/text-classification.md b/_pages/vi/robustness-aware/text-classification.md
index a48a0ed..72a19a3 100644
--- a/_pages/vi/robustness-aware/text-classification.md
+++ b/_pages/vi/robustness-aware/text-classification.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/vi/robustness-aware/text-classification
---
# Robustness-Aware Text Classification Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- UiT-VSMEC
- PhoATIS
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.25 ± 0.00
- 0.16 ± 0.00
- 0.56 ± 0.02
- 0.20 ± 0.00
- 0.33 ± 0.00
- 0.16 ± 0.02
- 0.26 ± 0.03
- 0.79 ± 0.00
- 0.79 ± 0.02
- 0.08 ± 0.06
-
-
- URA-LLaMa 13B
- 0.30 ± 0.00
- 0.11 ± 0.00
- 0.51 ± 0.01
- 0.26 ± 0.00
- 0.44 ± 0.00
- 0.01 ± 0.01
- 0.05 ± 0.01
- 0.47 ± 0.01
- 0.84 ± 0.01
- 0.00 ± 0.04
-
-
- URA-LLaMa 7B
- 0.29 ± 0.00
- 0.10 ± 0.00
- 0.57 ± 0.01
- 0.17 ± 0.00
- 0.30 ± 0.00
- 0.02 ± 0.01
- 0.04 ± 0.00
- 0.55 ± 0.01
- 0.18 ± 0.01
- 0.01 ± 0.02
-
-
- LLaMa-2 13B
- 0.19 ± 0.00
- 0.07 ± 0.00
- 0.52 ± 0.01
- 0.47 ± 0.00
- 0.43 ± 0.00
- 0.02 ± 0.00
- 0.06 ± 0.00
- 0.57 ± 0.01
- 0.91 ± 0.00
- 0.01 ± 0.00
-
-
- LLaMa-2 7B
- 0.17 ± 0.00
- 0.10 ± 0.00
- 0.55 ± 0.00
- 0.33 ± 0.00
- 0.29 ± 0.00
- 0.01 ± 0.01
- 0.00 ± 0.00
- 0.56 ± 0.00
- 0.69 ± 0.01
- 0.02 ± 0.02
-
-
- Vietcuna 7B
- 0.09 ± 0.00
- 0.09 ± 0.00
- 0.51 ± 0.01
- 0.91 ± 0.00
- 0.09 ± 0.00
- 0.02 ± 0.01
- 0.01 ± 0.00
- 0.55 ± 0.01
- 0.23 ± 0.01
- 0.02 ± 0.01
-
-
- MixSUra 8x7B
- 0.35 ± -
- 0.27 ± -
- 0.70 ± -
- 0.58 ± -
- 0.70 ± -
- 0.80 ± -
- 55 ± -
- 0.94 ± -
- 0.15 ± -
- 0.88 ± -
-
-
- GPT-3.5
- 0.42 ± 0.00
- 0.41 ± 0.00
- -
- 0.28 ± 0.00
- 0.30 ± 0.00
- 0.68 ± 0.02
- 0.64 ± 0.03
- -
- 0.62 ± 0.02
- 0.70 ± 0.05
-
-
- GPT-4
- 0.48 ± 0.00
- 0.45 ± 0.00
- -
- 0.33 ± 0.00
- 0.40 ± 0.00
- 0.86 ± 0.01
- 0.80 ± 0.02
- -
- 0.80 ± 0.01
- 0.91 ± 0.03
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/robustness-aware/toxicity-detection.md b/_pages/vi/robustness-aware/toxicity-detection.md
index dd7ba94..670fef2 100644
--- a/_pages/vi/robustness-aware/toxicity-detection.md
+++ b/_pages/vi/robustness-aware/toxicity-detection.md
@@ -3,144 +3,96 @@ layout: default
permalink: /leaderboard/vi/robustness-aware/toxicity-detection
---
# Robustness-Aware Toxicity Detection Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- UiT-ViCTSD
- UiT-ViHSD
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.32 ± 0.00
- 0.21 ± 0.00
- 0.72 ± 0.01
- 0.62 ± 0.00
- 0.33 ± 0.00
- 0.14 ± 0.00
- 0.12 ± 0.00
- 0.64 ± 0.02
- 0.61 ± 0.00
- 0.23 ± 0.00
-
-
- URA-LLaMa 13B
- 0.27 ± 0.00
- 0.26 ± 0.00
- 0.56 ± 0.00
- 0.56 ± 0.00
- 0.12 ± 0.00
- 0.18 ± 0.00
- 0.11 ± 0.00
- 0.57 ± 0.01
- 0.45 ± 0.00
- 0.20 ± 0.00
-
-
- URA-LLaMa 7B
- 0.22 ± 0.00
- 0.21 ± 0.00
- 0.63 ± 0.00
- 0.39 ± 0.00
- 0.36 ± 0.00
- 0.12 ± 0.00
- 0.07 ± 0.00
- 0.62 ± 0.00
- 0.38 ± 0.00
- 0.19 ± 0.00
-
-
- LLaMa-2 13B
- 0.12 ± 0.00
- 0.11 ± 0.00
- 0.56 ± 0.01
- 0.66 ± 0.00
- 0.12 ± 0.00
- 0.10 ± 0.00
- 0.07 ± 0.00
- 0.59 ± 0.01
- 0.62 ± 0.00
- 0.24 ± 0.00
-
-
- LLaMa-2 7B
- 0.04 ± 0.00
- 0.04 ± 0.00
- 0.62 ± 0.00
- 0.86 ± 0.00
- 0.02 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.54 ± 0.00
- 0.79 ± 0.00
- 0.00 ± 0.00
-
-
- Vietcuna 7B
- 0.11 ± 0.00
- 0.11 ± 0.00
- 0.54 ± 0.00
- 0.39 ± 0.00
- 0.13 ± 0.00
- 0.09 ± 0.00
- 0.05 ± 0.00
- 0.5 ± 0.00
- 0.24 ± 0.00
- 0.08 ± 0.00
-
-
- MixSUra 8x7B
- 0.72 ± -
- 0.39 ± -
- - ± -
- 0.25 ± -
- 0.81 ± -
- 0.66 ± -
- 0.31 ± -
- 0.67 ± -
- 0.21 ± -
- 0.82 ± -
-
-
- GPT-3.5
- 0.51 ± 0.00
- 0.46 ± 0.00
- 0.5 ± 0.00
- 0.01 ± 0.00
- 0.54 ± 0.00
- 0.64 ± 0.00
- 0.47 ± 0.00
- - ± -
- 0.30 ± 0.00
- 0.63 ± 0.00
-
-
- GPT-4
- 0.88 ± 0.00
- 0.71 ± 0.00
- - ± -
- 0.38 ± 0.00
- 0.88 ± 0.00
- 0.78 ± 0.00
- 0.56 ± 0.00
- - ± -
- 0.44 ± 0.00
- 0.78 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/robustness-aware/translation.md b/_pages/vi/robustness-aware/translation.md
index 99b4d0b..fb9ea78 100644
--- a/_pages/vi/robustness-aware/translation.md
+++ b/_pages/vi/robustness-aware/translation.md
@@ -3,124 +3,84 @@ layout: default
permalink: /leaderboard/vi/robustness-aware/translation
---
# Robustness-Aware Translation Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- PhoMT
- OPUS100
-
-
- (En → Vi)
- (Vi → En)
- (En → Vi)
- (Vi → En)
- (En → Vi)
- (Vi → En)
- (En → Vi)
- (Vi → En)
-
-
-
-
- URA-LLaMa 70B
- 0.25 ± 0.00
- 0.58 ± 0.00
- 0.11 ± 0.00
- 0.51 ± 0.00
- 0.05 ± 0.00
- 0.40 ± 0.01
- 0.06 ± 0.00
- 0.36 ± 0.00
-
-
- URA-LLaMa 13B
- 0.23 ± 0.00
- 0.55 ± 0.00
- 0.10 ± 0.00
- 0.50 ± 0.00
- 0.03 ± 0.00
- 0.38 ± 0.01
- 0.05 ± 0.00
- 0.38 ± 0.00
-
-
- URA-LLaMa 7B
- 0.15 ± 0.00
- 0.48 ± 0.00
- 0.06 ± 0.00
- 0.46 ± 0.00
- 0.02 ± 0.00
- 0.35 ± 0.00
- 0.03 ± 0.00
- 0.34 ± 0.01
-
-
- LLaMa-2 13B
- 0.20 ± 0.00
- 0.51 ± 0.00
- 0.07 ± 0.00
- 0.44 ± 0.00
- 0.03 ± 0.00
- 0.36 ± 0.01
- 0.04 ± 0.00
- 0.32 ± 0.00
-
-
- LLaMa-2 7B
- 0.13 ± 0.00
- 0.41 ± 0.00
- 0.05 ± 0.00
- 0.42 ± 0.00
- 0.02 ± 0.00
- 0.31 ± 0.00
- 0.03 ± 0.00
- 0.30 ± 0.00
-
-
- Vietcuna 7B
- 0.17 ± 0.00
- 0.43 ± 0.00
- 0.07 ± 0.01
- 0.41 ± 0.00
- 0.09 ± 0.01
- 0.38 ± 0.01
- 0.09 ± 0.01
- 0.33 ± 0.00
-
-
- MixSUra 8x7B
- 0.14 ± -
- 0.50 ± -
- 0.11 ± -
- 0.46 ± -
- 0.06 ± -
- 0.36 ± -
- 0.06 ± -
- 0.31 ± -
-
-
- GPT-3.5
- 0.31 ± 0.00
- 0.64 ± 0.00
- 0.17 ± 0.00
- 0.59 ± 0.00
- 0.15 ± 0.01
- 0.49 ± 0.01
- 0.21 ± 0.01
- 0.48 ± 0.00
-
-
- GPT-4
- 0.31 ± 0.00
- 0.65 ± 0.00
- 0.20 ± 0.00
- 0.62 ± 0.00
- 0.16 ± 0.01
- 0.50 ± 0.01
- 0.23 ± 0.01
- 0.51 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.translation %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.translation %}
+ BLEU envi↑
+ BLEU vien↑
+ hLEPOR envi↑
+ hLEPOR vien↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].robustness_aware.translation %}
+ {% assign bleu_envi_best = 0 %}
+ {% assign bleu_vien_best = 0 %}
+ {% assign hlepor_envi_best = 0 %}
+ {% assign hlepor_vien_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["BLEU envi"] and dataset[1][m]["BLEU envi"] > bleu_envi_best %}
+ {% assign bleu_envi_best = dataset[1][m]["BLEU envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["BLEU vien"] and dataset[1][m]["BLEU vien"] > bleu_vien_best %}
+ {% assign bleu_vien_best = dataset[1][m]["BLEU vien"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR envi"] and dataset[1][m]["hLEPOR envi"] > hlepor_envi_best %}
+ {% assign hlepor_envi_best = dataset[1][m]["hLEPOR envi"] %}
+ {% endif %}
+ {% if dataset[1][m]["hLEPOR vien"] and dataset[1][m]["hLEPOR vien"] > hlepor_vien_best %}
+ {% assign hlepor_vien_best = dataset[1][m]["hLEPOR vien"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["BLEU envi"] %}
+ {{ dataset[1][model]["BLEU envi"] | round: 2 }} ± {{ dataset[1][model]["BLEU envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["BLEU vien"] %}
+ {{ dataset[1][model]["BLEU vien"] | round: 2 }} ± {{ dataset[1][model]["BLEU vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR envi"] %}
+ {{ dataset[1][model]["hLEPOR envi"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR envi_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["hLEPOR vien"] %}
+ {{ dataset[1][model]["hLEPOR vien"] | round: 2 }} ± {{ dataset[1][model]["hLEPOR vien_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/weaker-prompt/question-answering.md b/_pages/vi/weaker-prompt/question-answering.md
index 6355ddf..25a17aa 100644
--- a/_pages/vi/weaker-prompt/question-answering.md
+++ b/_pages/vi/weaker-prompt/question-answering.md
@@ -3,63 +3,60 @@ layout: default
permalink: /leaderboard/vi/weaker-prompt/question-answering
---
# Weak-Prompt Question Answering Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- XQuAD
- MLQA
-
-
- EM↑
- F1↑
- EM↑
- F1↑
-
-
-
-
- URA-LLaMa 70B
- 0.21 ± 0.01
- 0.47 ± 0.01
- 0.14 ± 0.01
- 0.41 ± 0.00
-
-
- URA-LLaMa 13B
- 0.22 ± 0.01
- 0.43 ± 0.01
- 0.17 ± 0.01
- 0.40 ± 0.01
-
-
- URA-LLaMa 7B
- 0.13 ± 0.00
- 0.32 ± 0.00
- 0.10 ± 0.00
- 0.32 ± 0.00
-
-
- LLaMa-2 13B
- 0.04 ± 0.00
- 0.28 ± 0.00
- 0.04 ± 0.00
- 0.28 ± 0.00
-
-
- LLaMa-2 7B
- 0.06 ± 0.00
- 0.24 ± 0.00
- 0.05 ± 0.00
- 0.24 ± 0.00
-
-
- MixSUra 8x7b
- 0.13 ±-
- 0.38 ± -
- 0.09 ± -
- 0.36 ± -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/weaker-prompt/summarization.md b/_pages/vi/weaker-prompt/summarization.md
index 2f6774c..2c1d732 100644
--- a/_pages/vi/weaker-prompt/summarization.md
+++ b/_pages/vi/weaker-prompt/summarization.md
@@ -3,147 +3,132 @@ layout: default
permalink: /leaderboard/vi/weaker-prompt/summarization
---
# Weak-Prompt Summarization Leaderboard
+{% assign lang = 'vi' %}
-
-
-Models
-VietNews
-WikiLingua
-
-
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-
-
-
-
-URA-LLaMa 70B
-0.49 ± 0.00
-0.23 ± 0.00
-0.31 ± 0.00
--0.08 ± 0.00
-0.05 ± 0.11
-0.89 ± 0.00
-8.90 ± 0.03
-18.48 ± 0.59
-0.47 ± 0.00
-0.20 ± 0.00
-0.29 ± 0.00
--0.16 ± 0.00
-0.19 ± 0.13
-0.86 ± 0.00
-6.83 ± 0.09
-25.30 ± 1.86
-
-
-URA-LLaMa 13B
-0.27 ± 0.00
-0.12 ± 0.00
-0.18 ± 0.00
--0.09 ± 0.00
-0.05 ± 0.11
-0.56 ± 0.00
-5.00 ± 0.04
-153.55 ± 0.99
-0.22 ± 0.00
-0.09 ± 0.00
-0.14 ± 0.00
--0.16 ± 0.00
-0.20 ± 0.007
-0.48 ± 0.00
-3.49 ± 0.04
-190.09 ± 4.92
-
-
-URA-LLaMa 7B
-0.45 ± 0.00
-0.21 ± 0.00
-0.29 ± 0.00
--0.08 ± 0.00
-0.03 ± 0.09
-0.91 ± 0.00
-9.43 ± 0.03
-6.42 ± 0.05
-0.42 ± 0.00
-0.18 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.07 ± 0.12
-0.89 ± 0.00
-7.58 ± 0.05
-7.14 ± 0.14
-
-
-LLaMa-2 13B
-0.45 ± 0.00
-0.22 ± 0.00
-0.29 ± 0.00
--0.09 ± 0.00
-0.00 ± 0.14
-0.92 ± 0.00
-9.49 ± 0.02
-8.46 ± 0.29
-0.47 ± 0.00
-0.22 ± 0.00
-0.29 ± 0.00
--0.16 ± 0.00
-0.34 ± 0.12
-0.92 ± 0.00
-9.39 ± 0.05
-17.94 ± 2.84
-
-
-LLaMa-2 7B
-0.36 ± 0.00
-0.17 ± 0.00
-0.23 ± 0.00
--0.09 ± 0.00
--0.15 ± 0.12
-0.69 ± 0.00
-6.35 ± 0.03
-7.59 ± 0.21
-0.45 ± 0.00
-0.20 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.36 ± 0.00
-0.83 ± 0.00
-7.71 ± 0.07
-12.39 ± 1.46
-
-
-MixSUra 8x7B
-0.44 ± -
-0.22 ± -
-0.29 ± -
-- ± -
-0.07 ± -
-0.97 ± -
-35.67 ± -
-9.43 ± -
-0.47 ± -
-0.22 ± -
-0.29 ± -
-- ± -
-0.19 ± -
-0.97 ± -
-28.97 ± -
-10.27 ± -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].weaker_prompt.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/zero-shot/information-retrieval.md b/_pages/vi/zero-shot/information-retrieval.md
index c844a8f..6768e9d 100644
--- a/_pages/vi/zero-shot/information-retrieval.md
+++ b/_pages/vi/zero-shot/information-retrieval.md
@@ -3,113 +3,84 @@ layout: default
permalink: /leaderboard/vi/zero-shot/information-retrieval
---
# Zero-Shot Information Retrieval Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- mMARCO
- mRobust04
-
-
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
- M@10↑
- M@10B↑
- N@10↑
- N@10B↑
-
-
-
-
- URA-LLaMa 70B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- URA-LLaMa 13B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- URA-LLaMa 7B
- 0.06 ± 0.00
- 0.14 ± 0.00
- 0.09 ± 0.00
- 0.21 ± 0.00
- -
- -
- -
- -
-
-
- LLaMa-2 13B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- LLaMa-2 7B
- 0.06 ± 0.00
- 0.11 ± 0.00
- 0.08 ± 0.00
- 0.17 ± 0.00
- -
- -
- -
- -
-
-
- Vietcuna 7B
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-3.5
- -
- -
- -
- -
- -
- -
- -
- -
-
-
- GPT-4
- -
- -
- -
- -
- -
- -
- -
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.information_retrieval %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.information_retrieval %}
+ M@10↑
+ M@10B↑
+ N@10↑
+ N@10B↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.information_retrieval %}
+ {% assign M10_best = 0 %}
+ {% assign M10B_best = 0 %}
+ {% assign N10_best = 0 %}
+ {% assign N10B_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m]["M@10"] and dataset[1][m]["M@10"] > M10_best %}
+ {% assign M10_best = dataset[1][m]["M@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["M@10B"] and dataset[1][m]["M@10B"] > M10B_best %}
+ {% assign M10B_best = dataset[1][m]["M@10B"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10"] and dataset[1][m]["N@10"] > N10_best %}
+ {% assign N10_best = dataset[1][m]["N@10"] %}
+ {% endif %}
+ {% if dataset[1][m]["N@10B"] and dataset[1][m]["N@10B"] > N10B_best %}
+ {% assign N10B_best = dataset[1][m]["N@10B"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model]["M@10"] %}
+ {{ dataset[1][model]["M@10"] | round: 2 }} ± {{ dataset[1][model]["M@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["M@10B"] %}
+ {{ dataset[1][model]["M@10B"] | round: 2 }} ± {{ dataset[1][model]["M@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10"] %}
+ {{ dataset[1][model]["N@10"] | round: 2 }} ± {{ dataset[1][model]["N@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["N@10B"] %}
+ {{ dataset[1][model]["N@10B"] | round: 2 }} ± {{ dataset[1][model]["N@10B_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/zero-shot/knowledge.md b/_pages/vi/zero-shot/knowledge.md
index 04ce434..63dd085 100644
--- a/_pages/vi/zero-shot/knowledge.md
+++ b/_pages/vi/zero-shot/knowledge.md
@@ -2,105 +2,129 @@
layout: default
permalink: /leaderboard/vi/zero-shot/knowledge
---
-# Zero-Shot Knowledge Leaderboard
+# Zero-shot Knowledge Leaderboard
+{% assign lang = 'vi' %}
-
-
-Models
-ZaloE2E
-ViMMRC
-
-
-EM↑
-F1↑
-AC↑
-F1↑
-AR↑
-ECE↓
-A@10↑
-
-
-
-
-URA-LLaMa 70B
-0.28 ± 0.02
-0.44 ± 0.02
-0.80 ± 0.02
-0.80 ± 0.02
-0.85 ± 0.01
-0.10 ± 0.02
-0.96 ± 0.03
-
-
-URA-LLaMa 13B
-0.12 ± 0.01
-0.22 ± 0.01
-0.40 ± 0.02
-0.31 ± 0.02
-0.57 ± 0.02
-0.48 ± 0.02
-0.42 ± 0.08
-
-
-URA-LLaMa 7B
-0.09 ± 0.01
-0.20 ± 0.02
-0.30 ± 0.02
-0.10 ± 0.01
-0.56 ± 0.02
-0.27 ± 0.02
-0.56 ± 0.07
-
-
-LLaMa-2 13B
-0.06 ± 0.01
-0.10 ± 0.01
-0.52 ± 0.02
-0.41 ± 0.02
-0.64 ± 0.02
-0.33 ± 0.02
-0.73 ± 0.07
-
-
-LLaMa-2 7B
-0.03 ± 0.01
-0.07 ± 0.01
-0.37 ± 0.02
-0.25 ± 0.02
-0.51 ± 0.02
-0.35 ± 0.02
-0.29 ± 0.06
-
-
-Vietcuna 7B
-0.03 ± 0.01
-0.06 ± 0.01
-0.32 ± 0.02
-0.22 ± 0.02
-0.50 ± 0.00
-0.07 ± 0.02
-0.33 ± 0.07
-
-
-GPT-3.5
-0.37 ± 0.02
-0.56 ± 0.02
-0.90 ± 0.01
-0.72 ± 0.01
--
-0.65 ± 0.01
-0.90 ± 0.04
-
-
-GPT-4
-0.38 ± 0.02
-0.55 ± 0.02
-0.92 ± 0.01
-0.73 ± 0.06
--
-0.67 ± 0.01
-0.90 ± 0.04
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {{ dataset[0] }}
+
+ {% else %}
+
+ {{ dataset[0] }}
+
+ {% endif %}
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.knowledge %}
+ {% if dataset[1].num_fields == 2 %}
+ EM↑
+ F1↑
+ {% else %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.knowledge %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AC_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+ {% if dataset[1].num_fields == 2 %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% else %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endif %}
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/zero-shot/language-modeling.md b/_pages/vi/zero-shot/language-modeling.md
index 20f125a..232093f 100644
--- a/_pages/vi/zero-shot/language-modeling.md
+++ b/_pages/vi/zero-shot/language-modeling.md
@@ -3,149 +3,108 @@ layout: default
permalink: /leaderboard/vi/zero-shot/language-modeling
---
# Zero-Shot Language Modeling Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- MLQA-MLM
- VSEC
-
-
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
- EM↑
- CER↓
- WER↓
- CED↓
- WED↓
- PLX↓
-
-
-
-
- URA-LLaMa 70B
- 0.00 ± 0.00
- 0.50 ± 0.01
- 0.64 ± 0.01
- 519.09 ± 10.96
- 115.82 ± 2.45
- 1.08 ± 0.01
- 0.00 ± 0.00
- 0.88 ± 0.00
- 1.01 ± 0.00
- 113.51 ± 0.57
- 29.91 ± 0.15
- 1.09 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.67 ± 0.00
- 0.78 ± 0.00
- 697.85 ± 11.62
- 161.34 ± 2.64
- 1.16 ± 0.02
- 0.01 ± 0.00
- 0.42 ± 0.01
- 0.56 ± 0.01
- 54.88 ± 0.77
- 14.50 ± 0.19
- 1.26 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.73 ± 0.00
- 0.88 ± 0.01
- 684.00 ± 13.18
- 166.87 ± 3.18
- 1.25 ± 0.01
- 0.01 ± 0.00
- 3.33 ± 0.04
- 3.14 ± 0.03
- 420.34 ± 5.66
- 85.79 ± 0.96
- 1.33 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.90 ± 0.00
- 1.00 ± 0.00
- 881.97 ± 11.23
- 208.52 ± 2.52
- 1.10 ± 0.01
- 0.00 ± 0.00
- 1.32 ± 0.01
- 1.40 ± 0.01
- 160.06 ± 1.16
- 38.12 ± 0.23
- 1.11 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.95 ± 0.00
- 1.07 ± 0.01
- 860.42 ± 13.18
- 210.21 ± 3.18
- 1.25 ± 0.01
- 0.00 ± 0.00
- 1.54 ± 0.04
- 1.55 ± 0.03
- 171.28 ± 5.66
- 40.18 ± 0.96
- 1.14 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 1.00 ± 0.00
- 1.00 ± 0.00
- 951.53 ± 12.37
- 208.57 ± 2.73
- 1.48 ± 0.01
- 0.01 ± 0.00
- 1.11 ± 0.01
- 1.20 ± 0.01
- 139.90 ± 1.39
- 33.94 ± 0.33
- 1.61 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.34 ± 0.01
- 0.50 ± 0.01
- 422.30 ± 10.79
- 100.33 ± 2.44
- -
- 0.02 ± 0.00
- 0.16 ± 0.00
- 0.30 ± 0.00
- 12.63 ± 0.34
- 3.48 ± 0.09
- -
-
-
- GPT-4
- 0.04 ± 0.00
- 0.40 ± 0.01
- 0.45 ± 0.01
- 381.88 ± 10.26
- 93.34 ± 2.39
- -
- 0.60 ± 0.01
- 0.14 ± 0.00
- 0.26 ± 0.00
- 13.58 ± 0.45
- 3.67 ± 0.12
- -
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.language_modeling %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.language_modeling %}
+ EM↑
+ CER↓
+ WER↓
+ CED↓
+ WED↓
+ PLX↓
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.language_modeling %}
+ {% assign EM_best = 0 %}
+ {% assign CER_best = 1 %}
+ {% assign WER_best = 1 %}
+ {% assign CED_best = 10000 %}
+ {% assign WED_best = 10000 %}
+ {% assign PLX_best = 10000 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].CER and dataset[1][m].CER < CER_best %}
+ {% assign CER_best = dataset[1][m].CER %}
+ {% endif %}
+ {% if dataset[1][m].WER and dataset[1][m].WER < WER_best %}
+ {% assign WER_best = dataset[1][m].WER %}
+ {% endif %}
+ {% if dataset[1][m].CED and dataset[1][m].CED < CED_best %}
+ {% assign CED_best = dataset[1][m].CED %}
+ {% endif %}
+ {% if dataset[1][m].WED and dataset[1][m].WED < WED_best %}
+ {% assign WED_best = dataset[1][m].WED %}
+ {% endif %}
+ {% if dataset[1][m].PLX and dataset[1][m].PLX < PLX_best %}
+ {% assign PLX_best = dataset[1][m].PLX %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CER %}
+ {{ dataset[1][model].CER | round: 2 }} ± {{ dataset[1][model].CER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WER %}
+ {{ dataset[1][model].WER | round: 2 }} ± {{ dataset[1][model].WER_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].CED %}
+ {{ dataset[1][model].CED | round: 2 }} ± {{ dataset[1][model].CED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].WED %}
+ {{ dataset[1][model].WED | round: 2 }} ± {{ dataset[1][model].WED_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].PLX %}
+ {{ dataset[1][model].PLX | round: 2 }} ± {{ dataset[1][model].PLX_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/zero-shot/question-answering.md b/_pages/vi/zero-shot/question-answering.md
index 2b0501d..a50c437 100644
--- a/_pages/vi/zero-shot/question-answering.md
+++ b/_pages/vi/zero-shot/question-answering.md
@@ -3,77 +3,60 @@ layout: default
permalink: /leaderboard/vi/zero-shot/question-answering
---
# Zero-Shot Question Answering Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- XQuAD
- MLQA
-
-
- EM↑
- F1↑
- EM↑
- F1↑
-
-
-
-
- URA-LLaMa 70B
- 0.06 ± 0.00
- 0.30 ± 0.00
- 0.04 ± 0.00
- 0.28 ± 0.00
-
-
- URA-LLaMa 13B
- 0.00 ± 0.00
- 0.14 ± 0.00
- 0.00 ± 0.00
- 0.15 ± 0.00
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.14 ± 0.00
- 0.00 ± 0.00
- 0.16 ± 0.00
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.02
- 0.05 ± 0.00
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.05 ± 0.00
- 0.00 ± 0.00
- 0.06 ± 0.00
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
-
-
- GPT-3.5
- 0.00 ± 0.00
- 0.24 ± 0.00
- 0.00 ± 0.00
- 0.25 ± 0.00
-
-
- GPT-4
- 0.00 ± 0.00
- 0.27 ± 0.00
- 0.00 ± 0.00
- 0.27 ± 0.00
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.question_answering %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.question_answering %}
+ EM↑
+ F1↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.question_answering %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/zero-shot/reasoning.md b/_pages/vi/zero-shot/reasoning.md
index 627b97a..7fb07b7 100644
--- a/_pages/vi/zero-shot/reasoning.md
+++ b/_pages/vi/zero-shot/reasoning.md
@@ -3,123 +3,72 @@ layout: default
permalink: /leaderboard/vi/zero-shot/reasoning
---
# Zero-Shot Reasoning Leaderboard
+{% assign lang = 'vi' %}
- Models
- SR - Natural
- SR - Abstract symbol
- MATH
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.reasoning %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
- EM↑
- F1↑
- Equ.↑
+ {% for dataset in site.data.leaderboard[lang].zero_shot.reasoning %}
+ EM↑
+ F1↑
+ Equ↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.06 ± 0.00
- 0.34 ± 0.00
- 0.06 ± 0.00
- 0.02 ± 0.00
- 0.24 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.24 ± 0.02
-
-
- URA-LLaMa 13B
- 0.01 ± 0.00
- 0.31 ± 0.00
- 0.02 ± 0.00
- 0.02 ± 0.00
- 0.24 ± 0.00
- 0.01 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.14 ± 0.02
-
-
- URA-LLaMa 7B
- 0.00 ± 0.00
- 0.26 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.17 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.01
-
-
- LLaMa-2 13B
- 0.00 ± 0.00
- 0.06 ± 0.00
- 0.00 ± 0.00
- 0.02 ± 0.00
- 0.19 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.16 ± 0.02
-
-
- LLaMa-2 7B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.05 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.06 ± 0.01
-
-
- Vietcuna 7B
- 0.00 ± 0.00
- 0.04 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.10 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
-
-
- GPT-3.5
- 0.21 ± 0.00
- 0.59 ± 0.00
- 0.32 ± 0.00
- 0.09 ± 0.00
- 0.28 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.72 ± 0.02
-
-
- GPT-4
- 0.21 ± 0.00
- 0.59 ± 0.00
- 0.32 ± 0.00
- 0.09 ± 0.00
- 0.28 ± 0.00
- 0.13 ± 0.00
- 0.00 ± 0.00
- 0.01 ± 0.00
- 0.76 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.reasoning %}
+ {% assign EM_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign Equ_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].EM and dataset[1][m].EM > EM_best %}
+ {% assign EM_best = dataset[1][m].EM %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m]["Equ"] and dataset[1][m]["Equ"] > Equ_best %}
+ {% assign Equ_best = dataset[1][m]["Equ"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].EM %}
+ {{ dataset[1][model].EM | round: 2 }} ± {{ dataset[1][model].EM_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["Equ"] %}
+ {{ dataset[1][model]["Equ"] | round: 2 }} ± {{ dataset[1][model]["Equ_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/zero-shot/sentiment-analysis.md b/_pages/vi/zero-shot/sentiment-analysis.md
index 58202ae..91f4074 100644
--- a/_pages/vi/zero-shot/sentiment-analysis.md
+++ b/_pages/vi/zero-shot/sentiment-analysis.md
@@ -3,157 +3,96 @@ layout: default
permalink: /leaderboard/vi/zero-shot/sentiment-analysis
---
# Zero-Shot Sentiment Analysis Leaderboard
+{% assign lang = 'vi' %}
- Models
- VLSP 2016
- UiT-VSFC
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.sentiment_analysis %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
+ {% for dataset in site.data.leaderboard[lang].zero_shot.sentiment_analysis %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.63 ± 0.02
- 0.63 ± 0.02
- 0.74 ± 0.01
- 0.15 ± 0.01
- 0.87 ± 0.03
- 0.64 ± 0.01
- 0.54 ± 0.01
- 0.85 ± 0.01
- 0.14 ± 0.00
- 0.98 ± 0.01
-
-
- URA-LLaMa 13B
- 0.52 ± 0.02
- 0.35 ± 0.01
- 0.60 ± 0.01
- 0.10 ± 0.01
- 0.64 ± 0.05
- 0.70 ± 0.01
- 0.40 ± 0.01
- 0.72 ± 0.01
- 0.23 ± 0.01
- 0.95 ± 0.01
-
-
- URA-LLaMa 7B
- 0.35 ± 0.02
- 0.24 ± 0.01
- 0.54 ± 0.01
- 0.24 ± 0.01
- 0.31 ± 0.05
- 0.27 ± 0.01
- 0.18 ± 0.00
- 0.52 ± 0.01
- 0.37 ± 0.01
- 0.03 ± 0.01
-
-
- LLaMa-2 13B
- 0.25 ± 0.01
- 0.25 ± 0.01
- 0.49 ± 0.01
- 0.39 ± 0.01
- 0.29 ± 0.05
- 0.29 ± 0.01
- 0.24 ± 0.01
- 0.52 ± 0.01
- 0.42 ± 0.01
- 0.30 ± 0.03
-
-
- LLaMa-2 7B
- 0.15 ± 0.01
- 0.15 ± 0.01
- 0.58 ± 0.01
- 0.73 ± 0.01
- 0.12 ± 0.03
- 0.04 ± 0.00
- 0.06 ± 0.01
- 0.49 ± 0.01
- 0.79 ± 0.00
- 0.01 ± 0.01
-
-
- Vietcuna 7B
- 0.11 ± 0.01
- 0.12 ± 0.01
- 0.49 ± 0.01
- 0.68 ± 0.01
- 0.11 ± 0.03
- 0.05 ± 0.00
- 0.06 ± 0.00
- 0.56 ± 0.01
- 0.73 ± 0.00
- 0.05 ± 0.01
-
-
- MixSUra 8x7B
- 0.45 ± -
- 0.30 ± -
- 0.62 ± -
- 0.50 ± -
- 0.49 ± -
- 0.55 ± -
- 0.40 ± -
- 0.66 ± -
- 0.41 ± -
- 0.60 ± -
-
-
- Gemini Pro
- 0.64 ± -
- 0.47 ± -
- -
- 0.31 ± -
- 0.53 ± -
- 0.76 ± -
- 0.49 ± -
- -
- 0.43 ± -
- 0.77 ± -
-
-
- GPT-3.5
- 0.62 ± 0.02
- 0.56 ± 0.01
- -
- 0.29 ± 0.02
- 0.62 ± 0.05
- 0.81 ± 0.31
- 0.68 ± 0.31
- -
- 0.48 ± 0.01
- 0.83 ± 0.02
-
-
- GPT-4
- 0.71 ± 0.01
- 0.68 ± 0.01
- -
- 0.37 ± 0.01
- 0.70 ± 0.04
- 0.80 ± 0.01
- 0.67 ± 0.01
- -
- 0.47 ± 0.01
- 0.85 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.sentiment_analysis %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_pages/vi/zero-shot/summarization.md b/_pages/vi/zero-shot/summarization.md
index 3226c1f..c5f1a47 100644
--- a/_pages/vi/zero-shot/summarization.md
+++ b/_pages/vi/zero-shot/summarization.md
@@ -3,185 +3,132 @@ layout: default
permalink: /leaderboard/vi/zero-shot/summarization
---
# Zero-Shot Summarization Leaderboard
+{% assign lang = 'vi' %}
-
-
-Models
-VietNews
-WikiLingua
-
-
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-R1↑
-R2↑
-RL↑
-SC↑
-BS↑
-Cv↑
-De↑
-Cp↑
-
-
-
-
-URA-LLaMa 70B
-0.42 ± 0.17
-0.21 ± 0.12
-0.28 ± 0.00
--0.11 ± 0.00
-0.03 ± 0.19
-0.85 ± 0.00
-14.59 ± 0.05
-17.21 ± 0.33
-0.37 ± 0.00
-0.16 ± 0.00
-0.24 ± 0.00
--0.22 ± 0.00
-0.26 ± 0.16
-0.17 ± 0.00
-0.22 ± 0.00
-22.24 ± 0.97
-
-
-URA-LLaMa 13B
-0.38 ± 0.00
-0.18 ± 0.00
-0.25 ± 0.00
--0.09 ± 0.00
-0.01 ± 0.18
-0.71 ± 0.00
-6.01 ± 0.07
-24.27 ± 0.61
-0.22 ± 0.00
-0.08 ± 0.00
-0.14 ± 0.00
--0.16 ± 0.00
--0.13 ± 0.12
-0.42 ± 0.01
-3.06 ± 0.10
-49.58 ± 1.16
-
-
-URA-LLaMa 7B
-0.38 ± 0.00
-0.14 ± 0.00
-0.25 ± 0.00
--0.09 ± 0.00
-0.04 ± 0.12
-0.65 ± 0.00
-4.88 ± 0.03
-7.77 ± 0.05
-0.40 ± 0.00
-0.15 ± 0.00
-0.26 ± 0.00
--0.16 ± 0.00
-0.19 ± 0.07
-0.73 ± 0.00
-4.79 ± 0.07
-6.22 ± 0.07
-
-
-LLaMa-2 13B
-0.06 ± 0.00
-0.02 ± 0.00
-0.04 ± 0.00
--0.09 ± 0.00
--0.18 ± 0.04
-0.07 ± 0.00
-0.43 ± 0.01
-28.25 ± 0.24
-0.04 ± 0.00
-0.00 ± 0.00
-0.03 ± 0.00
--0.16 ± 0.00
--0.11 ± 0.08
-0.03 ± 0.00
-0.07 ± 0.01
-19.55 ± 0.51
-
-
-LLaMa-2 7B
-0.06 ± 0.00
-0.01 ± 0.00
-0.05 ± 0.00
--0.09 ± 0.00
--0.23 ± 0.04
-0.06 ± 0.00
-0.21 ± 0.00
-15.75 ± 0.20
-0.04 ± 0.00
-0.00 ± 0.00
-0.03 ± 0.00
--0.16 ± 0.00
--0.14 ± 0.07
-0.03 ± 0.00
-0.06 ± 0.00
-17.84 ± 0.50
-
-
-Vietcuna 7B
-0.28 ± 0.00
-0.06 ± 0.00
-0.18 ± 0.00
--0.09 ± 0.00
--0.09 ± 0.09
-0.31 ± 0.00
-0.80 ± 0.01
-171.63 ± 1.71
-0.24 ± 0.00
-0.06 ± 0.00
-0.15 ± 0.00
--0.16 ± 0.00
--0.18 ± 0.07
-0.51 ± 0.01
-1.16 ± 0.01
-238.67 ± 3.37
-
-
-GPT-3.5
-0.36 ± 0.00
-0.20 ± 0.00
-0.24 ± 0.00
--0.09 ± 0.00
-0.04 ± 0.13
-0.86 ± 0.00
-3.97 ± 0.02
-13.32 ± 0.65
-0.43 ± 0.00
-0.21 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.22 ± 0.03
-0.87 ± 0.00
-3.29 ± 0.03
-35.50 ± 0.82
-
-
-GPT-4
-0.41 ± 0.00
-0.21 ± 0.00
-0.26 ± 0.00
--0.08 ± 0.00
--0.04 ± 0.11
-0.84 ± 0.00
-3.45 ± 0.00
-15.43 ± 0.49
-0.44 ± 0.00
-0.21 ± 0.00
-0.27 ± 0.00
--0.16 ± 0.00
-0.24 ± 0.04
-0.82 ± 0.00
-2.37 ± 0.01
-6.61 ± 0.16
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.summarization %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.summarization %}
+ R1↑
+ R2↑
+ RL↑
+ SC↑
+ BS↑
+ Cv↑
+ De↑
+ Cp↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.summarization %}
+ {% assign R1_best = 0 %}
+ {% assign R2_best = 0 %}
+ {% assign RL_best = 0 %}
+ {% assign SC_best = -1 %}
+ {% assign BS_best = 0 %}
+ {% assign Cv_best = 0 %}
+ {% assign De_best = 0 %}
+ {% assign Cp_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].R1 and dataset[1][m].R1 > R1_best %}
+ {% assign R1_best = dataset[1][m].R1 %}
+ {% endif %}
+ {% if dataset[1][m].R2 and dataset[1][m].R2 > R2_best %}
+ {% assign R2_best = dataset[1][m].R2 %}
+ {% endif %}
+ {% if dataset[1][m].RL and dataset[1][m].RL > RL_best %}
+ {% assign RL_best = dataset[1][m].RL %}
+ {% endif %}
+ {% if dataset[1][m].SC and dataset[1][m].SC > SC_best %}
+ {% assign SC_best = dataset[1][m].SC %}
+ {% endif %}
+ {% if dataset[1][m].BS and dataset[1][m].BS > BS_best %}
+ {% assign BS_best = dataset[1][m].BS %}
+ {% endif %}
+ {% if dataset[1][m].Cv and dataset[1][m].Cv > Cv_best %}
+ {% assign Cv_best = dataset[1][m].Cv %}
+ {% endif %}
+ {% if dataset[1][m].De and dataset[1][m].De > De_best %}
+ {% assign De_best = dataset[1][m].De %}
+ {% endif %}
+ {% if dataset[1][m].Cp and dataset[1][m].Cp > Cp_best %}
+ {% assign Cp_best = dataset[1][m].Cp %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].R1 %}
+ {{ dataset[1][model].R1 | round: 2 }} ± {{ dataset[1][model].R1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].R2 %}
+ {{ dataset[1][model].R2 | round: 2 }} ± {{ dataset[1][model].R2_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].RL %}
+ {{ dataset[1][model].RL | round: 2 }} ± {{ dataset[1][model].RL_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].SC %}
+ {{ dataset[1][model].SC | round: 2 }} ± {{ dataset[1][model].SC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].BS %}
+ {{ dataset[1][model].BS | round: 2 }} ± {{ dataset[1][model].BS_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cv %}
+ {{ dataset[1][model].Cv | round: 2 }} ± {{ dataset[1][model].Cv_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].De %}
+ {{ dataset[1][model].De | round: 2 }} ± {{ dataset[1][model].De_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].Cp %}
+ {{ dataset[1][model].Cp | round: 2 }} ± {{ dataset[1][model].Cp_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/zero-shot/text-classification.md b/_pages/vi/zero-shot/text-classification.md
index 3e62b94..5ee8d17 100644
--- a/_pages/vi/zero-shot/text-classification.md
+++ b/_pages/vi/zero-shot/text-classification.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/vi/zero-shot/text-classification
---
# Zero-Shot Text Classification Leaderboard
+{% assign lang = 'vi' %}
-
-
- Models
- UiT-VSMEC
- PhoATIS
-
-
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
-
-
-
-
- URA-LLaMa 70B
- 0.40 ± 0.02
- 0.32 ± 0.02
- 0.68 ± 0.01
- 0.14 ± 0.02
- 0.60 ± 0.06
- 0.56 ± 0.02
- 0.48 ± 0.03
- 0.85 ± 0.00
- 0.25 ± 0.02
- 0.56 ± 0.06
-
-
- URA-LLaMa 13B
- 0.29 ± 0.02
- 0.25 ± 0.02
- 0.52 ± 0.01
- 0.09 ± 0.01
- 0.23 ± 0.05
- 0.10 ± 0.01
- 0.10 ± 0.01
- 0.72 ± 0.00
- 0.52 ± 0.01
- 0.14 ± 0.04
-
-
- URA-LLaMa 7B
- 0.13 ± 0.01
- 0.11 ± 0.01
- 0.50 ± 0.01
- 0.15 ± 0.01
- 0.21 ± 0.05
- 0.04 ± 0.01
- 0.04 ± 0.02
- 0.77 ± 0.00
- 0.30 ± 0.01
- 0.04 ± 0.02
-
-
- LLaMa-2 13B
- 0.11 ± 0.01
- 0.10 ± 0.01
- 0.49 ± 0.01
- 0.31 ± 0.01
- 0.09 ± 0.04
- 0.03 ± 0.01
- 0.02 ± 0.00
- 0.45 ± 0.01
- 0.28 ± 0.01
- 0.03 ± 0.02
-
-
- LLaMa-2 7B
- 0.07 ± 0.01
- 0.08 ± 0.01
- 0.52 ± 0.01
- 0.35 ± 0.01
- 0.07 ± 0.03
- 0.00 ± 0.06
- 0.00 ± 0.06
- 0.61 ± 0.01
- 0.32 ± 0.00
- 0.00 ± 0.00
-
-
- Vietcuna 7B
- 0.05 ± 0.01
- 0.02 ± 0.01
- 0.52 ± 0.01
- 0.95 ± 0.01
- 0.03 ± 0.02
- 0.05 ± 0.01
- 0.01 ± 0.00
- 0.66 ± 0.00
- 0.20 ± 0.01
- 0.01 ± 0.21
-
-
- GPT-3.5
- 0.43 ± 0.02
- 0.37 ± 0.02
- -
- 0.29 ± 0.02
- 0.43 ± 0.06
- 0.44 ± 0.02
- 0.38 ± 0.03
- -
- 0.38 ± 0.02
- 0.44 ± 0.05
-
-
- GPT-4
- 0.49 ± 0.02
- 0.46 ± 0.02
- -
- 0.35 ± 0.02
- 0.50 ± 0.06
- 0.89 ± 0.01
- 0.69 ± 0.02
- -
- 0.83 ± 0.01
- 0.89 ± 0.03
-
-
-
+
+
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.text_classification %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
+
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.text_classification %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+
+
+
+ {% for model in site.data.leaderboard[lang].models.models %}
+
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.text_classification %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+
+ {% endfor %}
+
+
\ No newline at end of file
diff --git a/_pages/vi/zero-shot/toxicity-detection.md b/_pages/vi/zero-shot/toxicity-detection.md
index 945112f..41f6688 100644
--- a/_pages/vi/zero-shot/toxicity-detection.md
+++ b/_pages/vi/zero-shot/toxicity-detection.md
@@ -3,131 +3,96 @@ layout: default
permalink: /leaderboard/vi/zero-shot/toxicity-detection
---
# Zero-Shot Toxicity Detection Leaderboard
+{% assign lang = 'vi' %}
- Models
- UiT-ViCTSD
- UiT-ViHSD
+
+ Models
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.toxicity_detection %}
+
+ {{ dataset[0] }}
+
+ {% endfor %}
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
- AC↑
- F1↑
- AR↑
- ECE↓
- A@10↑
+ {% for dataset in site.data.leaderboard[lang].zero_shot.toxicity_detection %}
+ AC↑
+ F1↑
+ AR↑
+ ECE↓
+ A@10↑
+ {% endfor %}
+ {% for model in site.data.leaderboard[lang].models.models %}
- URA-LLaMa 70B
- 0.61 ± 0.01
- 0.52 ± 0.01
- 0.77 ± 0.01
- 0.17 ± 0.01
- 0.97 ± 0.01
- 0.38 ± 0.01
- 0.34 ± 0.01
- 0.74 ± 0.01
- 0.25 ± 0.01
- 0.91 ± 0.01
-
-
- URA-LLaMa 13B
- 0.46 ± 0.01
- 0.28 ± 0.03
- 0.53 ± 0.02
- 0.22 ± 0.01
- 0.48 ± 0.03
- 0.33 ± 0.01
- 0.18 ± 0.00
- 0.60 ± 0.01
- 0.35 ± 0.01
- 0.54 ± 0.02
-
-
- URA-LLaMa 7B
- 0.25 ± 0.01
- 0.19 ± 0.01
- 0.53 ± 0.01
- 0.38 ± 0.01
- 0.13 ± 0.02
- 0.19 ± 0.00
- 0.13 ± 0.00
- 0.55 ± 0.01
- 0.46 ± 0.01
- 0.13 ± 0.01
-
-
- LLaMa-2 13B
- 0.16 ± 0.01
- 0.14 ± 0.00
- 0.40 ± 0.01
- 0.50 ± 0.01
- 0.24 ± 0.02
- 0.09 ± 0.00
- 0.13 ± 0.00
- 0.38 ± 0.01
- 0.63 ± 0.00
- 0.10 ± 0.01
-
-
- LLaMa-2 7B
- 0.13 ± 0.01
- 0.14 ± 0.01
- 0.45 ± 0.02
- 0.69 ± 0.01
- 0.09 ± 0.01
- 0.03 ± 0.00
- 0.05 ± 0.01
- 0.56 ± 0.01
- 0.75 ± 0.00
- 0.00 ± 0.00
-
-
- Vietcuna 7B
- 0.09 ± 0.00
- 0.07 ± 0.00
- 0.50 ± 0.00
- 0.41 ± 0.00
- 0.10 ± 0.03
- 0.07 ± 0.00
- 0.04 ± 0.00
- 0.50 ± 0.00
- 0.26 ± 0.00
- 0.07 ± 0.01
-
-
- GPT-3.5
- 0.75 ± 0.01
- 0.61 ± 0.02
- -
- 0.25 ± 0.01
- 0.80 ± 0.04
- 0.55 ± 0.01
- 0.42 ± 0.01
- -
- 0.22 ± 0.01
- 0.55 ± 0.02
-
-
- GPT-4
- 0.89 ± 0.01
- 0.69 ± 0.01
- -
- 0.39 ± 0.01
- 0.89 ± 0.03
- 0.75 ± 0.01
- 0.53 ± 0.01
- -
- 0.42 ± 0.01
- 0.75 ± 0.02
+
+ {{ model }}
+
+ {% for dataset in site.data.leaderboard[lang].zero_shot.toxicity_detection %}
+ {% assign AC_best = 0 %}
+ {% assign F1_best = 0 %}
+ {% assign AR_best = 0 %}
+ {% assign ECE_best = 1 %}
+ {% assign A10_best = 0 %}
+ {% for m in site.data.leaderboard[lang].models.models %}
+ {% if dataset[1][m].AC and dataset[1][m].AC > AC_best %}
+ {% assign AC_best = dataset[1][m].AC %}
+ {% endif %}
+ {% if dataset[1][m].F1 and dataset[1][m].F1 > F1_best %}
+ {% assign F1_best = dataset[1][m].F1 %}
+ {% endif %}
+ {% if dataset[1][m].AR and dataset[1][m].AR > AR_best %}
+ {% assign AR_best = dataset[1][m].AR %}
+ {% endif %}
+ {% if dataset[1][m].ECE and dataset[1][m].ECE < ECE_best %}
+ {% assign ECE_best = dataset[1][m].ECE %}
+ {% endif %}
+ {% if dataset[1][m]["A@10"] and dataset[1][m]["A@10"] > A10_best %}
+ {% assign A10_best = dataset[1][m]["A@10"] %}
+ {% endif %}
+ {% endfor %}
+
+ {% if dataset[1][model].AC %}
+ {{ dataset[1][model].AC | round: 2 }} ± {{ dataset[1][model].AC_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].F1 %}
+ {{ dataset[1][model].F1 | round: 2 }} ± {{ dataset[1][model].F1_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].AR %}
+ {{ dataset[1][model].AR | round: 2 }} ± {{ dataset[1][model].AR_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model].ECE %}
+ {{ dataset[1][model].ECE | round: 2 }} ± {{ dataset[1][model].ECE_std | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+
+ {% if dataset[1][model]["A@10"] %}
+ {{ dataset[1][model]["A@10"] | round: 2 }} ± {{ dataset[1][model]["A@10_std"] | round: 2 }}
+ {% else %}
+ -
+ {% endif %}
+
+ {% endfor %}
+ {% endfor %}
-
+
\ No newline at end of file
diff --git a/_site/contact/index.html b/_site/contact/index.html
index e8a1d62..dc222e4 100644
--- a/_site/contact/index.html
+++ b/_site/contact/index.html
@@ -12,7 +12,7 @@
MELT
-
+
@@ -20,7 +20,7 @@