Merge pull request #2 from stair-lab/1-multilingual-leaderboard

Multilingual Leaderboard
stair-lab · Sep 1, 2024 · 8eab66d · 8eab66d
2 parents 0db5440 + 0b9ca7c
commit 8eab66d
Show file tree

Hide file tree

Showing 167 changed files with 7,348 additions and 6,185 deletions.
diff --git a/_config.yml b/_config.yml
@@ -1,8 +1,8 @@
-title: ViLLM
-description: "Crossing Linguistic Horizons: Finetuning and Comprehensive Evaluation of Vietnamese Large Language Models"
+title: MELT
+description: "Multilingual Evaluation Toolkits"
 
 # disabled because we are using a custom domain
-baseurl: https://ai.stanford.edu/~sttruong/villm
+baseurl: https://ai.stanford.edu/~sttruong/melt
 
 color-primary: "#B1040E"
 color-light: "#E50808"

diff --git a/_data/categories.yml b/_data/categories.yml
@@ -0,0 +1,8 @@
+- zero-shot
+- few-shot
+- weaker-prompt
+- fairness-aware
+- robustness-aware
+- chain-of-thought
+- randomized-choice
+- bias-toxicity
diff --git a/_data/lang_tasks.yml b/_data/lang_tasks.yml
@@ -0,0 +1,275 @@
+vi:
+  "Question Answering":
+    zero-shot: true
+    few-shot: false
+    weaker-prompt: true
+    fairness-aware: true
+    robustness-aware: true
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: true
+  "Summarization":
+    zero-shot: true
+    few-shot: false
+    weaker-prompt: true
+    fairness-aware: false
+    robustness-aware: true
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: true
+  "Sentiment Analysis":
+    zero-shot: true
+    few-shot: true
+    weaker-prompt: false
+    fairness-aware: true
+    robustness-aware: true
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Text Classification":
+    zero-shot: true
+    few-shot: true
+    weaker-prompt: false
+    fairness-aware: true
+    robustness-aware: true
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Knowledge":
+    zero-shot: true
+    few-shot: true
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: true
+    chain-of-thought: false
+    randomized-choice: true
+    bias-toxicity: false
+  "Toxicity Detection":
+    zero-shot: true
+    few-shot: true
+    weaker-prompt: false
+    fairness-aware: true
+    robustness-aware: true
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Information Retrieval":
+    zero-shot: true
+    few-shot: true
+    weaker-prompt: false
+    fairness-aware: true
+    robustness-aware: true
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Language Modeling":
+    zero-shot: true
+    few-shot: true
+    weaker-prompt: false
+    fairness-aware: true
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Reasoning":
+    zero-shot: true
+    few-shot: true
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: true
+    randomized-choice: false
+    bias-toxicity: false
+  "Translation":
+    zero-shot: false
+    few-shot: true
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: true
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: true
+
+ind:
+  "Question Answering":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Summarization":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Sentiment Analysis":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Text Classification":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Knowledge":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Toxicity Detection":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Information Retrieval":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Language Modeling":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Reasoning":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Translation":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+
+kr:
+  "Question Answering":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Summarization":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Sentiment Analysis":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Text Classification":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Knowledge":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Toxicity Detection":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Information Retrieval":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Language Modeling":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Reasoning":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
+  "Translation":
+    zero-shot: false
+    few-shot: false
+    weaker-prompt: false
+    fairness-aware: false
+    robustness-aware: false
+    chain-of-thought: false
+    randomized-choice: false
+    bias-toxicity: false
diff --git a/_data/menu.yml b/_data/menu.yml
@@ -5,7 +5,7 @@
   link: "/leaderboard/"
 
 - title: GitHub
-  link: "https://github.com/koyejo-lab/villm"
+  link: "https://github.com/stair-lab/melt"
 
 - title: HuggingFace
   link: "https://huggingface.co/ura-hcmut"

diff --git a/_includes/footer.html b/_includes/footer.html
@@ -2,7 +2,7 @@
 <footer class="py-5 bg-dark">
   <div class="container">
     <p class="m-0 text-center text-white">
-      Copyright &copy; ViLLM @ 2024
+      Copyright &copy; MELT @ {{ site.time | date: '%Y' }}
     </p>
   </div>
   <!-- /.container -->

diff --git a/_includes/header.html b/_includes/header.html
@@ -4,11 +4,11 @@
     <div class="row h-100 align-items-center">
       <div class="col-lg-12">
         <h1 class="display-3 text-white mt-5 mb-2 text-center font-weight-bold">
-          Crossing Linguistic Horizons
+          Multilingual Evaluation Toolkits
         </h1>
-        <h3 class="mb-5 text-white text-center">
+        <!-- <h3 class="mb-5 text-white text-center">
           Finetuning and Comprehensive Evaluation of Vietnamese Large Language Models
-        </h3>
+        </h3> -->
       </div>
     </div>
   </div>

diff --git a/_includes/navigation.html b/_includes/navigation.html
@@ -2,7 +2,7 @@
 <nav class="navbar navbar-expand-lg navbar-dark fixed-top bg-dark py-3">
   <div class="container">
     <a class="navbar-brand js-scroll-trigger font-weight-bold" href="{{ site.baseurl }}/">
-      ViLLM
+      MELT
     </a>
     <button
       class="navbar-toggler ml-auto"