Skip to content

Commit

Permalink
Merge pull request #2 from stair-lab/1-multilingual-leaderboard
Browse files Browse the repository at this point in the history
Multilingual Leaderboard
  • Loading branch information
sangttruong authored Sep 1, 2024
2 parents 0db5440 + 0b9ca7c commit 8eab66d
Show file tree
Hide file tree
Showing 167 changed files with 7,348 additions and 6,185 deletions.
6 changes: 3 additions & 3 deletions _config.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
title: ViLLM
description: "Crossing Linguistic Horizons: Finetuning and Comprehensive Evaluation of Vietnamese Large Language Models"
title: MELT
description: "Multilingual Evaluation Toolkits"

# disabled because we are using a custom domain
baseurl: https://ai.stanford.edu/~sttruong/villm
baseurl: https://ai.stanford.edu/~sttruong/melt

color-primary: "#B1040E"
color-light: "#E50808"
Expand Down
8 changes: 8 additions & 0 deletions _data/categories.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Evaluation setting identifiers, one per list entry.
# The same eight names are used as the per-task boolean keys in
# _data/lang_tasks.yml, so the two files must be kept in sync.
# NOTE(review): list order presumably drives display order — confirm against
# the templates that iterate this file.
- zero-shot
- few-shot
- weaker-prompt
- fairness-aware
- robustness-aware
- chain-of-thought
- randomized-choice
- bias-toxicity
275 changes: 275 additions & 0 deletions _data/lang_tasks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
# Per-language task/setting matrix: language code -> task name -> setting flag.
# The nesting below is significant: with every line at column 0 the task names
# and flags all become top-level keys, which yields null values and duplicate
# keys instead of one mapping per task.
# Flag names match the entries of _data/categories.yml.
# NOTE(review): `true` presumably means the setting is available for that task
# on the leaderboard — confirm against the templates that read this file.
vi:
  "Question Answering":
    zero-shot: true
    few-shot: false
    weaker-prompt: true
    fairness-aware: true
    robustness-aware: true
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: true
  "Summarization":
    zero-shot: true
    few-shot: false
    weaker-prompt: true
    fairness-aware: false
    robustness-aware: true
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: true
  "Sentiment Analysis":
    zero-shot: true
    few-shot: true
    weaker-prompt: false
    fairness-aware: true
    robustness-aware: true
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Text Classification":
    zero-shot: true
    few-shot: true
    weaker-prompt: false
    fairness-aware: true
    robustness-aware: true
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Knowledge":
    zero-shot: true
    few-shot: true
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: true
    chain-of-thought: false
    randomized-choice: true
    bias-toxicity: false
  "Toxicity Detection":
    zero-shot: true
    few-shot: true
    weaker-prompt: false
    fairness-aware: true
    robustness-aware: true
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Information Retrieval":
    zero-shot: true
    few-shot: true
    weaker-prompt: false
    fairness-aware: true
    robustness-aware: true
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Language Modeling":
    zero-shot: true
    few-shot: true
    weaker-prompt: false
    fairness-aware: true
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Reasoning":
    zero-shot: true
    few-shot: true
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: true
    randomized-choice: false
    bias-toxicity: false
  "Translation":
    zero-shot: false
    few-shot: true
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: true
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: true

# Task/setting matrix for the `ind` language entry: task name -> setting flag.
# Every flag is currently false, i.e. no task/setting pair is switched on yet.
# Tasks must stay nested under `ind` and flags under their task; at column 0
# they collapse into duplicate top-level keys.
# NOTE(review): `ind` is presumably Indonesian (a 3-letter ISO 639-3 code),
# whereas the `vi` entry uses a 2-letter code — confirm the intended scheme
# before renaming, since consumers look this key up by name.
ind:
  "Question Answering":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Summarization":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Sentiment Analysis":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Text Classification":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Knowledge":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Toxicity Detection":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Information Retrieval":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Language Modeling":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Reasoning":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Translation":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false

# Task/setting matrix for the `kr` language entry: task name -> setting flag.
# Every flag is currently false, i.e. no task/setting pair is switched on yet.
# Tasks must stay nested under `kr` and flags under their task; at column 0
# they collapse into duplicate top-level keys.
# NOTE(review): `kr` presumably stands for Korean, but as a language code
# ISO 639-1 assigns `kr` to Kanuri and `ko` to Korean (`KR` is the country
# code). Confirm the intent before renaming — consumers look this key up.
kr:
  "Question Answering":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Summarization":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Sentiment Analysis":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Text Classification":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Knowledge":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Toxicity Detection":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Information Retrieval":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Language Modeling":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Reasoning":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
  "Translation":
    zero-shot: false
    few-shot: false
    weaker-prompt: false
    fairness-aware: false
    robustness-aware: false
    chain-of-thought: false
    randomized-choice: false
    bias-toxicity: false
2 changes: 1 addition & 1 deletion _data/menu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
link: "/leaderboard/"

- title: GitHub
link: "https://github.com/koyejo-lab/villm"
link: "https://github.com/stair-lab/melt"

- title: HuggingFace
link: "https://huggingface.co/ura-hcmut"
Expand Down
2 changes: 1 addition & 1 deletion _includes/footer.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<footer class="py-5 bg-dark">
<div class="container">
<p class="m-0 text-center text-white">
Copyright &copy; ViLLM @ 2024
Copyright &copy; MELT @ {{ site.time | date: '%Y' }}
</p>
</div>
<!-- /.container -->
Expand Down
6 changes: 3 additions & 3 deletions _includes/header.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
<div class="row h-100 align-items-center">
<div class="col-lg-12">
<h1 class="display-3 text-white mt-5 mb-2 text-center font-weight-bold">
Crossing Linguistic Horizons
Multilingual Evaluation Toolkits
</h1>
<h3 class="mb-5 text-white text-center">
<!-- <h3 class="mb-5 text-white text-center">
Finetuning and Comprehensive Evaluation of Vietnamese Large Language Models
</h3>
</h3> -->
</div>
</div>
</div>
Expand Down
2 changes: 1 addition & 1 deletion _includes/navigation.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<nav class="navbar navbar-expand-lg navbar-dark fixed-top bg-dark py-3">
<div class="container">
<a class="navbar-brand js-scroll-trigger font-weight-bold" href="{{ site.baseurl }}/">
ViLLM
MELT
</a>
<button
class="navbar-toggler ml-auto"
Expand Down
Loading

0 comments on commit 8eab66d

Please sign in to comment.