Skip to content

Commit

Permalink
make lint
Browse files: browse the repository at this point in the history
  • Loading branch information
isaac-chung committed Nov 22, 2024
1 parent ffa0cbb commit 2a55793
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 12 deletions.
12 changes: 7 additions & 5 deletions docs/create_tasks_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import mteb
from mteb.abstasks.TaskMetadata import PROGRAMMING_LANGS, TASK_TYPE
from mteb.languages import ISO_TO_LANGUAGE, ISO_TO_FAM_LEVEL0
from mteb.languages import ISO_TO_FAM_LEVEL0, ISO_TO_LANGUAGE


def author_from_bibtex(bibtex: str | None) -> str:
Expand Down Expand Up @@ -87,13 +87,15 @@ def create_task_lang_table(tasks: list[mteb.AbsTask], sort_by_sum=False) -> str:
pl_table_dict.append(d)

df = pl.DataFrame(pl_table_dict).sort(by="0-lang-code")
df = df.with_columns(pl.col('0-lang-code')
df = df.with_columns(
pl.col("0-lang-code")
.replace_strict(ISO_TO_LANGUAGE, default="unknown")
.alias('1-lang-name')
.alias("1-lang-name")
)
df = df.with_columns(pl.col('0-lang-code')
df = df.with_columns(
pl.col("0-lang-code")
.replace_strict(ISO_TO_FAM_LEVEL0, default="Unclassified")
.alias('2-lang-fam')
.alias("2-lang-fam")
)

df = df.with_columns(sum=pl.sum_horizontal(get_args(TASK_TYPE)))
Expand Down
3 changes: 2 additions & 1 deletion mteb/languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
with path_to_lang_fam.open("r") as f:
ISO_TO_FAM = json.load(f)

ISO_TO_FAM_LEVEL0 = {k:v['level0'] for k, v in ISO_TO_FAM.items()}
ISO_TO_FAM_LEVEL0 = {k: v["level0"] for k, v in ISO_TO_FAM.items()}


@dataclass
class LanguageScripts:
Expand Down
11 changes: 5 additions & 6 deletions scripts/create_language_family_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@

import json
from pathlib import Path
from tqdm import tqdm

from pyglottolog.api import Glottolog
from pyglottolog.api import lls
from pyglottolog.api import Glottolog, lls
from tqdm import tqdm

glottolog = Glottolog(
"/home/ubuntu/isaac/work/glottolog"
) # Download the Glottolog repository


def get_languages_with_iso_by_languoid(languoid, level=0, prev_fam=None):
# Recursively gather all descendant languages with ISO codes
if prev_fam is None:
Expand All @@ -23,7 +23,7 @@ def get_languages_with_iso_by_languoid(languoid, level=0, prev_fam=None):
# Create a copy of `prev_fam` to avoid overwriting
current_fam = prev_fam.copy()
current_fam[f"level{level}"] = languoid.name

if descendant.level.name == "language": # Direct languages
if descendant.iso:
iso_key = descendant.iso
Expand All @@ -34,7 +34,6 @@ def get_languages_with_iso_by_languoid(languoid, level=0, prev_fam=None):
get_languages_with_iso_by_languoid(descendant, level + 1, current_fam)



all_languoids = list(glottolog.languoids())
with Path("language_family.json").open("r") as f:
ISO2FAMILY = json.load(f)
Expand All @@ -45,4 +44,4 @@ def get_languages_with_iso_by_languoid(languoid, level=0, prev_fam=None):
ISO2FAMILY = dict(sorted(ISO2FAMILY.items()))

with Path("language_family.json").open("w") as f:
json.dump(ISO2FAMILY, f, indent=3)
json.dump(ISO2FAMILY, f, indent=3)

0 comments on commit 2a55793

Please sign in to comment.