Skip to content

Commit

Permalink
Refactor class variable LANGUAGES into module-level _LANGUAGES for MKQA (SEACrowd#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
fhudi committed Jan 31, 2024
1 parent 3d4808d commit 76ca2e8
Showing 1 changed file with 9 additions and 10 deletions.
19 changes: 9 additions & 10 deletions seacrowd/sea_datasets/mkqa/mkqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@

_SEACROWD_VERSION = "1.0.0"

# 3-letter language codes, following the convention suggested since NusaCrowd.
_LANGUAGES = ["khm", "msa", "tha", "vie"]


class MKQADataset(datasets.GeneratorBasedBuilder):
"""
Expand All @@ -76,14 +83,6 @@ class MKQADataset(datasets.GeneratorBasedBuilder):
SOURCE_VERSION = datasets.Version(_SOURCE_VERSION)
SEACROWD_VERSION = datasets.Version(_SEACROWD_VERSION)

LANGUAGES = [
"",
"khm",
"msa",
"tha",
"vie",
] # follows the convention of 3-letter code as suggested since NusaCrowd.

_SOURCE_LANGUAGES = [
"ar",
"da",
Expand Down Expand Up @@ -129,7 +128,7 @@ class MKQADataset(datasets.GeneratorBasedBuilder):
schema="source",
subset_id=f"{_DATASETNAME}_{subset_lang}",
)
for subset_lang in LANGUAGES
for subset_lang in ["", *_LANGUAGES]
],
*[
SEACrowdConfig(
Expand All @@ -139,7 +138,7 @@ class MKQADataset(datasets.GeneratorBasedBuilder):
schema="seacrowd_qa",
subset_id=f"{_DATASETNAME}_{subset_lang}",
)
for subset_lang in LANGUAGES
for subset_lang in ["", *_LANGUAGES]
],
]

Expand Down

0 comments on commit 76ca2e8

Please sign in to comment.