Skip to content

Commit

Permalink
download results
Browse files Browse the repository at this point in the history
  • Loading branch information
Samoed committed Nov 8, 2024
1 parent ff3321a commit 31ac260
Show file tree
Hide file tree
Showing 14,580 changed files with 851,225 additions and 45 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
112 changes: 67 additions & 45 deletions refresh.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import re
from pathlib import Path
from typing import Any

from huggingface_hub import HfApi, get_hf_file_metadata, hf_hub_download, hf_hub_url
from huggingface_hub.errors import NotASafetensorsRepoError
Expand All @@ -12,7 +13,6 @@
from mteb import ModelMeta, get_task

# Shared Hugging Face Hub client; get_mteb_data() uses it to list models.
API = HfApi()
# NOTE(review): this configures the root logger at import time, so it also
# applies to any module that imports this script.
logging.basicConfig(level=logging.INFO)
# Module-level logger used for progress and warning messages in this script.
logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -82,7 +82,7 @@ def get_dim_seq_size(model: ModelInfo) -> tuple[str | None, str | None, int, flo
return dim, seq, parameters, memory


def create_model_meta(model_info: ModelInfo) -> None:
def create_model_meta(model_info: ModelInfo) -> ModelMeta | None:
readme_path = hf_hub_download(model_info.id, filename="README.md", etag_timeout=30)
meta = metadata_load(readme_path)
dim, seq, parameters, memory = None, None, None, None
Expand All @@ -101,7 +101,7 @@ def create_model_meta(model_info: ModelInfo) -> None:
if languages[i] is False:
languages[i] = "no"

model_dict = ModelMeta(
model_meta = ModelMeta(
name=model_info.id,
revision=model_info.sha,
release_date=release_date,
Expand All @@ -113,13 +113,10 @@ def create_model_meta(model_info: ModelInfo) -> None:
n_parameters=parameters,
languages=languages,
)
model_dir = get_model_dir(model_info.id)
model_meta_path = model_dir / "model_meta.json"
with model_meta_path.open("w") as f:
json.dump(model_dict.model_dump(), f, indent=4)
return model_meta


def parse_readme(model_info: ModelInfo) -> None:
def parse_readme(model_info: ModelInfo) -> dict[str, dict[str, Any]] | None:
model_id = model_info.id
try:
readme_path = hf_hub_download(model_info.id, filename="README.md", etag_timeout=30)
Expand All @@ -131,33 +128,42 @@ def parse_readme(model_info: ModelInfo) -> None:
logger.info(f"Could not find model-index in {model_id}")
return
model_index = meta["model-index"][0]
model_name_from_readme = model_index["model_name"]
if not model_info.id.endswith(model_name_from_readme):
model_name_from_readme = model_index.get("name", None)
orgs = ["Alibaba-NLP", "HIT-TMG", "McGill-NLP", "Snowflake", "facebook", "jinaai", "nomic-ai"]
is_org = any([model_id.startswith(org) for org in orgs])
# There are a lot of re-uploads (fine-tunes, quantizations, etc.). We only want the original model.
# To filter re-uploads out, most of the time we can check that the model id ends with the model name from the README,
# but some orgs use a different naming scheme in their README, so they are exempt from this check.
if model_name_from_readme and not model_info.id.endswith(model_name_from_readme) and not is_org:
logger.warning(f"Model name mismatch: {model_info.id} vs {model_name_from_readme}")
return
results = model_index.get("results", [])
model_results = {}
for result in results:
output_dict = {}
dataset = result["dataset"]
dataset_type = dataset.get("type", "")
dataset_type = dataset["type"] # type is repo of the dataset
if dataset_type not in model_results:
output_dict["dataset_revision"] = dataset.get("revision", "")
output_dict["task_name"] = simplify_dataset_name(dataset.get("name", ""))
output_dict["evaluation_time"] = -1
output_dict["mteb_version"] = "0.0.0"
output_dict["scores"] = {}
output_dict = {
"dataset_revision": dataset.get("revision", ""),
"task_name": simplify_dataset_name(dataset["name"]),
"evaluation_time": -1,
"mteb_version": "0.0.0",
"scores": {},
}
else:
output_dict = model_results[dataset_type]

try:
mteb_task = get_task(output_dict["task_name"])
mteb_task_metadata = mteb_task.metadata
mteb_task_eval_languages = mteb_task_metadata.eval_langs
except Exception:
logger.warning(f"Error getting task for {model_id} {output_dict['task_name']}")
continue

mteb_task_metadata = mteb_task.metadata
mteb_task_eval_languages = mteb_task_metadata.eval_langs

scores_dict = output_dict["scores"]
current_split = dataset.get("split", "")
current_split = dataset["split"]
current_config = dataset.get("config", "")
cur_split_metrics = {
"hf_subset": current_config,
Expand All @@ -168,48 +174,64 @@ def parse_readme(model_info: ModelInfo) -> None:

main_score_str = "main_score"
if main_score_str not in cur_split_metrics:
# sts old have cos_sim_pearson, but in model_meta cosine_spearman is main_score, sum_eval
# old sts and sum_eval have cos_sim_pearson, but in model_meta cosine_spearman is main_score
for old_metric, new_metric in zip(["cos_sim_pearson", "cos_sim_spearman"], ["cosine_pearson", "cosine_spearman"]):
if old_metric in cur_split_metrics:
cur_split_metrics[main_score_str] = cur_split_metrics[old_metric]
if cur_split_metrics.get(main_score_str, None) is None and mteb_task.metadata.main_score not in cur_split_metrics:
logger.warning(f"Could not find main score for {model_id} {output_dict['task_name']}")
cur_split_metrics[new_metric] = cur_split_metrics[old_metric]

if mteb_task.metadata.main_score not in cur_split_metrics:
logger.warning(f"Could not find main score for {model_id} {output_dict['task_name']}, mteb task {mteb_task.metadata.name}. Main score: {mteb_task.metadata.main_score}. Metrics: {cur_split_metrics}, result {result['metrics']}")
continue

cur_split_metrics[main_score_str] = cur_split_metrics[mteb_task.metadata.main_score]
cur_split_metrics[main_score_str] = cur_split_metrics.get(mteb_task.metadata.main_score, None)
split_metrics = scores_dict.get(current_split, [])
split_metrics.append(cur_split_metrics)
scores_dict[current_split] = split_metrics
model_results[dataset_type] = output_dict
for model_result in model_results:
model_dir = get_model_dir(model_id)
task_name = model_results[model_result]["task_name"]
result_file = model_dir / f"{task_name}.json"
with result_file.open("w") as f:
json.dump(model_results[model_result], f, indent=4)
return model_results


def get_mteb_data() -> None:
    """Collect MTEB metadata and results for every Hub model tagged ``mteb``.

    For each model returned by ``API.list_models(filter="mteb", full=True)``
    (sorted by id) this:

    1. skips the model if its directory already holds ``model_meta.json``
       plus at least one other ``*.json`` file;
    2. skips ids ending in ``gguf`` (case-insensitive) and ids starting with
       one of the known spam prefixes;
    3. builds the model meta and parses the README results, skipping the
       model when either is missing/empty;
    4. writes ``model_meta.json`` and one ``<task_name>.json`` per parsed
       result into the model directory.

    Nothing is written for a model unless both the meta and the results were
    obtained.
    """
    # Account/name prefixes whose uploads are skipped outright.
    spam_prefixes = ("ILKT", "fine-tuned", "mlx-community")

    models = sorted(API.list_models(filter="mteb", full=True), key=lambda x: x.id)
    total = len(models)
    for i, model_info in enumerate(models, start=1):
        logger.info(f"[{i}/{total}] Processing {model_info.id}")

        model_path = get_model_dir(model_info.id)
        # A lone model_meta.json means no results were saved, so only skip
        # when there is at least one more JSON file next to it.
        if (model_path / "model_meta.json").exists() and len(list(model_path.glob("*.json"))) > 1:
            logger.info(f"Model meta already exists for {model_info.id}")
            continue
        if model_info.id.lower().endswith("gguf"):
            logger.info(f"Skipping {model_info.id} GGUF model")
            continue
        # str.startswith accepts a tuple, so no flag-and-inner-loop is needed.
        if model_info.id.startswith(spam_prefixes):
            logger.info(f"Skipping {model_info.id}")
            continue

        model_meta = create_model_meta(model_info)
        model_results = parse_readme(model_info)
        if not model_meta or not model_results:
            logger.warning(f"Could not get model meta or results for {model_info.id}")
            continue

        model_dir = get_model_dir(model_info.id)
        model_meta_path = model_dir / "model_meta.json"
        with model_meta_path.open("w") as f:
            json.dump(model_meta.model_dump(), f, indent=4)

        # model_results is keyed by dataset repo; files are named by task.
        for task_result in model_results.values():
            result_file = model_dir / f"{task_result['task_name']}.json"
            with result_file.open("w") as f:
                json.dump(task_result, f, indent=4)


if __name__ == "__main__":
    # Configure logging before the run so progress messages are emitted,
    # then process the models exactly once.
    logging.basicConfig(level=logging.INFO)
    get_mteb_data()
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"dataset_revision": "faeb762787bd10488a50c8b5be4a3b82e411949c",
"task_name": "STS17",
"evaluation_time": -1,
"mteb_version": "0.0.0",
"scores": {
"test": [
{
"hf_subset": "ar-ar",
"languages": [
"ara-Arab"
],
"cosine_pearson": 84.5661961200634,
"cosine_spearman": 85.04839486363485,
"euclidean_pearson": 83.06651812356505,
"euclidean_spearman": 84.64285835335832,
"main_score": 85.04839486363485,
"manhattan_pearson": 83.12273085900199,
"manhattan_spearman": 84.51258675131285
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"name": "AbderrahmanSkiredj1/Arabic_text_embedding_for_sts",
"revision": "bbd8aaebafc25b38eaa96300313db0bbe0086d12",
"release_date": "2024-07-07",
"languages": [],
"loader": null,
"n_parameters": 135193344,
"memory_usage": null,
"max_tokens": 512,
"embed_dim": 768,
"license": null,
"open_weights": true,
"public_training_data": null,
"public_training_code": null,
"framework": [
"Sentence Transformers"
],
"reference": null,
"similarity_fn_name": null,
"use_instructions": null,
"zero_shot_benchmarks": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"dataset_revision": "faeb762787bd10488a50c8b5be4a3b82e411949c",
"task_name": "STS17",
"evaluation_time": -1,
"mteb_version": "0.0.0",
"scores": {
"test": [
{
"hf_subset": "ar-ar",
"languages": [
"ara-Arab"
],
"cosine_pearson": 84.4552565985224,
"cosine_spearman": 84.95895784502326,
"euclidean_pearson": 82.78929391924628,
"euclidean_spearman": 84.30172209203968,
"main_score": 84.95895784502326,
"manhattan_pearson": 82.8787314198733,
"manhattan_spearman": 84.31971710672602
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"name": "AbderrahmanSkiredj1/arabic_text_embedding_sts_arabertv02_arabicnlitriplet",
"revision": "a920aa3e6222ce4a03a5799ecb6f1e6dea6e4c46",
"release_date": "2024-07-06",
"languages": [],
"loader": null,
"n_parameters": 135193344,
"memory_usage": null,
"max_tokens": 512,
"embed_dim": 768,
"license": null,
"open_weights": true,
"public_training_data": null,
"public_training_code": null,
"framework": [
"Sentence Transformers"
],
"reference": null,
"similarity_fn_name": null,
"use_instructions": null,
"zero_shot_benchmarks": null
}
Loading

0 comments on commit 31ac260

Please sign in to comment.