Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: load model card json #2246

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 17 additions & 31 deletions doc/source/gen_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,15 @@ def get_metrics_from_url(metrics_url):
})
return result


def main():
template_dir = '../templates'
template_dir = '../templates'
env = Environment(loader=FileSystemLoader(template_dir))

with open('../../xinference/model/llm/llm_family.json', 'r') as model_file:
models = json.load(model_file)

model_by_names = { m['model_name']: m for m in models}
model_scope_file = open('../../xinference/model/llm/llm_family_modelscope.json')
models_modelscope = json.load(model_scope_file)
model_by_names_modelscope = { m['model_name']: m for m in models_modelscope}
model_by_names = {m['model_name']: m for m in models}

sorted_models = []
output_dir = './models/builtin/llm'
Expand All @@ -69,10 +67,16 @@ def main():
sorted_models.append(model)

for model_spec in model['model_specs']:
model_spec['model_hubs'] = [{
'name': MODEL_HUB_HUGGING_FACE,
'url': f"https://huggingface.co/{model_spec['model_id']}"
original_model_hubs = model_spec['model_hubs']
model_spec['model_hubs'] = [{
'name': MODEL_HUB_HUGGING_FACE,
'url': f"https://huggingface.co/{original_model_hubs['huggingface']['model_id']}"
}]
if 'modelscope' in original_model_hubs:
model_spec['model_hubs'].append({
'name': MODEL_HUB_MODELSCOPE,
'url': f"https://modelscope.cn/models/{original_model_hubs['modelscope']['model_id']}"
})

# model engines
engines = []
Expand All @@ -90,22 +94,6 @@ def main():
engines.append(engine)
model_spec['engines'] = sorted(list(set(engines)), reverse=True)

# manual merge
if model_name in model_by_names_modelscope.keys():

def get_unique_id(spec):
return spec['model_format'] + '-' + str(spec['model_size_in_billions'])

model_by_ids_modelscope = {get_unique_id(s) : s for s in model_by_names_modelscope[model_name]['model_specs']}

for model_spec in model['model_specs']:
spec_id = get_unique_id(model_spec)
if spec_id in model_by_ids_modelscope.keys():
model_spec['model_hubs'].append({
'name': MODEL_HUB_MODELSCOPE,
'url': f"https://modelscope.cn/models/{model_by_ids_modelscope[spec_id]['model_id']}"
})

rendered = env.get_template('llm.rst.jinja').render(model)
output_file_name = f"{model['model_name'].lower()}.rst"
if output_file_name in current_files:
Expand All @@ -125,16 +113,14 @@ def get_unique_id(spec):
rendered_index = env.get_template('llm_index.rst.jinja').render(models=sorted_models)
file.write(rendered_index)


with open('../../xinference/model/embedding/model_spec.json', 'r') as file:
models = json.load(file)

model_by_names = { m['model_name']: m for m in models}
model_by_names = {m['model_name']: m for m in models}
model_scope_file = open('../../xinference/model/embedding/model_spec_modelscope.json')
models_modelscope = json.load(model_scope_file)

model_by_names_modelscope = { s['model_name']: s for s in models_modelscope}

model_by_names_modelscope = {s['model_name']: s for s in models_modelscope}

sorted_models = []
output_dir = './models/builtin/embedding'
Expand All @@ -147,7 +133,7 @@ def get_unique_id(spec):

model['model_hubs'] = [
{
'name': MODEL_HUB_HUGGING_FACE,
'name': MODEL_HUB_HUGGING_FACE,
'url': f"https://huggingface.co/{model['model_id']}"
}
]
Expand All @@ -167,7 +153,7 @@ def get_unique_id(spec):
print(output_file_path)

index_file_path = os.path.join(output_dir, "index.rst")
with open(index_file_path, "w") as file:
with open(index_file_path, "w") as file:
rendered_index = env.get_template('embedding_index.rst.jinja').render(models=sorted_models)
file.write(rendered_index)

Expand All @@ -186,7 +172,7 @@ def get_unique_id(spec):

index_file_path = os.path.join(output_dir, "index.rst")
with open(index_file_path, "w") as file:

rendered_index = env.get_template('rerank_index.rst.jinja').render(models=sorted_models)
file.write(rendered_index)

Expand Down
141 changes: 62 additions & 79 deletions xinference/model/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import codecs
import copy
import json
import os
import warnings
Expand Down Expand Up @@ -195,85 +196,7 @@ def _install():
SUPPORTED_ENGINES["MLX"] = MLX_CLASSES
SUPPORTED_ENGINES["LMDEPLOY"] = LMDEPLOY_CLASSES

json_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
)
for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
model_spec = LLMFamilyV1.parse_obj(json_obj)
BUILTIN_LLM_FAMILIES.append(model_spec)

# register chat_template
if "chat" in model_spec.model_ability and isinstance(
model_spec.chat_template, str
):
# note that the key is the model name,
# since there are multiple representations of the same prompt style name in json.
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
"chat_template": model_spec.chat_template,
"stop_token_ids": model_spec.stop_token_ids,
"stop": model_spec.stop,
}
# register model family
if "chat" in model_spec.model_ability:
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
else:
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
if "tools" in model_spec.model_ability:
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)

modelscope_json_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "llm_family_modelscope.json"
)
for json_obj in json.load(codecs.open(modelscope_json_path, "r", encoding="utf-8")):
model_spec = LLMFamilyV1.parse_obj(json_obj)
BUILTIN_MODELSCOPE_LLM_FAMILIES.append(model_spec)

# register prompt style, in case that we have something missed
# if duplicated with huggingface json, keep it as the huggingface style
if (
"chat" in model_spec.model_ability
and isinstance(model_spec.chat_template, str)
and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
):
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
"chat_template": model_spec.chat_template,
"stop_token_ids": model_spec.stop_token_ids,
"stop": model_spec.stop,
}
# register model family
if "chat" in model_spec.model_ability:
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
else:
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
if "tools" in model_spec.model_ability:
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)

csghub_json_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "llm_family_csghub.json"
)
for json_obj in json.load(codecs.open(csghub_json_path, "r", encoding="utf-8")):
model_spec = LLMFamilyV1.parse_obj(json_obj)
BUILTIN_CSGHUB_LLM_FAMILIES.append(model_spec)

# register prompt style, in case that we have something missed
# if duplicated with huggingface json, keep it as the huggingface style
if (
"chat" in model_spec.model_ability
and isinstance(model_spec.chat_template, str)
and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
):
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
"chat_template": model_spec.chat_template,
"stop_token_ids": model_spec.stop_token_ids,
"stop": model_spec.stop,
}
# register model family
if "chat" in model_spec.model_ability:
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
else:
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
if "tools" in model_spec.model_ability:
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
_load_from_json_new("llm_family.json")

for llm_specs in [
BUILTIN_LLM_FAMILIES,
Expand All @@ -298,3 +221,63 @@ def _install():
# register model description
for ud_llm in get_user_defined_llm_families():
LLM_MODEL_DESCRIPTIONS.update(generate_llm_description(ud_llm))


def _add_model_spec(model_spec: LLMFamilyV1, LLM_FAMILIES: list[LLMFamilyV1]):
    """Append *model_spec* to *LLM_FAMILIES* and register its metadata.

    Registers the family's prompt style (chat template + stop tokens) under
    its model name, and classifies the family into the chat / generate /
    tool-call registries according to its ``model_ability`` list.
    """
    LLM_FAMILIES.append(model_spec)

    abilities = model_spec.model_ability
    name = model_spec.model_name

    # Register the chat template, keyed by model name (the same prompt style
    # may appear under several representations in the json). If the name was
    # already registered from an earlier (huggingface) json, keep that one.
    if (
        "chat" in abilities
        and isinstance(model_spec.chat_template, str)
        and name not in BUILTIN_LLM_PROMPT_STYLE
    ):
        BUILTIN_LLM_PROMPT_STYLE[name] = {
            "chat_template": model_spec.chat_template,
            "stop_token_ids": model_spec.stop_token_ids,
            "stop": model_spec.stop,
        }

    # Classify the family: chat-capable vs. generate-only, plus tool calling.
    if "chat" in abilities:
        BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(name)
    else:
        BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(name)
    if "tools" in abilities:
        BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(name)


def _load_from_json_new(file_name: str):
    """Load builtin LLM families from a unified JSON file next to this module.

    Each JSON entry carries ``model_specs`` whose ``model_hubs`` mapping holds
    per-hub overrides (e.g. ``model_id``) keyed by hub name. Every entry is
    split into up to one family per hub (huggingface / modelscope / csghub):
    each spec's hub override is flattened into the spec itself, tagged with
    ``model_hub``, and the resulting family object is parsed and registered
    into the corresponding BUILTIN_* registry via ``_add_model_spec``.

    :param file_name: name of the JSON file, resolved relative to this module.
    """
    json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), file_name)

    # Hub order also determines registration order for each family.
    hub_names = ["huggingface", "modelscope", "csghub"]
    # Target registries per hub; loop-invariant, so built once up front.
    hub_families = {
        "huggingface": BUILTIN_LLM_FAMILIES,
        "modelscope": BUILTIN_MODELSCOPE_LLM_FAMILIES,
        "csghub": BUILTIN_CSGHUB_LLM_FAMILIES,
    }

    # Context manager closes the handle deterministically; the previous
    # codecs.open(...) call leaked it.
    with open(json_path, "r", encoding="utf-8") as json_file:
        json_objs = json.load(json_file)

    for json_obj in json_objs:
        model_specs: list[dict] = json_obj["model_specs"]
        hub_specs: dict = {hub_name: [] for hub_name in hub_names}

        for model_spec in model_specs:
            for hub_name in hub_names:
                if hub_name in model_spec["model_hubs"]:
                    # Copy the generic spec, then overlay hub-specific
                    # fields (e.g. model_id) for this hub.
                    hub_spec = copy.deepcopy(model_spec)
                    hub_spec.update(model_spec["model_hubs"][hub_name])
                    hub_spec["model_hub"] = hub_name
                    del hub_spec["model_hubs"]
                    hub_specs[hub_name].append(hub_spec)

        for hub_name in hub_names:
            a_hub_specs = hub_specs[hub_name]
            if a_hub_specs:
                # Re-parse the family with only this hub's flattened specs.
                model_obj = copy.deepcopy(json_obj)
                model_obj["model_specs"] = a_hub_specs
                converted_model_spec = LLMFamilyV1.parse_obj(model_obj)
                _add_model_spec(converted_model_spec, hub_families[hub_name])
Loading
Loading