Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: load model card json #2246

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 17 additions & 31 deletions doc/source/gen_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,15 @@ def get_metrics_from_url(metrics_url):
})
return result


def main():
template_dir = '../templates'
template_dir = '../templates'
env = Environment(loader=FileSystemLoader(template_dir))

with open('../../xinference/model/llm/llm_family.json', 'r') as model_file:
models = json.load(model_file)

model_by_names = { m['model_name']: m for m in models}
model_scope_file = open('../../xinference/model/llm/llm_family_modelscope.json')
models_modelscope = json.load(model_scope_file)
model_by_names_modelscope = { m['model_name']: m for m in models_modelscope}
model_by_names = {m['model_name']: m for m in models}

sorted_models = []
output_dir = './models/builtin/llm'
Expand All @@ -69,10 +67,16 @@ def main():
sorted_models.append(model)

for model_spec in model['model_specs']:
model_spec['model_hubs'] = [{
'name': MODEL_HUB_HUGGING_FACE,
'url': f"https://huggingface.co/{model_spec['model_id']}"
original_model_hubs = model_spec['model_hubs']
model_spec['model_hubs'] = [{
'name': MODEL_HUB_HUGGING_FACE,
'url': f"https://huggingface.co/{original_model_hubs['huggingface']['model_id']}"
}]
if 'modelscope' in original_model_hubs:
model_spec['model_hubs'].append({
'name': MODEL_HUB_MODELSCOPE,
'url': f"https://modelscope.cn/models/{original_model_hubs['modelscope']['model_id']}"
})

# model engines
engines = []
Expand All @@ -90,22 +94,6 @@ def main():
engines.append(engine)
model_spec['engines'] = sorted(list(set(engines)), reverse=True)

# manual merge
if model_name in model_by_names_modelscope.keys():

def get_unique_id(spec):
return spec['model_format'] + '-' + str(spec['model_size_in_billions'])

model_by_ids_modelscope = {get_unique_id(s) : s for s in model_by_names_modelscope[model_name]['model_specs']}

for model_spec in model['model_specs']:
spec_id = get_unique_id(model_spec)
if spec_id in model_by_ids_modelscope.keys():
model_spec['model_hubs'].append({
'name': MODEL_HUB_MODELSCOPE,
'url': f"https://modelscope.cn/models/{model_by_ids_modelscope[spec_id]['model_id']}"
})

rendered = env.get_template('llm.rst.jinja').render(model)
output_file_name = f"{model['model_name'].lower()}.rst"
if output_file_name in current_files:
Expand All @@ -125,16 +113,14 @@ def get_unique_id(spec):
rendered_index = env.get_template('llm_index.rst.jinja').render(models=sorted_models)
file.write(rendered_index)


with open('../../xinference/model/embedding/model_spec.json', 'r') as file:
models = json.load(file)

model_by_names = { m['model_name']: m for m in models}
model_by_names = {m['model_name']: m for m in models}
model_scope_file = open('../../xinference/model/embedding/model_spec_modelscope.json')
models_modelscope = json.load(model_scope_file)

model_by_names_modelscope = { s['model_name']: s for s in models_modelscope}

model_by_names_modelscope = {s['model_name']: s for s in models_modelscope}

sorted_models = []
output_dir = './models/builtin/embedding'
Expand All @@ -147,7 +133,7 @@ def get_unique_id(spec):

model['model_hubs'] = [
{
'name': MODEL_HUB_HUGGING_FACE,
'name': MODEL_HUB_HUGGING_FACE,
'url': f"https://huggingface.co/{model['model_id']}"
}
]
Expand All @@ -167,7 +153,7 @@ def get_unique_id(spec):
print(output_file_path)

index_file_path = os.path.join(output_dir, "index.rst")
with open(index_file_path, "w") as file:
with open(index_file_path, "w") as file:
rendered_index = env.get_template('embedding_index.rst.jinja').render(models=sorted_models)
file.write(rendered_index)

Expand All @@ -186,7 +172,7 @@ def get_unique_id(spec):

index_file_path = os.path.join(output_dir, "index.rst")
with open(index_file_path, "w") as file:

rendered_index = env.get_template('rerank_index.rst.jinja').render(models=sorted_models)
file.write(rendered_index)

Expand Down
141 changes: 62 additions & 79 deletions xinference/model/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import codecs
import copy
import json
import os
import warnings
Expand Down Expand Up @@ -195,85 +196,7 @@ def _install():
SUPPORTED_ENGINES["MLX"] = MLX_CLASSES
SUPPORTED_ENGINES["LMDEPLOY"] = LMDEPLOY_CLASSES

json_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
)
for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
model_spec = LLMFamilyV1.parse_obj(json_obj)
BUILTIN_LLM_FAMILIES.append(model_spec)

# register chat_template
if "chat" in model_spec.model_ability and isinstance(
model_spec.chat_template, str
):
# note that the key is the model name,
# since there are multiple representations of the same prompt style name in json.
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
"chat_template": model_spec.chat_template,
"stop_token_ids": model_spec.stop_token_ids,
"stop": model_spec.stop,
}
# register model family
if "chat" in model_spec.model_ability:
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
else:
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
if "tools" in model_spec.model_ability:
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)

modelscope_json_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "llm_family_modelscope.json"
)
for json_obj in json.load(codecs.open(modelscope_json_path, "r", encoding="utf-8")):
model_spec = LLMFamilyV1.parse_obj(json_obj)
BUILTIN_MODELSCOPE_LLM_FAMILIES.append(model_spec)

# register prompt style, in case that we have something missed
# if duplicated with huggingface json, keep it as the huggingface style
if (
"chat" in model_spec.model_ability
and isinstance(model_spec.chat_template, str)
and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
):
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
"chat_template": model_spec.chat_template,
"stop_token_ids": model_spec.stop_token_ids,
"stop": model_spec.stop,
}
# register model family
if "chat" in model_spec.model_ability:
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
else:
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
if "tools" in model_spec.model_ability:
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)

csghub_json_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "llm_family_csghub.json"
)
for json_obj in json.load(codecs.open(csghub_json_path, "r", encoding="utf-8")):
model_spec = LLMFamilyV1.parse_obj(json_obj)
BUILTIN_CSGHUB_LLM_FAMILIES.append(model_spec)

# register prompt style, in case that we have something missed
# if duplicated with huggingface json, keep it as the huggingface style
if (
"chat" in model_spec.model_ability
and isinstance(model_spec.chat_template, str)
and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
):
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
"chat_template": model_spec.chat_template,
"stop_token_ids": model_spec.stop_token_ids,
"stop": model_spec.stop,
}
# register model family
if "chat" in model_spec.model_ability:
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
else:
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
if "tools" in model_spec.model_ability:
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
_load_from_json_new("llm_family.json")

for llm_specs in [
BUILTIN_LLM_FAMILIES,
Expand All @@ -298,3 +221,63 @@ def _install():
# register model description
for ud_llm in get_user_defined_llm_families():
LLM_MODEL_DESCRIPTIONS.update(generate_llm_description(ud_llm))


def _add_model_spec(model_spec: LLMFamilyV1, LLM_FAMILIES: list[LLMFamilyV1]):
    """Append *model_spec* to *LLM_FAMILIES* and register its metadata.

    Registers the family's prompt style (chat template + stop tokens) under
    its model name, and classifies the family into the chat / generate /
    tool-call registries according to its ``model_ability`` list.
    """
    LLM_FAMILIES.append(model_spec)

    abilities = model_spec.model_ability
    name = model_spec.model_name

    # Register the chat template, keyed by model name (the same prompt style
    # may appear under several representations in the json). If the name was
    # already registered from an earlier (huggingface) json, keep that one.
    if (
        "chat" in abilities
        and isinstance(model_spec.chat_template, str)
        and name not in BUILTIN_LLM_PROMPT_STYLE
    ):
        BUILTIN_LLM_PROMPT_STYLE[name] = {
            "chat_template": model_spec.chat_template,
            "stop_token_ids": model_spec.stop_token_ids,
            "stop": model_spec.stop,
        }

    # Classify the family: chat-capable vs. generate-only, plus tool calling.
    if "chat" in abilities:
        BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(name)
    else:
        BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(name)
    if "tools" in abilities:
        BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(name)


def _load_from_json_new(file_name: str):
    """Load builtin LLM families from a unified JSON file next to this module.

    Each JSON entry carries ``model_specs`` whose ``model_hubs`` mapping holds
    per-hub overrides (e.g. ``model_id``) keyed by hub name. Every entry is
    split into up to one family per hub (huggingface / modelscope / csghub):
    each spec's hub override is flattened into the spec itself, tagged with
    ``model_hub``, and the resulting family object is parsed and registered
    into the corresponding BUILTIN_* registry via ``_add_model_spec``.

    :param file_name: name of the JSON file, resolved relative to this module.
    """
    json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), file_name)

    # Hub order also determines registration order for each family.
    hub_names = ["huggingface", "modelscope", "csghub"]
    # Target registries per hub; loop-invariant, so built once up front.
    hub_families = {
        "huggingface": BUILTIN_LLM_FAMILIES,
        "modelscope": BUILTIN_MODELSCOPE_LLM_FAMILIES,
        "csghub": BUILTIN_CSGHUB_LLM_FAMILIES,
    }

    # Context manager closes the handle deterministically; the previous
    # codecs.open(...) call leaked it.
    with open(json_path, "r", encoding="utf-8") as json_file:
        json_objs = json.load(json_file)

    for json_obj in json_objs:
        model_specs: list[dict] = json_obj["model_specs"]
        hub_specs: dict = {hub_name: [] for hub_name in hub_names}

        for model_spec in model_specs:
            for hub_name in hub_names:
                if hub_name in model_spec["model_hubs"]:
                    # Copy the generic spec, then overlay hub-specific
                    # fields (e.g. model_id) for this hub.
                    hub_spec = copy.deepcopy(model_spec)
                    hub_spec.update(model_spec["model_hubs"][hub_name])
                    hub_spec["model_hub"] = hub_name
                    del hub_spec["model_hubs"]
                    hub_specs[hub_name].append(hub_spec)

        for hub_name in hub_names:
            a_hub_specs = hub_specs[hub_name]
            if a_hub_specs:
                # Re-parse the family with only this hub's flattened specs.
                model_obj = copy.deepcopy(json_obj)
                model_obj["model_specs"] = a_hub_specs
                converted_model_spec = LLMFamilyV1.parse_obj(model_obj)
                _add_model_spec(converted_model_spec, hub_families[hub_name])
Loading
Loading