From 40bcbcf8bb0a7a5d617543af24794cdf800741cd Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Mon, 23 Dec 2024 21:48:14 +0800 Subject: [PATCH] refactor notebook and add vllm --- .../{ => notebook}/DiT_ImageNet_Demo.ipynb | 0 .../{ => notebook}/Latte-BestPractice.ipynb | 0 .../{ => notebook}/Omnigen_demo.ipynb | 0 .../{ => notebook}/SiT_ImageNet_Demo.ipynb | 0 .../{ => notebook}/UViT_ImageNet_demo.ipynb | 0 .../{ => notebook}/ViT-BestPractice.ipynb | 0 .../{ => notebook}/ViViT-BestPractice.ipynb | 0 AIGC-tutorial/{ => notebook}/ViViT-demo.ipynb | 0 .../comfyui_manager_animatediff.ipynb | 0 .../{ => notebook}/comfyui_modelscope.ipynb | 0 .../{ => notebook}/patch-BestPractice.ipynb | 0 ...344\275\263\345\256\236\350\267\265.ipynb" | 0 .../{ => notebook}/OpenVino-llm-chatbot.ipynb | 0 .../RAG+Rerank+Llamaindex.ipynb | 0 LLM-tutorial/{ => notebook}/nexa.ipynb | 0 LLM-tutorial/notebook/vllm.ipynb | 587 ++++++++++++++++++ ...351\233\206\350\256\255\347\273\203.ipynb" | 0 .../notebook/\350\256\255\347\273\203.ipynb" | 0 18 files changed, 587 insertions(+) rename AIGC-tutorial/{ => notebook}/DiT_ImageNet_Demo.ipynb (100%) rename AIGC-tutorial/{ => notebook}/Latte-BestPractice.ipynb (100%) rename AIGC-tutorial/{ => notebook}/Omnigen_demo.ipynb (100%) rename AIGC-tutorial/{ => notebook}/SiT_ImageNet_Demo.ipynb (100%) rename AIGC-tutorial/{ => notebook}/UViT_ImageNet_demo.ipynb (100%) rename AIGC-tutorial/{ => notebook}/ViT-BestPractice.ipynb (100%) rename AIGC-tutorial/{ => notebook}/ViViT-BestPractice.ipynb (100%) rename AIGC-tutorial/{ => notebook}/ViViT-demo.ipynb (100%) rename AIGC-tutorial/{ => notebook}/comfyui_manager_animatediff.ipynb (100%) rename AIGC-tutorial/{ => notebook}/comfyui_modelscope.ipynb (100%) rename AIGC-tutorial/{ => notebook}/patch-BestPractice.ipynb (100%) rename "AIGC-tutorial/sd-webui\346\234\200\344\275\263\345\256\236\350\267\265.ipynb" => "AIGC-tutorial/notebook/sd-webui\346\234\200\344\275\263\345\256\236\350\267\265.ipynb" (100%) rename LLM-tutorial/{ => notebook}/OpenVino-llm-chatbot.ipynb (100%) rename LLM-tutorial/{ => notebook}/RAG+Rerank+Llamaindex.ipynb (100%) rename LLM-tutorial/{ => notebook}/nexa.ipynb (100%) create mode 100644 LLM-tutorial/notebook/vllm.ipynb rename "LLM-tutorial/\345\205\250\346\265\201\347\250\213\347\237\245\344\271\216\346\225\260\346\215\256\351\233\206\350\256\255\347\273\203.ipynb" => "LLM-tutorial/notebook/\345\205\250\346\265\201\347\250\213\347\237\245\344\271\216\346\225\260\346\215\256\351\233\206\350\256\255\347\273\203.ipynb" (100%) rename "LLM-tutorial/P.\350\256\255\347\273\203.ipynb" => "LLM-tutorial/notebook/\350\256\255\347\273\203.ipynb" (100%) diff --git a/AIGC-tutorial/DiT_ImageNet_Demo.ipynb b/AIGC-tutorial/notebook/DiT_ImageNet_Demo.ipynb similarity index 100% rename from AIGC-tutorial/DiT_ImageNet_Demo.ipynb rename to AIGC-tutorial/notebook/DiT_ImageNet_Demo.ipynb diff --git a/AIGC-tutorial/Latte-BestPractice.ipynb b/AIGC-tutorial/notebook/Latte-BestPractice.ipynb similarity index 100% rename from AIGC-tutorial/Latte-BestPractice.ipynb rename to AIGC-tutorial/notebook/Latte-BestPractice.ipynb diff --git a/AIGC-tutorial/Omnigen_demo.ipynb b/AIGC-tutorial/notebook/Omnigen_demo.ipynb similarity index 100% rename from AIGC-tutorial/Omnigen_demo.ipynb rename to AIGC-tutorial/notebook/Omnigen_demo.ipynb diff --git a/AIGC-tutorial/SiT_ImageNet_Demo.ipynb b/AIGC-tutorial/notebook/SiT_ImageNet_Demo.ipynb similarity index 100% rename from AIGC-tutorial/SiT_ImageNet_Demo.ipynb rename to AIGC-tutorial/notebook/SiT_ImageNet_Demo.ipynb diff --git a/AIGC-tutorial/UViT_ImageNet_demo.ipynb b/AIGC-tutorial/notebook/UViT_ImageNet_demo.ipynb similarity index 100% rename from AIGC-tutorial/UViT_ImageNet_demo.ipynb rename to AIGC-tutorial/notebook/UViT_ImageNet_demo.ipynb diff --git a/AIGC-tutorial/ViT-BestPractice.ipynb b/AIGC-tutorial/notebook/ViT-BestPractice.ipynb similarity index 100% rename from AIGC-tutorial/ViT-BestPractice.ipynb rename to AIGC-tutorial/notebook/ViT-BestPractice.ipynb diff --git a/AIGC-tutorial/ViViT-BestPractice.ipynb b/AIGC-tutorial/notebook/ViViT-BestPractice.ipynb similarity index 100% rename from AIGC-tutorial/ViViT-BestPractice.ipynb rename to AIGC-tutorial/notebook/ViViT-BestPractice.ipynb diff --git a/AIGC-tutorial/ViViT-demo.ipynb b/AIGC-tutorial/notebook/ViViT-demo.ipynb similarity index 100% rename from AIGC-tutorial/ViViT-demo.ipynb rename to AIGC-tutorial/notebook/ViViT-demo.ipynb diff --git a/AIGC-tutorial/comfyui_manager_animatediff.ipynb b/AIGC-tutorial/notebook/comfyui_manager_animatediff.ipynb similarity index 100% rename from AIGC-tutorial/comfyui_manager_animatediff.ipynb rename to AIGC-tutorial/notebook/comfyui_manager_animatediff.ipynb diff --git a/AIGC-tutorial/comfyui_modelscope.ipynb b/AIGC-tutorial/notebook/comfyui_modelscope.ipynb similarity index 100% rename from AIGC-tutorial/comfyui_modelscope.ipynb rename to AIGC-tutorial/notebook/comfyui_modelscope.ipynb diff --git a/AIGC-tutorial/patch-BestPractice.ipynb b/AIGC-tutorial/notebook/patch-BestPractice.ipynb similarity index 100% rename from AIGC-tutorial/patch-BestPractice.ipynb rename to AIGC-tutorial/notebook/patch-BestPractice.ipynb diff --git "a/AIGC-tutorial/sd-webui\346\234\200\344\275\263\345\256\236\350\267\265.ipynb" "b/AIGC-tutorial/notebook/sd-webui\346\234\200\344\275\263\345\256\236\350\267\265.ipynb" similarity index 100% rename from "AIGC-tutorial/sd-webui\346\234\200\344\275\263\345\256\236\350\267\265.ipynb" rename to "AIGC-tutorial/notebook/sd-webui\346\234\200\344\275\263\345\256\236\350\267\265.ipynb" diff --git a/LLM-tutorial/OpenVino-llm-chatbot.ipynb b/LLM-tutorial/notebook/OpenVino-llm-chatbot.ipynb similarity index 100% rename from LLM-tutorial/OpenVino-llm-chatbot.ipynb rename to LLM-tutorial/notebook/OpenVino-llm-chatbot.ipynb diff --git a/LLM-tutorial/RAG+Rerank+Llamaindex.ipynb b/LLM-tutorial/notebook/RAG+Rerank+Llamaindex.ipynb similarity index 100% rename from LLM-tutorial/RAG+Rerank+Llamaindex.ipynb rename to LLM-tutorial/notebook/RAG+Rerank+Llamaindex.ipynb diff --git a/LLM-tutorial/nexa.ipynb b/LLM-tutorial/notebook/nexa.ipynb similarity index 100% rename from LLM-tutorial/nexa.ipynb rename to LLM-tutorial/notebook/nexa.ipynb diff --git a/LLM-tutorial/notebook/vllm.ipynb b/LLM-tutorial/notebook/vllm.ipynb new file mode 100644 index 0000000..ff32090 --- /dev/null +++ b/LLM-tutorial/notebook/vllm.ipynb @@ -0,0 +1,587 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "014d6a9c-fe02-47d0-994e-b1e725f0c54b", + "metadata": {}, + "source": [ + "## vLLM推理框架\n", + "\n", + "vLLM推理框架是一种高效的推理工具,旨在加速和优化大规模语言模型的推理过程。它通过以下几个关键技术实现了这一目标:\n", + "\n", + "1. **PagedAttention**: PagedAttention通过将KVCache存储在块状物理显存中,并使用逻辑显存对物理显存进行复用的技术,该技术可以增加显存寻址的连续性,并降低重复显存的数量。\n", + "2. **异步执行**: vLLM框架支持异步执行,这意味着可以在等待某些计算结果的同时,继续进行其他计算任务,从而提高整体推理效率。\n", + "3. **Continuous Batching**: 在多batch推理中,一般伴随着短sequence生成完成后等待长sequence完成的padding问题,这些padding不仅占用了额外内存,且占用了生成时间,而通过将新的sequence填充到短sequence后面会让生成时间大大缩短。\n", + "\n", + "vLLM框架支持了大部分的纯文本LLM,部分多模态LLM,以及部分GPTQ和AWQ量化模型。" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4d80b8f0-2a31-4315-8bbb-2b368800289c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-23T13:25:44.378340Z", + "iopub.status.busy": "2024-12-23T13:25:44.377842Z", + "iopub.status.idle": "2024-12-23T13:25:48.659961Z", + "shell.execute_reply": "2024-12-23T13:25:48.659028Z", + "shell.execute_reply.started": "2024-12-23T13:25:44.378314Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in indexes: https://mirrors.aliyun.com/pypi/simple\n", + "Requirement already satisfied: vllm in /usr/local/lib/python3.10/site-packages (0.6.3.post1)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/site-packages (from vllm) (6.1.0)\n", + "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/site-packages (from vllm) (0.2.0)\n", + "Requirement already satisfied: numpy<2.0.0 in /usr/local/lib/python3.10/site-packages (from vllm) (1.26.4)\n", + "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/site-packages (from vllm) (2.32.3)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/site-packages (from vllm) (4.67.1)\n", + "Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/site-packages (from vllm) (9.0.0)\n", + "Requirement already satisfied: transformers>=4.45.2 in /usr/local/lib/python3.10/site-packages (from vllm) (4.46.3)\n", + "Requirement already satisfied: tokenizers>=0.19.1 in /usr/local/lib/python3.10/site-packages (from vllm) (0.20.3)\n", + "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/site-packages (from vllm) (5.29.1)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/site-packages (from vllm) (3.11.10)\n", + "Requirement already satisfied: openai>=1.40.0 in /usr/local/lib/python3.10/site-packages (from vllm) (1.57.0)\n", + "Requirement already satisfied: uvicorn[standard] in /usr/local/lib/python3.10/site-packages (from vllm) (0.32.1)\n", + "Requirement already satisfied: pydantic>=2.9 in /usr/local/lib/python3.10/site-packages (from vllm) (2.10.3)\n", + "Requirement already satisfied: pillow in /usr/local/lib/python3.10/site-packages (from vllm) (10.4.0)\n", + "Requirement already satisfied: prometheus-client>=0.18.0 in /usr/local/lib/python3.10/site-packages (from vllm) (0.21.1)\n", + "Requirement already satisfied: prometheus-fastapi-instrumentator>=7.0.0 in /usr/local/lib/python3.10/site-packages (from vllm) (7.0.0)\n", + "Requirement already satisfied: tiktoken>=0.6.0 in /usr/local/lib/python3.10/site-packages (from vllm) (0.7.0)\n", + "Requirement already satisfied: lm-format-enforcer==0.10.6 in /usr/local/lib/python3.10/site-packages (from vllm) (0.10.6)\n", + "Requirement already satisfied: outlines<0.1,>=0.0.43 in /usr/local/lib/python3.10/site-packages (from vllm) (0.0.46)\n", + "Requirement already satisfied: typing-extensions>=4.10 in /usr/local/lib/python3.10/site-packages (from vllm) (4.12.2)\n", + "Requirement already satisfied: filelock>=3.10.4 in /usr/local/lib/python3.10/site-packages (from vllm) (3.16.1)\n", + "Requirement already satisfied: partial-json-parser in /usr/local/lib/python3.10/site-packages (from vllm) (0.2.1.1.post4)\n", + "Requirement already satisfied: pyzmq in /usr/local/lib/python3.10/site-packages (from vllm) (26.2.0)\n", + "Requirement already satisfied: msgspec in /usr/local/lib/python3.10/site-packages (from vllm) (0.18.6)\n", + "Requirement already satisfied: gguf==0.10.0 in /usr/local/lib/python3.10/site-packages (from vllm) (0.10.0)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.10/site-packages (from vllm) (8.5.0)\n", + "Requirement already satisfied: mistral-common>=1.4.4 in /usr/local/lib/python3.10/site-packages (from mistral-common[opencv]>=1.4.4->vllm) (1.5.1)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/site-packages (from vllm) (6.0.2)\n", + "Requirement already satisfied: einops in /usr/local/lib/python3.10/site-packages (from vllm) (0.8.0)\n", + "Requirement already satisfied: compressed-tensors==0.6.0 in /usr/local/lib/python3.10/site-packages (from vllm) (0.6.0)\n", + "Requirement already satisfied: ray>=2.9 in /usr/local/lib/python3.10/site-packages (from vllm) (2.40.0)\n", + "Requirement already satisfied: nvidia-ml-py in /usr/local/lib/python3.10/site-packages (from vllm) (12.560.30)\n", + "Requirement already satisfied: torch==2.4.0 in /usr/local/lib/python3.10/site-packages (from vllm) (2.4.0)\n", + "Requirement already satisfied: torchvision==0.19 in /usr/local/lib/python3.10/site-packages (from vllm) (0.19.0)\n", + "Requirement already satisfied: xformers==0.0.27.post2 in /usr/local/lib/python3.10/site-packages (from vllm) (0.0.27.post2)\n", + "Requirement already satisfied: fastapi!=0.113.*,!=0.114.0,>=0.107.0 in /usr/local/lib/python3.10/site-packages (from vllm) (0.115.6)\n", + "Requirement already satisfied: interegular>=0.3.2 in /usr/local/lib/python3.10/site-packages (from lm-format-enforcer==0.10.6->vllm) (0.3.3)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/site-packages (from lm-format-enforcer==0.10.6->vllm) (24.2)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (1.13.3)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (3.1.4)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (2024.6.1)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (2.20.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (12.1.105)\n", + "Requirement already satisfied: triton==3.0.0 in /usr/local/lib/python3.10/site-packages (from torch==2.4.0->vllm) (3.0.0)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.4.0->vllm) (12.6.85)\n", + "Requirement already satisfied: starlette<0.42.0,>=0.40.0 in /usr/local/lib/python3.10/site-packages (from fastapi!=0.113.*,!=0.114.0,>=0.107.0->vllm) (0.41.3)\n", + "Requirement already satisfied: jsonschema<5.0.0,>=4.21.1 in /usr/local/lib/python3.10/site-packages (from mistral-common>=1.4.4->mistral-common[opencv]>=1.4.4->vllm) (4.23.0)\n", + "Requirement already satisfied: opencv-python-headless<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/site-packages (from mistral-common[opencv]>=1.4.4->vllm) (4.10.0.84)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/site-packages (from openai>=1.40.0->vllm) (4.7.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/site-packages (from openai>=1.40.0->vllm) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/site-packages (from openai>=1.40.0->vllm) (0.27.2)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.10/site-packages (from openai>=1.40.0->vllm) (0.8.0)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/site-packages (from openai>=1.40.0->vllm) (1.3.1)\n", + "Requirement already satisfied: lark in /usr/local/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (1.2.2)\n", + "Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (1.6.0)\n", + "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (3.1.0)\n", + "Requirement already satisfied: diskcache in /usr/local/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (5.6.3)\n", + "Requirement already satisfied: numba in /usr/local/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (0.60.0)\n", + "Requirement already satisfied: referencing in /usr/local/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (0.35.1)\n", + "Requirement already satisfied: datasets in /usr/local/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (3.0.1)\n", + "Requirement already satisfied: pycountry in /usr/local/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (24.6.1)\n", + "Requirement already satisfied: pyairports in /usr/local/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (2.1.1)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/site-packages (from pydantic>=2.9->vllm) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/site-packages (from pydantic>=2.9->vllm) (2.27.1)\n", + "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/site-packages (from ray>=2.9->vllm) (8.1.7)\n", + "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/site-packages (from ray>=2.9->vllm) (1.1.0)\n", + "Requirement already satisfied: aiosignal in /usr/local/lib/python3.10/site-packages (from ray>=2.9->vllm) (1.3.1)\n", + "Requirement already satisfied: frozenlist in /usr/local/lib/python3.10/site-packages (from ray>=2.9->vllm) (1.5.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/site-packages (from requests>=2.26.0->vllm) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/site-packages (from requests>=2.26.0->vllm) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/site-packages (from requests>=2.26.0->vllm) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/site-packages (from requests>=2.26.0->vllm) (2024.8.30)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/site-packages (from tiktoken>=0.6.0->vllm) (2024.11.6)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/site-packages (from tokenizers>=0.19.1->vllm) (0.25.2)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/site-packages (from transformers>=4.45.2->vllm) (0.4.5)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/site-packages (from aiohttp->vllm) (2.4.4)\n", + "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/site-packages (from aiohttp->vllm) (4.0.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/site-packages (from aiohttp->vllm) (24.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/site-packages (from aiohttp->vllm) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/site-packages (from aiohttp->vllm) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/site-packages (from aiohttp->vllm) (1.18.3)\n", + "Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.10/site-packages (from importlib-metadata->vllm) (3.21.0)\n", + "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (0.14.0)\n", + "Requirement already satisfied: httptools>=0.6.3 in /usr/local/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (0.6.4)\n", + "Requirement already satisfied: python-dotenv>=0.13 in /usr/local/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (1.0.1)\n", + "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /usr/local/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (0.21.0)\n", + "Requirement already satisfied: watchfiles>=0.13 in /usr/local/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (1.0.0)\n", + "Requirement already satisfied: websockets>=10.4 in /usr/local/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (11.0.3)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /usr/local/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai>=1.40.0->vllm) (1.2.2)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai>=1.40.0->vllm) (1.0.7)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/site-packages (from jsonschema<5.0.0,>=4.21.1->mistral-common>=1.4.4->mistral-common[opencv]>=1.4.4->vllm) (2024.10.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/site-packages (from jsonschema<5.0.0,>=4.21.1->mistral-common>=1.4.4->mistral-common[opencv]>=1.4.4->vllm) (0.22.3)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (18.1.0)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (0.3.8)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (2.2.3)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (3.5.0)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (0.70.16)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/site-packages (from jinja2->torch==2.4.0->vllm) (2.1.5)\n", + "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.10/site-packages (from numba->outlines<0.1,>=0.0.43->vllm) (0.43.0)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/site-packages (from sympy->torch==2.4.0->vllm) (1.3.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/site-packages (from pandas->datasets->outlines<0.1,>=0.0.43->vllm) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/site-packages (from pandas->datasets->outlines<0.1,>=0.0.43->vllm) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/site-packages (from pandas->datasets->outlines<0.1,>=0.0.43->vllm) (2024.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets->outlines<0.1,>=0.0.43->vllm) (1.17.0)\n", + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "# 安装vLLM只需要执行下面的命令\n", + "!pip install vllm" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0cd854f3-d877-4228-8ae3-ebbed6929f67", + "metadata": { + "ExecutionIndicator": { + "show": true + }, + "execution": { + "iopub.execute_input": "2024-12-23T13:25:55.961095Z", + "iopub.status.busy": "2024-12-23T13:25:55.960554Z", + "iopub.status.idle": "2024-12-23T13:25:56.090921Z", + "shell.execute_reply": "2024-12-23T13:25:56.090088Z", + "shell.execute_reply.started": "2024-12-23T13:25:55.961071Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# 使用下面的环境变量使用ModelScope社区来进行下载提速\n", + "!export VLLM_USE_MODELSCOPE=1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "354fd96b-9c82-4607-a3a9-88ca63c8e558", + "metadata": {}, + "outputs": [], + "source": [ + "需要注意的是,vLLM会预占用大量显存来存储KVCache,显存占用越大速度提升越高。如果需要控制显存占用的量,请使用下面的参数:\n", + "\n", + "- gpu_memory_utilization: 从0-1的float小数,默认为0.9,代表了额外显存的占用量\n", + "\n", + "除此之外,还有下面的参数经常被用到:\n", + "\n", + "- tensor_parallel_size tensor并行数量,如果你有多个显卡可以用这个参数来拆分模型\n", + "- pipeline_parallel_size pipeline并行数量\n", + "- max_num_seqs 并行处理的最大sequence数量\n", + "\n", + "更多分布式推理的参数请查看vLLM的官方文档:https://docs.vllm.ai/en/latest/serving/distributed_serving.html" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e55cfa34-88da-45b1-bb6a-31a83c05437e", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-23T13:40:58.371721Z", + "iopub.status.busy": "2024-12-23T13:40:58.371110Z", + "iopub.status.idle": "2024-12-23T13:41:41.311904Z", + "shell.execute_reply": "2024-12-23T13:41:41.311344Z", + "shell.execute_reply.started": "2024-12-23T13:40:58.371674Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2024-12-23 21:41:01,356\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading Model to directory: /mnt/workspace/.cache/modelscope/hub/Qwen/Qwen2.5-1.5B-Instruct\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-12-23 21:41:02,605 - modelscope - INFO - Target directory already exists, skipping creation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading Model to directory: /mnt/workspace/.cache/modelscope/hub/Qwen/Qwen2.5-1.5B-Instruct\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-12-23 21:41:03,056 - modelscope - INFO - Target directory already exists, skipping creation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO 12-23 21:41:07 llm_engine.py:237] Initializing an LLM engine (v0.6.3.post1) with config: model='Qwen/Qwen2.5-1.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-1.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=Qwen/Qwen2.5-1.5B-Instruct, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, mm_processor_kwargs=None)\n", + "Downloading Model to directory: /mnt/workspace/.cache/modelscope/hub/Qwen/Qwen2.5-1.5B-Instruct\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-12-23 21:41:07,591 - modelscope - INFO - Target directory already exists, skipping creation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading Model to directory: /mnt/workspace/.cache/modelscope/hub/Qwen/Qwen2.5-1.5B-Instruct\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-12-23 21:41:18,664 - modelscope - INFO - Target directory already exists, skipping creation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading Model to directory: /mnt/workspace/.cache/modelscope/hub/Qwen/Qwen2.5-1.5B-Instruct\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-12-23 21:41:19,175 - modelscope - INFO - Target directory already exists, skipping creation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO 12-23 21:41:19 model_runner.py:1056] Starting to load model Qwen/Qwen2.5-1.5B-Instruct...\n", + "Downloading Model to directory: /mnt/workspace/.cache/modelscope/hub/Qwen/Qwen2.5-1.5B-Instruct\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-12-23 21:41:20,154 - modelscope - INFO - Target directory already exists, skipping creation.\n", + "Loading safetensors checkpoint shards: 0% Completed | 0/1 [00:00=3.5.0 in /usr/local/lib/python3.10/site-packages (from openai) (4.7.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/site-packages (from openai) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/site-packages (from openai) (0.27.2)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.10/site-packages (from openai) (0.8.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/site-packages (from openai) (2.10.3)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/site-packages (from openai) (1.3.1)\n", + "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/site-packages (from openai) (4.67.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.11 in /usr/local/lib/python3.10/site-packages (from openai) (4.12.2)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /usr/local/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (1.2.2)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (3.10)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->openai) (2.27.1)\n", + "Installing collected packages: openai\n", + " Attempting uninstall: openai\n", + " Found existing installation: openai 1.57.0\n", + " Uninstalling openai-1.57.0:\n", + " Successfully uninstalled openai-1.57.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "lmdeploy 0.6.2 requires peft<=0.11.1, but you have peft 0.12.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed openai-1.58.1\n", + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Chat response: ChatCompletion(id='chat-8a3920662df64279ba847ff1bb0b8907', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why don't scientists trust atoms?\\n\\nBecause they make up everything.\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=[]), stop_reason=None)], created=1734961117, model='Qwen/Qwen2.5-1.5B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=14, prompt_tokens=24, total_tokens=38, completion_tokens_details=None, prompt_tokens_details=None), prompt_logprobs=None)\n" + ] + } + ], + "source": [ + "# python调用\n", + "!pip install openai -U\n", + "from openai import OpenAI\n", + "# Set OpenAI's API key and API base to use vLLM's API server.\n", + "openai_api_key = \"EMPTY\"\n", + "openai_api_base = \"http://localhost:8000/v1\"\n", + "\n", + "client = OpenAI(\n", + " api_key=openai_api_key,\n", + " base_url=openai_api_base,\n", + ")\n", + "\n", + "chat_response = client.chat.completions.create(\n", + " model=\"Qwen/Qwen2.5-1.5B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\"role\": \"user\", \"content\": \"Tell me a joke.\"},\n", + " ]\n", + ")\n", + "print(\"Chat response:\", chat_response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git "a/LLM-tutorial/\345\205\250\346\265\201\347\250\213\347\237\245\344\271\216\346\225\260\346\215\256\351\233\206\350\256\255\347\273\203.ipynb" "b/LLM-tutorial/notebook/\345\205\250\346\265\201\347\250\213\347\237\245\344\271\216\346\225\260\346\215\256\351\233\206\350\256\255\347\273\203.ipynb" similarity index 100% rename from "LLM-tutorial/\345\205\250\346\265\201\347\250\213\347\237\245\344\271\216\346\225\260\346\215\256\351\233\206\350\256\255\347\273\203.ipynb" rename to "LLM-tutorial/notebook/\345\205\250\346\265\201\347\250\213\347\237\245\344\271\216\346\225\260\346\215\256\351\233\206\350\256\255\347\273\203.ipynb" diff --git "a/LLM-tutorial/P.\350\256\255\347\273\203.ipynb" "b/LLM-tutorial/notebook/\350\256\255\347\273\203.ipynb" similarity index 100% rename from "LLM-tutorial/P.\350\256\255\347\273\203.ipynb" rename to "LLM-tutorial/notebook/\350\256\255\347\273\203.ipynb"