Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add EngineFragment #3120

Open
wants to merge 178 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 159 commits
Commits
Show all changes
178 commits
Select commit Hold shift + click to select a range
a2f2f48
empty file
fzyzcjy Jan 25, 2025
815dbc3
empty class
fzyzcjy Jan 25, 2025
3c0e52f
mv MetricManager
fzyzcjy Jan 25, 2025
65b3a37
fix
fzyzcjy Jan 25, 2025
6ce5236
mv _ReqState
fzyzcjy Jan 25, 2025
7ca0a47
mv GenerationConverter.init
fzyzcjy Jan 25, 2025
b88e450
mv tokenize_request
fzyzcjy Jan 25, 2025
3b8ed7b
simp branch
fzyzcjy Jan 25, 2025
2f47f92
tokenize_requests
fzyzcjy Jan 25, 2025
e21a05e
mv postprocess_response
fzyzcjy Jan 25, 2025
ab5d79a
simp code
fzyzcjy Jan 25, 2025
053c8f4
extract _compute_meta_info
fzyzcjy Jan 25, 2025
02c451c
mv convert_logprob_style etc
fzyzcjy Jan 25, 2025
ccd5e8a
make private
fzyzcjy Jan 25, 2025
ecf5e21
mv GenerationManager.init
fzyzcjy Jan 25, 2025
818f8cd
mv GenerationManager body
fzyzcjy Jan 25, 2025
022eb4f
fix import
fzyzcjy Jan 25, 2025
dc53f8f
mv modelconfig
fzyzcjy Jan 25, 2025
c4f1668
call generation_converter
fzyzcjy Jan 25, 2025
1670ce1
fix metrics
fzyzcjy Jan 25, 2025
905d247
fix err
fzyzcjy Jan 25, 2025
41bee7d
handle tokenizer_manager.generate_request
fzyzcjy Jan 25, 2025
2b3ca96
handle abort_request
fzyzcjy Jan 25, 2025
e293f1f
add field
fzyzcjy Jan 25, 2025
2424cf2
rm empty func
fzyzcjy Jan 25, 2025
422ea33
extract _RequestDumper
fzyzcjy Jan 25, 2025
3e6e363
call setup
fzyzcjy Jan 25, 2025
56dcbd1
call handle_batch_output
fzyzcjy Jan 25, 2025
0c08f30
more tokenizer_manager call generation_manager
fzyzcjy Jan 25, 2025
deec6af
use property
fzyzcjy Jan 25, 2025
43dd4e2
call request_dumper
fzyzcjy Jan 25, 2025
2d09b58
call on_request
fzyzcjy Jan 25, 2025
5701e20
fix minor field names
fzyzcjy Jan 25, 2025
cff89f0
fix more field names
fzyzcjy Jan 25, 2025
ba0f1b1
more
fzyzcjy Jan 25, 2025
4b03255
extract _RequestLogger
fzyzcjy Jan 25, 2025
75dc737
extract logger body
fzyzcjy Jan 25, 2025
4100d60
fix err
fzyzcjy Jan 25, 2025
ba4ad8e
fix field
fzyzcjy Jan 25, 2025
5450fa3
empty package
fzyzcjy Jan 25, 2025
9080d45
fmt
fzyzcjy Jan 25, 2025
291bb24
Merge branch 'feat/generation_manager' into feat/rename_tokenizer_man…
fzyzcjy Jan 25, 2025
b28ca30
empty file
fzyzcjy Jan 25, 2025
e4f2393
mv file
fzyzcjy Jan 25, 2025
15049c2
rename class
fzyzcjy Jan 25, 2025
b6dcf81
fix import
fzyzcjy Jan 25, 2025
4572406
rename class
fzyzcjy Jan 25, 2025
6f01739
rename tokenizer_manager
fzyzcjy Jan 25, 2025
b1932a6
handle max_req_input_len
fzyzcjy Jan 25, 2025
559ecba
fmt
fzyzcjy Jan 25, 2025
dde151f
Merge branch 'feat/generation_manager' into feat/rename_tokenizer_man…
fzyzcjy Jan 25, 2025
fa4d9f6
fmt
fzyzcjy Jan 25, 2025
71c29d9
fmt
fzyzcjy Jan 25, 2025
4d5be79
empty file
fzyzcjy Jan 25, 2025
91c604d
extract detokenizer_manager.init
fzyzcjy Jan 25, 2025
e5992fd
extract
fzyzcjy Jan 25, 2025
7930368
Merge branch 'feat/detokenizer_use_dispatcher' into feat/separate_comm
fzyzcjy Jan 25, 2025
2dd8e8f
mv _request_dispatcher
fzyzcjy Jan 25, 2025
a68c2e6
mv event loop
fzyzcjy Jan 25, 2025
f887f2a
mv run process
fzyzcjy Jan 25, 2025
8d80871
update run process
fzyzcjy Jan 25, 2025
844226d
rm unused port_args
fzyzcjy Jan 25, 2025
14dd204
fmt
fzyzcjy Jan 25, 2025
c700fe8
Merge branch 'feat/detokenizer_use_dispatcher' into feat/separate_comm
fzyzcjy Jan 25, 2025
7e2ca83
update import
fzyzcjy Jan 25, 2025
edb751c
mv run_scheduler_process
fzyzcjy Jan 25, 2025
3939f96
empty class
fzyzcjy Jan 25, 2025
49908f1
mv scheduler.init
fzyzcjy Jan 25, 2025
8ff66b7
update func call
fzyzcjy Jan 25, 2025
de3ee31
field
fzyzcjy Jan 25, 2025
cddd94d
make private
fzyzcjy Jan 25, 2025
6bc0090
mv _process_input_requests
fzyzcjy Jan 25, 2025
c9afaef
call on_generation_output
fzyzcjy Jan 25, 2025
4cd5ec1
mv recv_requests
fzyzcjy Jan 25, 2025
4791588
update run process
fzyzcjy Jan 25, 2025
b54bf6c
extract process_batch
fzyzcjy Jan 25, 2025
f252b0f
fix name
fzyzcjy Jan 25, 2025
2318981
fix field name
fzyzcjy Jan 25, 2025
6f4f2c0
simp args
fzyzcjy Jan 25, 2025
4de6e76
fmt
fzyzcjy Jan 25, 2025
776d35a
fix err
fzyzcjy Jan 25, 2025
b0fa6c0
mv _launch_subprocesses
fzyzcjy Jan 25, 2025
5f073a1
rename
fzyzcjy Jan 25, 2025
ed2e250
fix logger
fzyzcjy Jan 25, 2025
8c8ca3f
fmt
fzyzcjy Jan 25, 2025
40b39b7
empty file
fzyzcjy Jan 25, 2025
7ea2563
cp spmd_orchestrator from old pr
fzyzcjy Jan 25, 2025
15da0ca
cp shutdown from old pr
fzyzcjy Jan 25, 2025
bff551c
cp engine_base
fzyzcjy Jan 25, 2025
970e357
extend base class
fzyzcjy Jan 25, 2025
4182505
cp engine_fragment from old pr
fzyzcjy Jan 25, 2025
2ce8771
cp examples
fzyzcjy Jan 25, 2025
298604b
update ci
fzyzcjy Jan 25, 2025
bda1243
empty
fzyzcjy Jan 25, 2025
d41fdc6
cp test from old pr
fzyzcjy Jan 25, 2025
6863225
fmt
fzyzcjy Jan 25, 2025
77acd55
fix import
fzyzcjy Jan 25, 2025
825db47
fix import
fzyzcjy Jan 25, 2025
d0c3de9
fix err
fzyzcjy Jan 25, 2025
5ab9cda
fix minor
fzyzcjy Jan 25, 2025
c96b014
fmt
fzyzcjy Jan 25, 2025
c80e610
fix import
fzyzcjy Jan 25, 2025
7448fe7
Merge branch 'feat/separate_comm' into feat/engine_fragment_0125
fzyzcjy Jan 25, 2025
a1b434e
fix import
fzyzcjy Jan 25, 2025
f889a6c
fix import
fzyzcjy Jan 25, 2025
8a10a42
Merge branch 'main' into feat/generation_manager
fzyzcjy Jan 25, 2025
45937e6
Merge branch 'main' into feat/generation_manager
fzyzcjy Jan 26, 2025
cfd3852
Merge branch 'main' into feat/generation_manager
fzyzcjy Jan 26, 2025
4543136
bump ci
fzyzcjy Jan 26, 2025
5e16f96
Merge remote-tracking branch 'origin/feat/generation_manager' into fe…
fzyzcjy Jan 26, 2025
b761936
Merge branch 'main' into feat/generation_manager
fzyzcjy Jan 26, 2025
aeed015
Revert "bump ci"
fzyzcjy Jan 26, 2025
c42431b
Merge remote-tracking branch 'origin/feat/generation_manager' into fe…
fzyzcjy Jan 26, 2025
03b5799
Merge branch 'main' into feat/generation_manager
fzyzcjy Jan 26, 2025
2588e23
Merge branch 'main' into feat/generation_manager
fzyzcjy Jan 26, 2025
7daa570
Merge branch 'main' into feat/generation_manager
fzyzcjy Jan 27, 2025
3c21082
Merge branch 'feat/generation_manager' into feat/rename_tokenizer_man…
fzyzcjy Jan 27, 2025
8239d52
Merge branch 'feat/rename_tokenizer_manager' into feat/separate_comm
fzyzcjy Jan 27, 2025
bc23bb4
Merge branch 'feat/separate_comm' into feat/engine_fragment_0125
fzyzcjy Jan 27, 2025
f62574e
fix import
fzyzcjy Jan 27, 2025
039df0a
Merge branch 'feat/separate_comm' into feat/engine_fragment_0125
fzyzcjy Jan 27, 2025
6f057aa
fix rename
fzyzcjy Jan 27, 2025
1aa2ef0
Merge branch 'feat/rename_tokenizer_manager' into feat/separate_comm
fzyzcjy Jan 27, 2025
c38c8c5
Merge branch 'feat/separate_comm' into feat/engine_fragment_0125
fzyzcjy Jan 27, 2025
bbd7908
bump ci
fzyzcjy Jan 26, 2025
ce8fbef
fix rename
fzyzcjy Jan 27, 2025
488b6cd
Merge branch 'feat/rename_tokenizer_manager' into feat/separate_comm
fzyzcjy Jan 27, 2025
0fca144
Merge branch 'feat/separate_comm' into feat/engine_fragment_0125
fzyzcjy Jan 27, 2025
e669e45
Revert "bump ci"
fzyzcjy Jan 27, 2025
5ed169c
bump ci
fzyzcjy Jan 26, 2025
3d46e2f
Revert "bump ci"
fzyzcjy Jan 27, 2025
570d657
Revert "Revert "bump ci""
fzyzcjy Jan 27, 2025
89e433e
Revert "Revert "Revert "bump ci"""
fzyzcjy Jan 27, 2025
8adc51e
Merge branch 'main' into feat/generation_manager
fzyzcjy Feb 21, 2025
528a834
merge
fzyzcjy Feb 21, 2025
38f4f65
Revert "merge"
fzyzcjy Feb 21, 2025
f5b5246
merge 3364
fzyzcjy Feb 21, 2025
74250fb
Merge branch 'feat/generation_manager' into feat/rename_tokenizer_man…
fzyzcjy Feb 21, 2025
8d0e1b5
Merge branch 'feat/rename_tokenizer_manager' into feat/separate_comm
fzyzcjy Feb 21, 2025
00ee8b2
engine.py
fzyzcjy Feb 21, 2025
6eb3975
fmt
fzyzcjy Feb 21, 2025
a637e3e
rename result_queue to match upstream
fzyzcjy Feb 21, 2025
c773969
detokenizer_manager
fzyzcjy Feb 21, 2025
9a348f7
Merge branch 'feat/separate_comm' into feat/engine_fragment_0125
fzyzcjy Feb 21, 2025
a98540c
engine_base from engine
fzyzcjy Feb 21, 2025
2850970
fix err
fzyzcjy Feb 21, 2025
26b2c25
Merge branch 'feat/generation_manager' into feat/rename_tokenizer_man…
fzyzcjy Feb 21, 2025
9cad89a
Merge branch 'feat/rename_tokenizer_manager' into feat/separate_comm
fzyzcjy Feb 21, 2025
1d373a0
Merge branch 'feat/separate_comm' into feat/engine_fragment_0125
fzyzcjy Feb 21, 2025
e452528
bump
fzyzcjy Feb 21, 2025
028b54e
bump
fzyzcjy Feb 21, 2025
12a76db
more
fzyzcjy Feb 21, 2025
4800eb1
Merge branch 'main' into feat/generation_manager
fzyzcjy Feb 22, 2025
0d56094
Merge branch 'feat/generation_manager' into feat/rename_tokenizer_man…
fzyzcjy Feb 22, 2025
c3f5c97
Merge branch 'feat/rename_tokenizer_manager' into feat/separate_comm
fzyzcjy Feb 22, 2025
2423dac
Merge branch 'feat/separate_comm' into feat/engine_fragment_0125
fzyzcjy Feb 22, 2025
1558252
fmt
fzyzcjy Feb 22, 2025
b4d3377
Merge branch 'feat/rename_tokenizer_manager' into feat/separate_comm
fzyzcjy Feb 22, 2025
6d3cd01
Merge branch 'feat/separate_comm' into feat/engine_fragment_0125
fzyzcjy Feb 22, 2025
5279ba5
Merge branch 'main' into feat/rename_tokenizer_manager
zhaochenyang20 Feb 23, 2025
63dc067
Merge branch 'main' into feat/rename_tokenizer_manager
fzyzcjy Feb 23, 2025
bc7f566
lint
fzyzcjy Feb 23, 2025
9e608f9
Merge branch 'feat/rename_tokenizer_manager' into feat/separate_comm
fzyzcjy Feb 23, 2025
3e45ea1
add copyright text
fzyzcjy Feb 23, 2025
c6e875a
Merge branch 'main' into feat/separate_comm
fzyzcjy Feb 23, 2025
bbccfeb
Merge branch 'main' into feat/separate_comm
shanyu-sys Feb 24, 2025
6795a90
mv no_grad
fzyzcjy Feb 24, 2025
7bd10ee
comments
fzyzcjy Feb 24, 2025
45a1ce7
Merge branch 'feat/separate_comm' of https://github.com/fzyzcjy/sglan…
fzyzcjy Feb 24, 2025
6eb99b9
doc
fzyzcjy Feb 24, 2025
a2c1d94
fmt
fzyzcjy Feb 24, 2025
68aa1f1
Merge branch 'feat/separate_comm' into feat/engine_fragment_0125
fzyzcjy Feb 24, 2025
c531e8c
comments
fzyzcjy Feb 24, 2025
6cbbb70
comments
fzyzcjy Feb 24, 2025
613cd9c
base method
fzyzcjy Feb 24, 2025
58bec25
doc
fzyzcjy Feb 24, 2025
dccb7eb
shutdown
fzyzcjy Feb 24, 2025
17a8874
fmt
fzyzcjy Feb 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,12 @@ jobs:
cd test/srt
python3 test_update_weights_from_distributed.py
- name: Test EngineFragment
timeout-minutes: 10
run: |
cd test/srt
python3 test_fragment.py
- name: Test expert parallelism (EP=2)
timeout-minutes: 10
run: |
Expand Down
2 changes: 1 addition & 1 deletion docs/backend/function_calling.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@
"from sglang.srt.managers.io_struct import Tool, Function\n",
"\n",
"llm = sgl.Engine(model_path=\"meta-llama/Meta-Llama-3.1-8B-Instruct\")\n",
"tokenizer = llm.tokenizer_manager.tokenizer\n",
"tokenizer = llm.orchestrator.tokenizer\n",
"input_ids = tokenizer.apply_chat_template(\n",
" messages, tokenize=True, add_generation_prompt=True, tools=tools\n",
")\n",
Expand Down
80 changes: 80 additions & 0 deletions examples/runtime/engine/offline_batch_inference_torchrun.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import datetime
import os
import sys

from sglang.srt.entrypoints.engine_fragment import EngineFragment


def run():
    """Run a small offline batch generation on one tensor-parallel rank.

    Intended to be started once per rank by ``torchrun``; each process reads
    its rank information from the ``LOCAL_RANK``, ``RANK`` and ``WORLD_SIZE``
    environment variables, builds one ``EngineFragment`` for that rank,
    generates completions for two fixed prompts, logs the output and shuts
    the fragment down.

    Example command:
    ```
    torchrun --nproc_per_node=4 offline_batch_inference_torchrun.py
    ```

    Raises:
        KeyError: if ``LOCAL_RANK``, ``RANK`` or ``WORLD_SIZE`` is not set.
        ValueError: if any of those variables is not an integer.
    """

    local_rank = int(os.environ["LOCAL_RANK"])
    rank = int(os.environ["RANK"])
    world_size = int(os.environ["WORLD_SIZE"])

    def _log(text):
        # Prefix every message with wall-clock time and the global rank so
        # interleaved output from several processes stays attributable.
        t = datetime.datetime.now().strftime("%H:%M:%S")
        print(f"[{t}] [rank={rank}] {text}")

    _log(
        f'start {local_rank=} {rank=} {world_size=} {sys.argv=} {os.environ.get("CUDA_VISIBLE_DEVICES")}'
    )

    # One tensor-parallel shard per launched process.
    tp_size = world_size
    tp_rank = rank
    _log(f"{tp_rank=} {tp_size=}")

    model_name, mem_fraction_static = "meta-llama/Llama-3.2-1B-Instruct", 0.1
    # model_name, mem_fraction_static = "meta-llama/Llama-3.1-70B-Instruct", 0.9  # test large models

    # TODO remove this in next PR
    # Drop the launcher-provided variables before the engine is created.
    # NOTE(review): presumably so the engine does not pick up torchrun's own
    # distributed configuration -- confirm.
    for k in (
        "GROUP_RANK",
        "GROUP_WORLD_SIZE",
        "LOCAL_RANK",
        "LOCAL_WORLD_SIZE",
        "MASTER_ADDR",
        "MASTER_PORT",
        "OMP_NUM_THREADS",
        "RANK",
        "ROLE_NAME",
        "ROLE_RANK",
        "ROLE_WORLD_SIZE",
        "TORCHELASTIC_ERROR_FILE",
        "TORCHELASTIC_MAX_RESTARTS",
        "TORCHELASTIC_RESTART_COUNT",
        "TORCHELASTIC_RUN_ID",
        "TORCHELASTIC_USE_AGENT_STORE",
        "TORCH_NCCL_ASYNC_ERROR_HANDLING",
        "WORLD_SIZE",
    ):
        # pop() with a default removes the variable if present and is a
        # no-op otherwise.
        os.environ.pop(k, None)

    # NOTE(review): gpu_id is set to the global rank; on a multi-node launch
    # local_rank may be what is intended -- confirm.
    fragment = EngineFragment(
        model_path=model_name,
        mem_fraction_static=mem_fraction_static,
        tp_size=tp_size,
        tp_rank=tp_rank,
        nccl_port=23456,
        gpu_id=tp_rank,
    )
    try:
        _log(f"{fragment=}")

        output = fragment.generate(
            prompt=["1+1=2, 1+2=3, 1+3=4, 1+4=", "9-1=8, 8-1=7, 7-1="],
            sampling_params=dict(max_new_tokens=16, temperature=0.0),
        )
        _log(f"{output=}")
    finally:
        # Always release the fragment, even if generation or logging fails.
        fragment.shutdown()

    _log("End script")


# Script entry point: run the example only when this file is executed
# directly (for instance by torchrun), not when it is imported.
if __name__ == "__main__":
    run()
2 changes: 1 addition & 1 deletion python/sglang/bench_one_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@
import torch.distributed as dist

from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.entrypoints.engine import _set_envs_and_config
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_executor.model_runner import ModelRunner
from sglang.srt.orchestration.std.launcher import _set_envs_and_config
from sglang.srt.sampling.sampling_params import SamplingParams
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
Expand Down
Loading
Loading