forked from mit-han-lab/qserve
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pyproject.toml
33 lines (30 loc) · 903 Bytes
/
pyproject.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Build-time configuration: the packages needed to build this project and the
# backend entry point that build frontends invoke.
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
# Core package metadata.
[project]
name = "qserve"
version = "0.1.0"
description = "Efficient and accurate LLM serving system with W4A8KV4 quantization."
readme = "README.md"
requires-python = ">=3.9"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: Apache Software License",
]
# Runtime requirements, one requirement string per line. Entries written with
# `==` are exact pins; `>=` entries set only a lower bound; bare names are
# unconstrained.
dependencies = [
    "accelerate",
    "sentencepiece",
    "tokenizers>=0.12.1",
    "torch>=2.0.0",
    "torchvision",
    "transformers==4.37.2",
    "datasets",
    "lm_eval==0.3.0",
    "texttable",
    "toml",
    "attributedict",
    "xformers==0.0.26.post1",
    "protobuf",
    "gradio==3.35.2",
    "gradio_client==0.2.9",
    "fastapi",
    "uvicorn",
    "pydantic==1.10.14",
    "webdataset",
]
# Automatic package discovery: skip any package whose name matches one of
# these glob patterns.
[tool.setuptools.packages.find]
exclude = [
    "results*",
    "scripts*",
    "examples*",
]
# Same glob patterns as the setuptools package-discovery exclude list.
# NOTE(review): it is unclear which tool, if any, reads a `[tool.wheel]`
# table; these patterns may have no effect on the built wheel. Confirm before
# relying on them.
[tool.wheel]
exclude = ["results*", "scripts*", "examples*"]