-
Notifications
You must be signed in to change notification settings - Fork 16
/
server.py
55 lines (49 loc) · 1.74 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import json
from http.server import BaseHTTPRequestHandler, HTTPServer
from phi_3_vision_mlx import load, generate
# Call load() once at import time and keep the result at module level so that
# every request handled below reuses it (it is passed to generate() as
# `preload`) instead of loading again per request.
preload = load()
class SimpleAPIHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing a minimal text-completion endpoint.

    ``POST /v1/completions`` expects a JSON object with two optional keys:

    * ``prompt`` -- a string or a list of strings (default ``''``).
    * ``max_tokens`` -- integer forwarded to ``generate`` (default ``512``).

    On success the reply is ``200`` with a JSON body of the form
    ``{"model": "phi-3-vision", "responses": [...]}``.  Any other path
    yields ``404``; malformed requests yield ``411``/``400``; a failure
    inside ``generate`` yields ``500``.
    """

    def _read_json_object(self):
        """Read and decode the request body as a JSON object.

        Returns:
            The decoded ``dict``, or ``None`` when the request was malformed,
            in which case an error response has already been sent.
        """
        raw_length = self.headers.get('Content-Length')
        if raw_length is None:
            self.send_error(411, "Length Required")
            return None
        try:
            content_length = int(raw_length)
        except ValueError:
            content_length = -1
        if content_length < 0:
            # A negative size would make rfile.read() block until EOF.
            self.send_error(400, "Invalid Content-Length")
            return None

        post_data = self.rfile.read(content_length)
        try:
            request = json.loads(post_data.decode('utf-8'))
        except (UnicodeDecodeError, json.JSONDecodeError):
            self.send_error(400, "Request body must be valid UTF-8 JSON")
            return None
        if not isinstance(request, dict):
            self.send_error(400, "Request body must be a JSON object")
            return None
        return request

    def do_POST(self):
        """Handle ``POST`` requests; only ``/v1/completions`` is served."""
        if self.path != "/v1/completions":
            self.send_error(404, "Not Found")
            return

        request = self._read_json_object()
        if request is None:
            return

        prompts = request.get('prompt', '')
        max_tokens = request.get('max_tokens', 512)

        # Accept a single prompt or a batch; always hand generate() a list.
        if isinstance(prompts, str):
            prompts = [prompts]
        if not (isinstance(prompts, list)
                and all(isinstance(p, str) for p in prompts)):
            self.send_error(400, "'prompt' must be a string or a list of strings")
            return
        # bool is a subclass of int, so exclude it explicitly.
        if isinstance(max_tokens, bool) or not isinstance(max_tokens, int):
            self.send_error(400, "'max_tokens' must be an integer")
            return

        try:
            responses = generate(prompts, preload=preload, max_tokens=max_tokens)
        except Exception as exc:
            # Request boundary: report the failure to the client instead of
            # letting the server drop the connection without a reply.
            self.log_error("generation failed: %r", exc)
            self.send_error(500, "Generation failed")
            return
        if isinstance(responses, str):
            responses = [responses]

        body = json.dumps({
            "model": "phi-3-vision",
            "responses": responses,
        }).encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)
def run(server_class=HTTPServer, handler_class=SimpleAPIHandler, port=8000):
    """Start the API server and block until it stops.

    Args:
        server_class: Server type to instantiate; it is called as
            ``server_class((host, port), handler_class)``.
        handler_class: Request handler class given to the server.
        port: TCP port to listen on.  The host is the empty string, so the
            server binds on all interfaces.
    """
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    print(f"Starting server on port {port}")
    try:
        httpd.serve_forever()
    finally:
        # Release the listening socket even when serve_forever() is
        # interrupted (e.g. Ctrl-C); the exception still propagates.
        httpd.server_close()
# Start the server with the defaults of run() only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    run()
"""
curl -X POST http://localhost:8000/v1/completions \
-H "Content-Type: application/json" \
-d '{
"prompt": "Hello, world!",
"max_tokens": 50}'
curl -X POST http://localhost:8000/v1/completions \
-H "Content-Type: application/json" \
-d '{
"prompt": [
"Hello, world!",
"Guten tag!"
],
"max_tokens": 50}'
"""