-
Notifications
You must be signed in to change notification settings - Fork 4
/
modal_vespa.py
38 lines (30 loc) · 977 Bytes
/
modal_vespa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import modal
# Alternative kept for reference: pull the image from the registry instead.
# vespa_image = modal.Image.from_registry("vespaengine/vespa", add_python="3.11")

# Container image built from the file named "Dockerfile", with
# add_python="3.11" passed to the builder.
vespa_image = modal.Image.from_dockerfile(
    "Dockerfile",
    add_python="3.11",
)

# The Modal app, named "dankvespa", created with vespa_image as its image.
app = modal.App(
    "dankvespa",
    image=vespa_image,
)
# NOTE: modal.web_endpoint wraps a single function or method, so it must not
# be stacked on top of app.cls. Doing so replaces the class with a web-endpoint
# wrapper and breaks Vespa().search.remote(...). To expose an HTTP route,
# apply @modal.web_endpoint(method="POST", docs=True) to a dedicated method
# of this class instead.
@app.cls(
    enable_memory_snapshot=True,
    # volumes={"/my_vol": modal.Volume.from_name("my-test-volume")}
)
class Vespa:
    """Modal class registered on ``app`` with memory snapshots enabled.

    Every hook below currently only prints a marker line, so the class
    shows the lifecycle wiring (build, pre-snapshot enter, post-snapshot
    enter, remote method) without doing any real work yet.
    """

    @modal.build()
    def build(self):
        """Build hook; currently only prints "build"."""
        print("build")

    @modal.enter(snap=True)
    def load(self):
        """Enter hook registered with ``snap=True``.

        Per the Modal memory-snapshot guide, such hooks run before the
        snapshot is taken, so state created here is captured in it.
        Currently only prints "save state".
        """
        print("save state")

    @modal.enter(snap=False)
    def setup(self):
        """Enter hook registered with ``snap=False``.

        Per the Modal memory-snapshot guide, such hooks run after the
        container is restored from the snapshot. Currently only prints
        "loaded from snapshot".
        """
        print("loaded from snapshot")

    @modal.method()
    def search(self, query: str) -> None:
        """Handle a search request.

        Args:
            query: The search query; not used yet.

        Returns:
            None. The method currently only prints "search".
        """
        print("search")
@app.local_entrypoint()
async def main():
    """Call ``Vespa.search`` remotely with the query "test" and print what it returns."""
    print(await Vespa().search.remote.aio("test"))