forked from a16z-infra/llama2-chatbot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
28 lines (20 loc) · 1.01 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import replicate
import time
# Debounce configuration: minimum number of seconds that must separate two
# accepted calls to debounce_replicate_run. Tune to taste.
debounce_interval = 2
# Debounce state: time.time() stamp of the most recently accepted call.
# Starts at 0 so the very first call is never treated as "too soon".
last_call_time = 0
def debounce_replicate_run(llm, prompt, max_len, temperature, top_p, API_TOKEN):
    """Run a Replicate model unless the previous accepted call was too recent.

    The module-level ``last_call_time`` holds the ``time.time()`` stamp of the
    last accepted call. A call that arrives less than ``debounce_interval``
    seconds after it is rejected without contacting Replicate, and
    ``last_call_time`` is left unchanged.

    Args:
        llm: Model reference passed as the first argument to ``replicate.run``.
        prompt: Prompt text; ``"Assistant: "`` is appended before sending.
        max_len: Sent as the ``max_length`` input.
        temperature: Sent as the ``temperature`` input.
        top_p: Sent as the ``top_p`` input.
        API_TOKEN: Forwarded to ``replicate.run`` as ``api_token``.

    Returns:
        Whatever ``replicate.run`` returns for an accepted call, or a fixed
        "too fast" message string when the call is debounced.
    """
    global last_call_time
    print("last call time: ", last_call_time)

    now = time.time()
    elapsed = now - last_call_time

    # time.time() is wall-clock time and may be stepped backwards (NTP
    # correction, manual change). A negative elapsed value says nothing about
    # the request rate, so only debounce when the gap is genuinely inside the
    # window; otherwise every call would be rejected until the clock caught up.
    if 0 <= elapsed < debounce_interval:
        print("Debouncing")
        return "Hello! You are sending requests too fast. Please wait a few seconds before sending another request."

    # Stamp the accepted call with the same reading used for the check.
    last_call_time = now

    model_input = {
        "prompt": prompt + "Assistant: ",
        "max_length": max_len,
        "temperature": temperature,
        "top_p": top_p,
        "repetition_penalty": 1,
    }
    output = replicate.run(llm, input=model_input, api_token=API_TOKEN)
    return output