.env

# Sample and explanation can be found in .env.example

# `LOG_CHAT`: Whether to log the chat
LOG_CHAT=true

# `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
CACHE_BACKEND=LMDB
CACHE_CHAT_COMPLETION=true
DEFAULT_REQUEST_CACHING_VALUE=false

#LOG_CACHE_DB_INFO=true

#BENCHMARK_MODE=true

# `OPENAI_BASE_URL`: Forward any service address in the style of OpenAI; multiple can be specified, separated by commas.
# If more than one is specified, neither OPENAI_ROUTE_PREFIX nor EXTRA_ROUTE_PREFIX can be the root route/
OPENAI_BASE_URL=https://api.openai.com

# `OPENAI_ROUTE_PREFIX`: Specify the forwarding route prefix for all services in OpenAI style (for logging purposes)
OPENAI_ROUTE_PREFIX=/

#OPENAI_API_KEY=
#FORWARD_KEY=

CHAT_COMPLETION_ROUTE=/v1/chat/completions
COMPLETION_ROUTE=/v1/completions

# `EXTRA_BASE_URL`: Specify any service for forwarding
#EXTRA_BASE_URL=
# `EXTRA_ROUTE_PREFIX`: Route prefix matching EXTRA_BASE_URL
#EXTRA_ROUTE_PREFIX=

# `REQ_RATE_LIMIT`: i.e., Request rate limit for specified routes, user specific
# format: {route: ratelimit-string}
# ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation
REQ_RATE_LIMIT={"/v1/chat/completions":"100/2minutes", "/v1/completions":"60/minute;600/hour"}

# Backend for rate limiting: [memory, redis, memcached, ...] :ref: https://limits.readthedocs.io/en/stable/storage.html#
#REQ_RATE_LIMIT_BACKEND=redis://localhost:6379

# `GLOBAL_RATE_LIMIT`: Limits all routes not specified by `REQ_RATE_LIMIT`. If not set, there's no limit by default.
GLOBAL_RATE_LIMIT=100/minute

#`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) :ref: https://limits.readthedocs.io/en/latest/strategies.html
# `fixed-window`: most memory efficient strategy; `moving-window`:most effective for preventing bursts but has higher memory cost.
RATE_LIMIT_STRATEGY=moving-window

# Rate limit for returned tokens
TOKEN_RATE_LIMIT={"/v1/chat/completions":"60/second","/v1/completions":"60/second"}

# TCP connection timeout duration (in seconds)
TIMEOUT=6

ITER_CHUNK_TYPE=one-by-one
#ITER_CHUNK_TYPE=efficiency

#IP_BLACKLIST=

# Set timezone
TZ=Asia/Shanghai