Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Formatting and Linting Enhancements #43

Merged
merged 4 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
"vscode": {
"extensions": [
"humao.rest-client",
"ms-python.black-formatter",
"yzhang.markdown-all-in-one",
"ms-python.pylint",
"ms-azuretools.vscode-bicep",
"timonwong.shellcheck",
"hediet.vscode-drawio"
"hediet.vscode-drawio",
"charliermarsh.ruff"
]
}
},
Expand Down
12 changes: 7 additions & 5 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
"[python]": {
"editor.formatOnSave": true,
"editor.formatOnSaveMode": "file",
"editor.formatOnPaste": true,
"editor.formatOnType": true,
"editor.defaultFormatter": "ms-python.black-formatter"
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.codeActionsOnSave": {
"source.fixAll": "explicit",
"source.organizeImports": "explicit"
}
},
"python.testing.pytestArgs": [
"tests"
],
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ install-requirements: ## Install PyPI requirements for all projects
pip install -r tests/requirements.txt
pip install -r tools/test-client/requirements.txt
pip install -r tools/test-client-web/requirements.txt
pip install -r tools/dev-requirements.txt

run-simulated-api: ## Launch the AOAI Simulated API locally
gunicorn \
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[tool.black]
[tool.ruff]
line-length = 120


Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import inspect
import logging
from typing import Callable, Awaitable
from typing import Awaitable, Callable

from aoai_api_simulator.models import RequestContext
from fastapi import HTTPException, Response

from aoai_api_simulator.models import RequestContext
from .openai import azure_openai_embedding, azure_openai_completion, azure_openai_chat_completion
from .openai import azure_openai_chat_completion, azure_openai_completion, azure_openai_embedding

logger = logging.getLogger(__name__)

Expand Down
24 changes: 12 additions & 12 deletions src/aoai-api-simulator/src/aoai_api_simulator/generator/openai.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,31 @@
import asyncio
import json
import logging
import time
import random
import time
from typing import Tuple

import nanoid


from fastapi import Response
from fastapi.responses import StreamingResponse

from aoai_api_simulator import constants
from aoai_api_simulator.auth import validate_api_key_header
from aoai_api_simulator.models import RequestContext, OpenAIDeployment
from aoai_api_simulator.constants import (
SIMULATOR_KEY_DEPLOYMENT_NAME,
SIMULATOR_KEY_OPENAI_PROMPT_TOKENS,
SIMULATOR_KEY_LIMITER,
SIMULATOR_KEY_OPENAI_COMPLETION_TOKENS,
SIMULATOR_KEY_OPENAI_MAX_TOKENS_EFFECTIVE,
SIMULATOR_KEY_OPENAI_MAX_TOKENS_REQUESTED,
SIMULATOR_KEY_OPENAI_PROMPT_TOKENS,
SIMULATOR_KEY_OPENAI_TOTAL_TOKENS,
SIMULATOR_KEY_LIMITER,
SIMULATOR_KEY_OPERATION_NAME,
SIMULATOR_KEY_OPENAI_MAX_TOKENS_REQUESTED,
SIMULATOR_KEY_OPENAI_MAX_TOKENS_EFFECTIVE,
)
from aoai_api_simulator.generator.openai_tokens import (
get_max_completion_tokens,
num_tokens_from_string,
num_tokens_from_messages,
num_tokens_from_string,
)
from aoai_api_simulator.models import OpenAIDeployment, RequestContext
from fastapi import Response
from fastapi.responses import StreamingResponse

# This file contains a default implementation of the openai generators
# You can configure your own generators by creating a generator_config.py file and setting the
Expand Down Expand Up @@ -212,6 +209,7 @@ def get_lorem_factor(max_tokens: int):
return 0.5


# pylint: disable-next=too-few-public-methods
class LoremReference:
"""
Generating large amounts of lorem text can be slow, so we pre-generate a set of reference values.
Expand Down Expand Up @@ -438,6 +436,7 @@ def create_completion_response(
)


# pylint: disable-next=too-many-arguments
def create_lorem_chat_completion_response(
context: RequestContext,
deployment_name: str,
Expand Down Expand Up @@ -466,6 +465,7 @@ def create_lorem_chat_completion_response(
)


# pylint: disable-next=too-many-arguments
def create_chat_completion_response(
context: RequestContext,
deployment_name: str,
Expand Down
11 changes: 4 additions & 7 deletions src/aoai-api-simulator/src/aoai_api_simulator/limiters.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
from dataclasses import dataclass
import inspect
import json
import logging
import math
import time
from dataclasses import dataclass
from typing import Awaitable, Callable

from fastapi import Response

from aoai_api_simulator import constants
from aoai_api_simulator.metrics import simulator_metrics
from aoai_api_simulator.models import Config, RequestContext
from fastapi import Response

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -96,6 +95,7 @@ class WindowAddResult:
retry_reason: str | None # "tokens" or "requests"


# pylint: disable-next=too-few-public-methods
class SlidingWindow:
"""
Represents a time window for rate-limiting
Expand All @@ -115,7 +115,6 @@ def _purge(self, cut_off: float):
self._requests.pop(0)

def _calculate_window_counts_for_request(self, token_cost: int, timestamp: float) -> tuple[int, int, float, float]:

# Iterate the list in reverse order
# Track:
# - the number of requests in the last 10 seconds (including this request)
Expand Down Expand Up @@ -175,7 +174,6 @@ def add_request(self, token_cost: int, timestamp: float = -1) -> WindowAddResult
# to exceed the tokens_per_minute limit, i.e. we already used the limit for the current 60s window
# if requests_full_duration < 10 or tokens_full_duration < 60:
if token_count_in_60s > self._tokens_per_minute or request_count_in_10s > self._requests_per_10_seconds:

# Edge case where we've hit the max tokens and the current request is for max_tokens
# but haven't hit the request limit
# in this case, we wait until the last saved request is out of the window
Expand Down Expand Up @@ -227,9 +225,8 @@ def add_request(self, token_cost: int, timestamp: float = -1) -> WindowAddResult


def create_openai_sliding_window_limiter(
deployments: dict[str, int]
deployments: dict[str, int],
) -> Callable[[RequestContext, Response], Response | None]:

@dataclass
class OpenAISlidingWindowLimit:
deployment: str
Expand Down
1 change: 1 addition & 0 deletions tools/dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ruff==0.6.4