Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Formatting and Linting Enhancements #43

Merged
merged 4 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
"vscode": {
"extensions": [
"humao.rest-client",
"ms-python.black-formatter",
"yzhang.markdown-all-in-one",
"ms-python.pylint",
"ms-azuretools.vscode-bicep",
"timonwong.shellcheck",
"hediet.vscode-drawio"
"hediet.vscode-drawio",
"charliermarsh.ruff"
]
}
},
Expand Down
12 changes: 7 additions & 5 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
"[python]": {
"editor.formatOnSave": true,
"editor.formatOnSaveMode": "file",
"editor.formatOnPaste": true,
"editor.formatOnType": true,
"editor.defaultFormatter": "ms-python.black-formatter"
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.codeActionsOnSave": {
"source.fixAll": "explicit",
"source.organizeImports": "explicit"
}
},
"python.testing.pytestArgs": [
"tests"
],
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ install-requirements: ## Install PyPI requirements for all projects
pip install -r tests/requirements.txt
pip install -r tools/test-client/requirements.txt
pip install -r tools/test-client-web/requirements.txt
pip install -r tools/dev-requirements.txt

run-simulated-api: ## Launch the AOAI Simulated API locally
gunicorn \
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[tool.black]
[tool.ruff]
line-length = 120


Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import inspect
import logging
from typing import Callable, Awaitable
from typing import Awaitable, Callable

from aoai_api_simulator.models import RequestContext
from fastapi import HTTPException, Response

from aoai_api_simulator.models import RequestContext
from .openai import azure_openai_embedding, azure_openai_completion, azure_openai_chat_completion
from .openai import azure_openai_chat_completion, azure_openai_completion, azure_openai_embedding

logger = logging.getLogger(__name__)

Expand Down
24 changes: 12 additions & 12 deletions src/aoai-api-simulator/src/aoai_api_simulator/generator/openai.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,31 @@
import asyncio
import json
import logging
import time
import random
import time
from typing import Tuple

import nanoid


from fastapi import Response
from fastapi.responses import StreamingResponse

from aoai_api_simulator import constants
from aoai_api_simulator.auth import validate_api_key_header
from aoai_api_simulator.models import RequestContext, OpenAIDeployment
from aoai_api_simulator.constants import (
SIMULATOR_KEY_DEPLOYMENT_NAME,
SIMULATOR_KEY_OPENAI_PROMPT_TOKENS,
SIMULATOR_KEY_LIMITER,
SIMULATOR_KEY_OPENAI_COMPLETION_TOKENS,
SIMULATOR_KEY_OPENAI_MAX_TOKENS_EFFECTIVE,
SIMULATOR_KEY_OPENAI_MAX_TOKENS_REQUESTED,
SIMULATOR_KEY_OPENAI_PROMPT_TOKENS,
SIMULATOR_KEY_OPENAI_TOTAL_TOKENS,
SIMULATOR_KEY_LIMITER,
SIMULATOR_KEY_OPERATION_NAME,
SIMULATOR_KEY_OPENAI_MAX_TOKENS_REQUESTED,
SIMULATOR_KEY_OPENAI_MAX_TOKENS_EFFECTIVE,
)
from aoai_api_simulator.generator.openai_tokens import (
get_max_completion_tokens,
num_tokens_from_string,
num_tokens_from_messages,
num_tokens_from_string,
)
from aoai_api_simulator.models import OpenAIDeployment, RequestContext
from fastapi import Response
from fastapi.responses import StreamingResponse

# This file contains a default implementation of the openai generators
# You can configure your own generators by creating a generator_config.py file and setting the
Expand Down Expand Up @@ -212,6 +209,7 @@ def get_lorem_factor(max_tokens: int):
return 0.5


# pylint: disable-next=too-few-public-methods
class LoremReference:
"""
Generating large amounts of lorem text can be slow, so we pre-generate a set of reference values.
Expand Down Expand Up @@ -438,6 +436,7 @@ def create_completion_response(
)


# pylint: disable-next=too-many-arguments
def create_lorem_chat_completion_response(
context: RequestContext,
deployment_name: str,
Expand Down Expand Up @@ -466,6 +465,7 @@ def create_lorem_chat_completion_response(
)


# pylint: disable-next=too-many-arguments
def create_chat_completion_response(
context: RequestContext,
deployment_name: str,
Expand Down
11 changes: 4 additions & 7 deletions src/aoai-api-simulator/src/aoai_api_simulator/limiters.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
from dataclasses import dataclass
import inspect
import json
import logging
import math
import time
from dataclasses import dataclass
from typing import Awaitable, Callable

from fastapi import Response

from aoai_api_simulator import constants
from aoai_api_simulator.metrics import simulator_metrics
from aoai_api_simulator.models import Config, RequestContext
from fastapi import Response

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -96,6 +95,7 @@ class WindowAddResult:
retry_reason: str | None # "tokens" or "requests"


# pylint: disable-next=too-few-public-methods
class SlidingWindow:
"""
Represents a time window for rate-limiting
Expand All @@ -115,7 +115,6 @@ def _purge(self, cut_off: float):
self._requests.pop(0)

def _calculate_window_counts_for_request(self, token_cost: int, timestamp: float) -> tuple[int, int, float, float]:

# Iterate the list in reverse order
# Track:
# - the number of requests in the last 10 seconds (including this request)
Expand Down Expand Up @@ -175,7 +174,6 @@ def add_request(self, token_cost: int, timestamp: float = -1) -> WindowAddResult
# to exceed the tokens_per_minute limit, i.e. we already used the limit for the current 60s window
# if requests_full_duration < 10 or tokens_full_duration < 60:
if token_count_in_60s > self._tokens_per_minute or request_count_in_10s > self._requests_per_10_seconds:

# Edge case where we've hit the max tokens and the current request is for max_tokens
# but haven't hit the request limit
# in this case, we wait until the last saved request is out of the window
Expand Down Expand Up @@ -227,9 +225,8 @@ def add_request(self, token_cost: int, timestamp: float = -1) -> WindowAddResult


def create_openai_sliding_window_limiter(
deployments: dict[str, int]
deployments: dict[str, int],
) -> Callable[[RequestContext, Response], Response | None]:

@dataclass
class OpenAISlidingWindowLimit:
deployment: str
Expand Down
1 change: 1 addition & 0 deletions tools/dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ruff==0.6.4