Skip to content

Commit

Permalink
change all prints to logging. Lots of minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronkaplan committed Jan 1, 2024
1 parent f1d2848 commit ef3cc93
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 34 deletions.
3 changes: 2 additions & 1 deletion app/auth.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os

from fastapi import Depends, HTTPException
from fastapi.security import HTTPBasic, HTTPBasicCredentials
Expand All @@ -7,7 +8,7 @@

# fake users to simulate authentication
fake_users = {
"digit": "united we stand, divided we fall!", # XXX FIXME: currently this is intentionally simple and in the code. We will replace this with proper authentication. It's just against bots misusing the service automatically.
os.getenv('BASIC_AUTH_USER'): os.getenv('BASIC_AUTH_PASSWD')
}


Expand Down
15 changes: 9 additions & 6 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Main FastAPI file. Provides the app WSGI entry point."""
import os
import sys
import json

import uvicorn
from fastapi import FastAPI, Request, Form, Depends
Expand All @@ -20,32 +22,33 @@
from distutils.util import strtobool


DEBUG = True
from settings import log


# First, read the environment settings
if not load_dotenv(find_dotenv(), verbose=True, override=False): # read local .env file
print("Could not find .env file! Assuming ENV vars work")
log.warning("Could not find .env file! Assuming ENV vars work")

try:
VERSION = open('../VERSION.txt', encoding='utf-8').readline().rstrip('\n')
except Exception as e:
print("could not find VERSION.txt, bailing out.")
log.error("could not find VERSION.txt, bailing out.")
sys.exit(-1)


app = FastAPI(version=VERSION)
templates = Jinja2Templates(directory="/templates")
app.mount("/static", StaticFiles(directory="/static"), name="static")
GO_AZURE = False # default
OUTPUT_JSON = bool(strtobool(os.getenv('OUTPUT_JSON', 'false')))
DRY_RUN = bool(strtobool(os.getenv('DRY_RUN', 'false')))


# First detect if we should invoke OpenAI via MS Azure or directly
try:
GO_AZURE = bool(strtobool(os.getenv('USE_MS_AZURE', 'false')))
except Exception as e:
print(
f"Could not read 'USE_MS_AZURE' env var. Reason: '{str(e)}'. Reverting to false.")
log.warning(f"Could not read 'USE_MS_AZURE' env var. Reason: '{str(e)}'. Reverting to false.")
GO_AZURE = False


Expand All @@ -59,7 +62,7 @@ async def dispatch(self, request: Request, call_next):

app.add_middleware(HTTPSRedirectMiddleware)

summarizer = Summarizer(go_azure=GO_AZURE, model='gpt-4-1106-preview', max_tokens=8192)
summarizer = Summarizer(go_azure=GO_AZURE, model='gpt-4-1106-preview', max_tokens=8192, output_json=OUTPUT_JSON)


async def fetch_text_from_url(url: str) -> str:
Expand Down
19 changes: 7 additions & 12 deletions app/settings.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
"""
import uuid
from pydantic_settings import SettingsConfigDict, BaseSettings
import logging


class Settings(BaseSettings):
OPENAI_API_KEY: str = "" # The OpenAI API key
DRY_RUN: bool = True # If true, don't send to the OpenAI API
SYSTEM_PROMPT: str = "" # the system role prompt part to prime GPT in the right direction.
SESSION_UUID: uuid.UUID = uuid.uuid4() # the "End-user ID" field for the OpenAI API
USE_MS_AZURE: bool = True # go via MS AZURE's API which basically proxies OpenaI, but hey, it's GDPR compliant. *shrug*
API_BASE: str = "" # only needed in case we need to go via MS Azure.
model_config = SettingsConfigDict(env_file='.env')
"""
# Configure logging for the whole app: a single root-level configuration with
# timestamped, module-qualified messages (e.g. "2024-01-01 12:00:00 - app.main - INFO - ...").
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')

# Shared module logger; other modules import it as `from settings import log`
# (see app/main.py and app/summarizer.py in this commit).
log = logging.getLogger(__name__)
34 changes: 23 additions & 11 deletions app/summarizer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os
import pprint
from typing import Tuple

import openai
from openai import AzureOpenAI
from typing import Tuple

from settings import log

# First, read the environment settings
from dotenv import load_dotenv, find_dotenv
Expand All @@ -15,20 +16,22 @@ class Summarizer:

client: openai._base_client.BaseClient

def __init__(self, model: str, max_tokens: int, system_prompt: str = "", go_azure: bool = False):
def __init__(self, model: str, max_tokens: int, system_prompt: str = "", go_azure: bool = False, output_json: bool = False):
if system_prompt:
self.system_prompt = system_prompt
else:
self.system_prompt = "You are a Cyber Threat Intelligence Analyst and need to summarise a report for upper management. The report shall be nicely formatted with two sections: one Executive Summary section and one 'TTPs and IoCs' section. The second section shall list all IP addresses, domains, URLs, tools and hashes (sha-1, sha256, md5, etc.) which can be found in the report. Nicely format the report as markdown. Use newlines between markdown headings."
self.model = model
self.max_tokens = max_tokens
self.go_azure = go_azure
self.output_json = output_json

if self.go_azure:
api_version = os.environ['OPENAI_API_VERSION']
azure_endpoint = os.environ['OPENAI_API_BASE']
azure_deployment = os.environ['ENGINE']
api_key = os.environ['AZURE_OPENAI_API_KEY']
print(f"""
log.debug(f"""
{api_version=},
{azure_endpoint=},
{azure_deployment=},
Expand All @@ -48,7 +51,7 @@ def __init__(self, model: str, max_tokens: int, system_prompt: str = "", go_azur
openai.api_base = os.environ['OPENAI_API_BASE'] # "https://devmartiopenai.openai.azure.com/"
openai.api_version = os.environ['OPENAI_API_VERSION'] # "2023-05-15"
"""
print(f"Using Azure client {self.client._version}")
log.info(f"Using Azure client {self.client._version}")
else:
self.client = openai.OpenAI(api_key=os.environ['OPENAI_API_KEY'])

Expand All @@ -65,7 +68,7 @@ def summarize(self, text: str, system_prompt: str = "") -> Tuple[str, str]:

try:
if self.go_azure:
print("Using MS AZURE!")
log.info("Using MS AZURE!")
response = self.client.chat.completions.create(model=os.environ['ENGINE'],
messages=messages,
temperature=0.7,
Expand All @@ -74,31 +77,40 @@ def summarize(self, text: str, system_prompt: str = "") -> Tuple[str, str]:
max_tokens=self.max_tokens,
n=1)
else: # go directly via OpenAI's API
print("Using OpenAI directly!")
log.info("Using OpenAI directly!")
if self.output_json:
response_format = {"type": "json_object"}
else:
response_format = None
response = self.client.chat.completions.create(model=self.model,
messages=messages,
temperature=0.7,
top_p=0.95,
stop=None,
max_tokens=self.max_tokens,
response_format=response_format,
n=1)
print(f"Response (OpenAI): {response}")
print(80*"=")
print(f"response (Openai).choices[0].text: {response.choices[0].message}")
print(response.model_dump_json(indent=2))

log.debug(f"Full Response (OpenAI): {response}")
log.debug(f"response.choices[0].text: {response.choices[0].message}")
log.debug(response.model_dump_json(indent=2))
result = response.choices[0].message.content
error = None # Or move the error handling back to main.py, not sure
except openai.APIConnectionError as e:
result = None
error = f"The server could not be reached. Reason {e.__cause__}"
log.error(error)
except openai.RateLimitError as e:
result = None
error = f"A 429 status code was received; we should back off a bit. {str(e)}"
log.error(error)
except openai.APIStatusError as e:
result = None
error = f"Another non-200-range status code was received. Status code: {e.status_code}. \n\nResponse: {e.message}"
log.error(error)
except Exception as e:
result = None
error = f"Unknown error! Error = '{str(e)}'"
log.error(error)

return result, error # type: ignore
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
fastapi==0.104.1
fastapi==0.108.0
MarkupSafe
multidict
openai==1.3.6
openai==1.6.1
openpyxl
platformdirs
pydantic
Expand Down
4 changes: 2 additions & 2 deletions templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ <h3>Summarize this</h3>
<footer class="footer">
<div class="container is-fluid">
<div class="content has-text-centered">
<font size="-1">Version: 0.2. Copyright 2023 (C) by Aaron Kaplan. All rights reserved.<a
<font size="-1">Version: 0.3. Copyright 2023-2024 (C) by Aaron Kaplan. All rights reserved.<a
href="mailto:[email protected]">E-mail</a>.<p></p>
<p class="font-size-8">Made with <font color="red"></font> in Vienna</p>
</font>
Expand Down Expand Up @@ -293,4 +293,4 @@ <h3>Summarize this</h3>
});
</script>

</html>
</html>

0 comments on commit ef3cc93

Please sign in to comment.