Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented netMHCpan and netMHCIIpan #10

Merged
merged 5 commits into from
Oct 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
# If you are cloning this repo, create a copy of this file named `.env` and populate it with your secrets.

# The database URL is used to connect to your Supabase database.
POSTGRES_URL="postgres://postgres.[USERNAME]:[PASSWORD]@aws-0-eu-central-1.pooler.supabase.com:6543/postgres?workaround=supabase-pooler.vercel"
POSTGRES_URL="postgres://postgres.[USERNAME]:[PASSWORD]@aws-0-us-east-1.pooler.supabase.com:6543/postgres?workaround=supabase-pooler.vercel"

# FastAPI
NEXT_PUBLIC_FASTAPI_URL="http://127.0.0.1:8000"
NEXT_PUBLIC_FASTAPI_STAGE_URL=""
NEXT_PUBLIC_USE_LAMBDA_API="false" # Set to "true" when you want to test Lambda

# Supabase
NEXT_PUBLIC_SUPABASE_URL=""
Expand Down
14 changes: 7 additions & 7 deletions apps/fastapi/.env.example
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
BACKEND_CORS_ORIGINS="http://localhost:3000"

# AWS
S3_BUCKET_NAME=""
SAGEMAKER_ENDPOINT_NAME=""
EC2_TOOLS_API_URL=""

# Supabase
SUPABASE_URL=""
SUPABASE_KEY=""
JWT_SECRET=""
SUPERUSER_EMAIL=""
SUPERUSER_PASSWORD=""

# AWS
REGION="us-east-1"
PROJECT_NAME=""
RAW_BUCKET=""
ARTIFACTS_BUCKET=""
OUTPUT_BUCKET=""

# HuggingFace
HUGGINGFACE_ACCESS_TOKEN=""

Expand Down
1 change: 1 addition & 0 deletions apps/fastapi/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ authors = [
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"aioboto3>=13.2.0",
"aiofiles>=24.1.0",
"boto3>=1.35.29",
"fastapi[standard]>=0.115.0",
Expand Down
22 changes: 19 additions & 3 deletions apps/fastapi/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
# This file was autogenerated by uv via the following command:
# uv pip compile pyproject.toml -o requirements.txt
aiofiles==24.1.0
aioboto3==13.2.0
# via fastapi-epitope-prediction (pyproject.toml)
aiobotocore==2.15.2
# via aioboto3
aiofiles==24.1.0
# via
# fastapi-epitope-prediction (pyproject.toml)
# aioboto3
aiohappyeyeballs==2.4.3
# via aiohttp
aiohttp==3.10.8
# via realtime
# via
# aiobotocore
# realtime
aioitertools==0.12.0
# via aiobotocore
aiosignal==1.3.1
# via aiohttp
annotated-types==0.7.0
Expand All @@ -19,9 +29,12 @@ anyio==4.6.0
attrs==24.2.0
# via aiohttp
boto3==1.35.32
# via fastapi-epitope-prediction (pyproject.toml)
# via
# fastapi-epitope-prediction (pyproject.toml)
# aiobotocore
botocore==1.35.32
# via
# aiobotocore
# boto3
# s3transfer
certifi==2024.8.30
Expand Down Expand Up @@ -77,6 +90,7 @@ httptools==0.6.1
# via uvicorn
httpx==0.27.2
# via
# fastapi-epitope-prediction (pyproject.toml)
# fastapi
# gotrue
# openai
Expand Down Expand Up @@ -235,5 +249,7 @@ websockets==13.1
# via
# realtime
# uvicorn
wrapt==1.16.0
# via aiobotocore
yarl==1.13.1
# via aiohttp
6 changes: 6 additions & 0 deletions apps/fastapi/src/app/api/api_v1/endpoints/prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ async def create_conformational_b_prediction(
chain=prediction_in.chain,
is_structure_based=prediction_in.is_structure_based,
prediction_type="conformational-b",
user_id=user.id,
db=db,
)

Expand Down Expand Up @@ -71,6 +72,7 @@ async def create_linear_b_prediction(
job_id=job.id,
sequence=prediction_in.sequence,
prediction_type="linear-b",
user_id=user.id,
db=db,
)

Expand All @@ -97,7 +99,9 @@ async def create_mhc_i_prediction(
process_and_update_prediction,
job_id=job.id,
sequence=prediction_in.sequence,
alleles=prediction_in.alleles,
prediction_type="mhc-i",
user_id=user.id,
db=db,
)

Expand All @@ -124,7 +128,9 @@ async def create_mhc_ii_prediction(
process_and_update_prediction,
job_id=job.id,
sequence=prediction_in.sequence,
alleles=prediction_in.alleles,
prediction_type="mhc-ii",
user_id=user.id,
db=db,
)

Expand Down
10 changes: 7 additions & 3 deletions apps/fastapi/src/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

class Settings(BaseSettings):
ENV: str = Field(default="", env="ENV")
REGION: str = Field(default="us-east-1", env="REGION")
AWS_REGION: str = Field(default="us-east-1", env="AWS_REGION")
BACKEND_CORS_ORIGINS: Union[List[AnyHttpUrl], List[str]] = Field(
default=["http://localhost:3000"], env="BACKEND_CORS_ORIGINS"
)
Expand All @@ -48,15 +48,19 @@ def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str
default="huggingface-pytorch-inference-2024-10-16-20-16-41-824",
env="SAGEMAKER_ENDPOINT_NAME",
)
EC2_TOOLS_API_URL: str = Field(
...,
env="EC2_TOOLS_API_URL",
)

# Optional
HUGGINGFACE_ACCESS_TOKEN: str = Field(None, env="HUGGINGFACE_ACCESS_TOKEN")
OPENAI_API_KEY: str = Field(None, env="OPENAI_API_KEY")

# Project details
API_VERSION: str = "/api/v1"
PROJECT_NAME: str = "FastAPI App"
PROJECT_DESCRIPTION: str = "A simple FastAPI app"
PROJECT_NAME: str = "B-cell and T-cell Epitope Prediction FastAPI"
PROJECT_DESCRIPTION: str = "B-cell and T-cell Epitope Prediction"

# Pydantic configuration to load environment variables from .env
model_config = SettingsConfigDict(env_file=".env")
Expand Down
78 changes: 76 additions & 2 deletions apps/fastapi/src/app/core/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import csv
import io
import logging
import re
from io import StringIO
from typing import List, Optional, Type, TypeVar

import aioboto3
import boto3
import httpx
from fastapi import HTTPException
Expand Down Expand Up @@ -60,7 +62,7 @@ def read_s3_csv(
# CRUD Sagemaker Endpoints
def get_endpoints(endpoint_name_filter, sagemaker_client=None):
if sagemaker_client is None:
sagemaker_client = boto3.client("sagemaker", region_name=settings.REGION)
sagemaker_client = boto3.client("sagemaker", region_name=settings.AWS_REGION)
# Retrieve all endpoints for filtered name
response = sagemaker_client.list_endpoints(
SortBy="Name", NameContains=endpoint_name_filter, MaxResults=100
Expand All @@ -85,7 +87,7 @@ def get_endpoints(endpoint_name_filter, sagemaker_client=None):

def get_endpoint(endpoint_name_filter, sagemaker_client=None):
if sagemaker_client is None:
sagemaker_client = boto3.client("sagemaker", region_name=settings.REGION)
sagemaker_client = boto3.client("sagemaker", region_name=settings.AWS_REGION)
endpoints = get_endpoints(endpoint_name_filter, sagemaker_client=sagemaker_client)
if len(endpoints) == 0:
return None
Expand Down Expand Up @@ -169,3 +171,75 @@ async def fetch_pdb_data(pdb_id: str, chain: Optional[str] = None) -> dict:
status_code=response.status_code,
detail=f"Error fetching PDB data: {response.status_code}",
)


def split_protein_sequence(
    protein_sequence: str, min_length: int, max_length: int
) -> List[str]:
    """
    Enumerate every contiguous peptide of the sequence whose length lies in
    [min_length, max_length], grouped by length (shorter lengths first) and,
    within each length, ordered by start position.
    """
    peptides: List[str] = []
    for window in range(min_length, max_length + 1):
        last_start = len(protein_sequence) - window
        for start in range(last_start + 1):
            peptides.append(protein_sequence[start : start + window])
    return peptides


def get_default_peptide_lengths(prediction_type: str):
    """
    Return the default (min_length, max_length) peptide window for the given
    prediction type; unknown types fall back to the MHC-I defaults (8, 11).
    """
    defaults = {
        "mhc-i": (8, 11),
        "mhc-ii": (13, 25),
    }
    # Fallback mirrors the mhc-i window for any other prediction type.
    return defaults.get(prediction_type, (8, 11))


def generate_csv_key(
    user_id: str, job_id: str, timestamp: str, prediction_type: str
) -> str:
    """
    Build a unique S3 object key for a prediction CSV, namespaced per user:
    predictions/<user_id>/<job_id>_<prediction_type>_<timestamp>.csv
    """
    filename = f"{job_id}_{prediction_type}_{timestamp}.csv"
    return f"predictions/{user_id}/{filename}"


async def upload_csv_to_s3(results: List[T], s3_key: str):
    """
    Uploads the processed results to S3 as a CSV file, using the Pydantic schema to generate the columns.

    :param results: Non-empty list of Pydantic models (MhcIPredictionResult, MhcIIPredictionResult, etc.)
    :param s3_key: The key (path) where the CSV will be stored in S3.
    :raises HTTPException: 400 if there are no results; 500 if the S3 upload fails.
    """
    if not results:
        raise HTTPException(status_code=400, detail="No results to upload")

    # Dynamically get the field names (columns) from the Pydantic schema.
    # `model_fields` is a class attribute; accessing it through an instance
    # is deprecated in Pydantic 2.11+, so go through the model class.
    fieldnames = list(type(results[0]).model_fields.keys())

    output = io.StringIO()
    writer = csv.DictWriter(output, fieldnames=fieldnames, quoting=csv.QUOTE_NONNUMERIC)
    writer.writeheader()

    # Convert results (Pydantic models) to dictionaries for CSV writing
    writer.writerows(result.model_dump() for result in results)

    csv_content = output.getvalue()

    # Create an aioboto3 session
    session = aioboto3.Session()

    # Use the session to create an S3 client with async context manager
    async with session.client("s3", region_name=settings.AWS_REGION) as s3_client:
        try:
            await s3_client.put_object(
                Bucket=settings.S3_BUCKET_NAME, Key=s3_key, Body=csv_content
            )
        except Exception as e:
            # Chain the original exception so the S3 traceback is preserved.
            raise HTTPException(
                status_code=500, detail=f"Failed to upload CSV to S3: {str(e)}"
            ) from e
14 changes: 12 additions & 2 deletions apps/fastapi/src/app/crud/crud_conformational_b_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ async def create(
return self.model(**created_prediction)

async def update_result(
self, db: AsyncClient, *, job_id: str, result: List[PredictionResult]
self,
db: AsyncClient,
*,
job_id: str,
result: List[PredictionResult],
csv_download_url: str,
) -> ConformationalBPrediction:
prediction = await self.get_by_job_id(db=db, job_id=job_id)
if not prediction:
Expand All @@ -59,7 +64,12 @@ async def update_result(
# Update the result field
updated_prediction = (
await db.table(self.model.table_name)
.update({"result": [res.model_dump() for res in result]})
.update(
{
"result": [res.model_dump() for res in result],
"csv_download_url": csv_download_url,
}
)
.eq("job_id", job_id)
.execute()
)
Expand Down
14 changes: 12 additions & 2 deletions apps/fastapi/src/app/crud/crud_linear_b_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ async def create(
return self.model(**created_prediction)

async def update_result(
self, db: AsyncClient, *, job_id: str, result: List[LBPredictionResult]
self,
db: AsyncClient,
*,
job_id: str,
result: List[LBPredictionResult],
csv_download_url: str,
) -> LinearBPrediction:
prediction = await self.get_by_job_id(db=db, job_id=job_id)
if not prediction:
Expand All @@ -59,7 +64,12 @@ async def update_result(
# Update the result field
updated_prediction = (
await db.table(self.model.table_name)
.update({"result": [res.model_dump() for res in result]})
.update(
{
"result": [res.model_dump() for res in result],
"csv_download_url": csv_download_url,
}
)
.eq("job_id", job_id)
.execute()
)
Expand Down
14 changes: 12 additions & 2 deletions apps/fastapi/src/app/crud/crud_mhc_i_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ async def create(
return self.model(**created_prediction)

async def update_result(
self, db: AsyncClient, *, job_id: str, result: List[MhcIPredictionResult]
self,
db: AsyncClient,
*,
job_id: str,
result: List[MhcIPredictionResult],
csv_download_url: str,
) -> MhcIPrediction:
prediction = await self.get_by_job_id(db=db, job_id=job_id)
if not prediction:
Expand All @@ -59,7 +64,12 @@ async def update_result(
# Update the result field
updated_prediction = (
await db.table(self.model.table_name)
.update({"result": [res.model_dump() for res in result]})
.update(
{
"result": [res.model_dump() for res in result],
"csv_download_url": csv_download_url,
}
)
.eq("job_id", job_id)
.execute()
)
Expand Down
14 changes: 12 additions & 2 deletions apps/fastapi/src/app/crud/crud_mhc_ii_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ async def create(
return self.model(**created_prediction)

async def update_result(
self, db: AsyncClient, *, job_id: str, result: List[MhcIIPredictionResult]
self,
db: AsyncClient,
*,
job_id: str,
result: List[MhcIIPredictionResult],
csv_download_url: str,
) -> MhcIIPrediction:
prediction = await self.get_by_job_id(db=db, job_id=job_id)
if not prediction:
Expand All @@ -59,7 +64,12 @@ async def update_result(
# Update the result field
updated_prediction = (
await db.table(self.model.table_name)
.update({"result": [res.model_dump() for res in result]})
.update(
{
"result": [res.model_dump() for res in result],
"csv_download_url": csv_download_url,
}
)
.eq("job_id", job_id)
.execute()
)
Expand Down
Loading
Loading