Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented netMHCpan and netMHCIIpan #10

Merged
merged 5 commits into from
Oct 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
# If you are cloning this repo, create a copy of this file named `.env` and populate it with your secrets.

# The database URL is used to connect to your Supabase database.
POSTGRES_URL="postgres://postgres.[USERNAME]:[PASSWORD]@aws-0-eu-central-1.pooler.supabase.com:6543/postgres?workaround=supabase-pooler.vercel"
POSTGRES_URL="postgres://postgres.[USERNAME]:[PASSWORD]@aws-0-us-east-1.pooler.supabase.com:6543/postgres?workaround=supabase-pooler.vercel"

# FastAPI
NEXT_PUBLIC_FASTAPI_URL="http://127.0.0.1:8000"
NEXT_PUBLIC_FASTAPI_STAGE_URL=""
NEXT_PUBLIC_USE_LAMBDA_API="false" # Set to "true" when you want to test Lambda

# Supabase
NEXT_PUBLIC_SUPABASE_URL=""
Expand Down
14 changes: 7 additions & 7 deletions apps/fastapi/.env.example
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
BACKEND_CORS_ORIGINS="http://localhost:3000"

# AWS
S3_BUCKET_NAME=""
SAGEMAKER_ENDPOINT_NAME=""
EC2_TOOLS_API_URL=""

# Supabase
SUPABASE_URL=""
SUPABASE_KEY=""
JWT_SECRET=""
SUPERUSER_EMAIL=""
SUPERUSER_PASSWORD=""

# AWS
REGION="us-east-1"
PROJECT_NAME=""
RAW_BUCKET=""
ARTIFACTS_BUCKET=""
OUTPUT_BUCKET=""

# HuggingFace
HUGGINGFACE_ACCESS_TOKEN=""

Expand Down
1 change: 1 addition & 0 deletions apps/fastapi/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ authors = [
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"aioboto3>=13.2.0",
"aiofiles>=24.1.0",
"boto3>=1.35.29",
"fastapi[standard]>=0.115.0",
Expand Down
22 changes: 19 additions & 3 deletions apps/fastapi/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
# This file was autogenerated by uv via the following command:
# uv pip compile pyproject.toml -o requirements.txt
aiofiles==24.1.0
aioboto3==13.2.0
# via fastapi-epitope-prediction (pyproject.toml)
aiobotocore==2.15.2
# via aioboto3
aiofiles==24.1.0
# via
# fastapi-epitope-prediction (pyproject.toml)
# aioboto3
aiohappyeyeballs==2.4.3
# via aiohttp
aiohttp==3.10.8
# via realtime
# via
# aiobotocore
# realtime
aioitertools==0.12.0
# via aiobotocore
aiosignal==1.3.1
# via aiohttp
annotated-types==0.7.0
Expand All @@ -19,9 +29,12 @@ anyio==4.6.0
attrs==24.2.0
# via aiohttp
boto3==1.35.32
# via fastapi-epitope-prediction (pyproject.toml)
# via
# fastapi-epitope-prediction (pyproject.toml)
# aiobotocore
botocore==1.35.32
# via
# aiobotocore
# boto3
# s3transfer
certifi==2024.8.30
Expand Down Expand Up @@ -77,6 +90,7 @@ httptools==0.6.1
# via uvicorn
httpx==0.27.2
# via
# fastapi-epitope-prediction (pyproject.toml)
# fastapi
# gotrue
# openai
Expand Down Expand Up @@ -235,5 +249,7 @@ websockets==13.1
# via
# realtime
# uvicorn
wrapt==1.16.0
# via aiobotocore
yarl==1.13.1
# via aiohttp
6 changes: 6 additions & 0 deletions apps/fastapi/src/app/api/api_v1/endpoints/prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ async def create_conformational_b_prediction(
chain=prediction_in.chain,
is_structure_based=prediction_in.is_structure_based,
prediction_type="conformational-b",
user_id=user.id,
db=db,
)

Expand Down Expand Up @@ -71,6 +72,7 @@ async def create_linear_b_prediction(
job_id=job.id,
sequence=prediction_in.sequence,
prediction_type="linear-b",
user_id=user.id,
db=db,
)

Expand All @@ -97,7 +99,9 @@ async def create_mhc_i_prediction(
process_and_update_prediction,
job_id=job.id,
sequence=prediction_in.sequence,
alleles=prediction_in.alleles,
prediction_type="mhc-i",
user_id=user.id,
db=db,
)

Expand All @@ -124,7 +128,9 @@ async def create_mhc_ii_prediction(
process_and_update_prediction,
job_id=job.id,
sequence=prediction_in.sequence,
alleles=prediction_in.alleles,
prediction_type="mhc-ii",
user_id=user.id,
db=db,
)

Expand Down
10 changes: 7 additions & 3 deletions apps/fastapi/src/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

class Settings(BaseSettings):
ENV: str = Field(default="", env="ENV")
REGION: str = Field(default="us-east-1", env="REGION")
AWS_REGION: str = Field(default="us-east-1", env="AWS_REGION")
BACKEND_CORS_ORIGINS: Union[List[AnyHttpUrl], List[str]] = Field(
default=["http://localhost:3000"], env="BACKEND_CORS_ORIGINS"
)
Expand All @@ -48,15 +48,19 @@ def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str
default="huggingface-pytorch-inference-2024-10-16-20-16-41-824",
env="SAGEMAKER_ENDPOINT_NAME",
)
EC2_TOOLS_API_URL: str = Field(
...,
env="EC2_TOOLS_API_URL",
)

# Optional
HUGGINGFACE_ACCESS_TOKEN: str = Field(None, env="HUGGINGFACE_ACCESS_TOKEN")
OPENAI_API_KEY: str = Field(None, env="OPENAI_API_KEY")

# Project details
API_VERSION: str = "/api/v1"
PROJECT_NAME: str = "FastAPI App"
PROJECT_DESCRIPTION: str = "A simple FastAPI app"
PROJECT_NAME: str = "B-cell and T-cell Epitope Prediction FastAPI"
PROJECT_DESCRIPTION: str = "B-cell and T-cell Epitope Prediction"

# Pydantic configuration to load environment variables from .env
model_config = SettingsConfigDict(env_file=".env")
Expand Down
78 changes: 76 additions & 2 deletions apps/fastapi/src/app/core/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import csv
import io
import logging
import re
from io import StringIO
from typing import List, Optional, Type, TypeVar

import aioboto3
import boto3
import httpx
from fastapi import HTTPException
Expand Down Expand Up @@ -60,7 +62,7 @@ def read_s3_csv(
# CRUD Sagemaker Endpoints
def get_endpoints(endpoint_name_filter, sagemaker_client=None):
if sagemaker_client is None:
sagemaker_client = boto3.client("sagemaker", region_name=settings.REGION)
sagemaker_client = boto3.client("sagemaker", region_name=settings.AWS_REGION)
# Retrieve all endpoints for filtered name
response = sagemaker_client.list_endpoints(
SortBy="Name", NameContains=endpoint_name_filter, MaxResults=100
Expand All @@ -85,7 +87,7 @@ def get_endpoints(endpoint_name_filter, sagemaker_client=None):

def get_endpoint(endpoint_name_filter, sagemaker_client=None):
if sagemaker_client is None:
sagemaker_client = boto3.client("sagemaker", region_name=settings.REGION)
sagemaker_client = boto3.client("sagemaker", region_name=settings.AWS_REGION)
endpoints = get_endpoints(endpoint_name_filter, sagemaker_client=sagemaker_client)
if len(endpoints) == 0:
return None
Expand Down Expand Up @@ -169,3 +171,75 @@ async def fetch_pdb_data(pdb_id: str, chain: Optional[str] = None) -> dict:
status_code=response.status_code,
detail=f"Error fetching PDB data: {response.status_code}",
)


def split_protein_sequence(
    protein_sequence: str, min_length: int, max_length: int
) -> List[str]:
    """
    Enumerate every contiguous peptide of the sequence whose length lies in
    [min_length, max_length], grouped by length (shorter lengths first) and,
    within each length, ordered by start position.
    """
    peptides: List[str] = []
    for window in range(min_length, max_length + 1):
        last_start = len(protein_sequence) - window
        for start in range(last_start + 1):
            peptides.append(protein_sequence[start : start + window])
    return peptides


def get_default_peptide_lengths(prediction_type: str):
    """
    Return the default (min_length, max_length) peptide window for the given
    prediction type; unknown types fall back to the MHC-I defaults (8, 11).
    """
    defaults = {
        "mhc-i": (8, 11),
        "mhc-ii": (13, 25),
    }
    # Fallback mirrors the mhc-i window for any other prediction type.
    return defaults.get(prediction_type, (8, 11))


def generate_csv_key(
    user_id: str, job_id: str, timestamp: str, prediction_type: str
) -> str:
    """
    Build a unique S3 object key for a prediction CSV, namespaced per user:
    predictions/<user_id>/<job_id>_<prediction_type>_<timestamp>.csv
    """
    filename = f"{job_id}_{prediction_type}_{timestamp}.csv"
    return f"predictions/{user_id}/{filename}"


async def upload_csv_to_s3(results: List[T], s3_key: str):
    """
    Uploads the processed results to S3 as a CSV file, using the Pydantic schema to generate the columns.

    :param results: Non-empty list of Pydantic models (MhcIPredictionResult, MhcIIPredictionResult, etc.)
    :param s3_key: The key (path) where the CSV will be stored in S3.
    :raises HTTPException: 400 if there are no results; 500 if the S3 upload fails.
    """
    if not results:
        raise HTTPException(status_code=400, detail="No results to upload")

    # Dynamically get the field names (columns) from the Pydantic schema.
    # `model_fields` is a class attribute; accessing it through an instance
    # is deprecated in Pydantic 2.11+, so go through the model class.
    fieldnames = list(type(results[0]).model_fields.keys())

    output = io.StringIO()
    writer = csv.DictWriter(output, fieldnames=fieldnames, quoting=csv.QUOTE_NONNUMERIC)
    writer.writeheader()

    # Convert results (Pydantic models) to dictionaries for CSV writing
    writer.writerows(result.model_dump() for result in results)

    csv_content = output.getvalue()

    # Create an aioboto3 session
    session = aioboto3.Session()

    # Use the session to create an S3 client with async context manager
    async with session.client("s3", region_name=settings.AWS_REGION) as s3_client:
        try:
            await s3_client.put_object(
                Bucket=settings.S3_BUCKET_NAME, Key=s3_key, Body=csv_content
            )
        except Exception as e:
            # Chain the original exception so the S3 traceback is preserved.
            raise HTTPException(
                status_code=500, detail=f"Failed to upload CSV to S3: {str(e)}"
            ) from e
14 changes: 12 additions & 2 deletions apps/fastapi/src/app/crud/crud_conformational_b_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ async def create(
return self.model(**created_prediction)

async def update_result(
self, db: AsyncClient, *, job_id: str, result: List[PredictionResult]
self,
db: AsyncClient,
*,
job_id: str,
result: List[PredictionResult],
csv_download_url: str,
) -> ConformationalBPrediction:
prediction = await self.get_by_job_id(db=db, job_id=job_id)
if not prediction:
Expand All @@ -59,7 +64,12 @@ async def update_result(
# Update the result field
updated_prediction = (
await db.table(self.model.table_name)
.update({"result": [res.model_dump() for res in result]})
.update(
{
"result": [res.model_dump() for res in result],
"csv_download_url": csv_download_url,
}
)
.eq("job_id", job_id)
.execute()
)
Expand Down
14 changes: 12 additions & 2 deletions apps/fastapi/src/app/crud/crud_linear_b_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ async def create(
return self.model(**created_prediction)

async def update_result(
self, db: AsyncClient, *, job_id: str, result: List[LBPredictionResult]
self,
db: AsyncClient,
*,
job_id: str,
result: List[LBPredictionResult],
csv_download_url: str,
) -> LinearBPrediction:
prediction = await self.get_by_job_id(db=db, job_id=job_id)
if not prediction:
Expand All @@ -59,7 +64,12 @@ async def update_result(
# Update the result field
updated_prediction = (
await db.table(self.model.table_name)
.update({"result": [res.model_dump() for res in result]})
.update(
{
"result": [res.model_dump() for res in result],
"csv_download_url": csv_download_url,
}
)
.eq("job_id", job_id)
.execute()
)
Expand Down
14 changes: 12 additions & 2 deletions apps/fastapi/src/app/crud/crud_mhc_i_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ async def create(
return self.model(**created_prediction)

async def update_result(
self, db: AsyncClient, *, job_id: str, result: List[MhcIPredictionResult]
self,
db: AsyncClient,
*,
job_id: str,
result: List[MhcIPredictionResult],
csv_download_url: str,
) -> MhcIPrediction:
prediction = await self.get_by_job_id(db=db, job_id=job_id)
if not prediction:
Expand All @@ -59,7 +64,12 @@ async def update_result(
# Update the result field
updated_prediction = (
await db.table(self.model.table_name)
.update({"result": [res.model_dump() for res in result]})
.update(
{
"result": [res.model_dump() for res in result],
"csv_download_url": csv_download_url,
}
)
.eq("job_id", job_id)
.execute()
)
Expand Down
14 changes: 12 additions & 2 deletions apps/fastapi/src/app/crud/crud_mhc_ii_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ async def create(
return self.model(**created_prediction)

async def update_result(
self, db: AsyncClient, *, job_id: str, result: List[MhcIIPredictionResult]
self,
db: AsyncClient,
*,
job_id: str,
result: List[MhcIIPredictionResult],
csv_download_url: str,
) -> MhcIIPrediction:
prediction = await self.get_by_job_id(db=db, job_id=job_id)
if not prediction:
Expand All @@ -59,7 +64,12 @@ async def update_result(
# Update the result field
updated_prediction = (
await db.table(self.model.table_name)
.update({"result": [res.model_dump() for res in result]})
.update(
{
"result": [res.model_dump() for res in result],
"csv_download_url": csv_download_url,
}
)
.eq("job_id", job_id)
.execute()
)
Expand Down
Loading
Loading